├── automation ├── .tmp │ └── .gitignore ├── ocr │ ├── ocrconfig.meta │ ├── sk_districts.meta │ ├── nl_districts.meta │ ├── mz_districts.meta │ ├── hp_districts.meta │ ├── ocrconfig.meta.orig │ ├── kl_districts.meta │ ├── ut_districts.meta │ ├── mn_districts.meta │ ├── ap_districts.meta │ ├── ml_districts.meta │ ├── jk_districts.meta │ ├── hr_districts.meta │ ├── jh_districts.meta │ ├── pb_districts.meta │ ├── ar_districts.meta │ ├── rj_districts.meta │ ├── mh_districts.meta │ ├── ka_districts.meta │ ├── as_districts.meta │ ├── tn_districts.meta │ ├── mp_districts.meta │ ├── br_districts.meta │ ├── ct_districts.meta │ ├── ocr_vision.py │ ├── tg_districts.meta │ ├── up_districts.meta │ ├── ocr.sh │ └── googlevision.py ├── biharIndividual.py ├── misc │ ├── kabulletinextractor.py │ ├── tnfilegeneration.py │ └── kapatients.py ├── output2.out ├── automation.meta ├── nameMapping.meta ├── kaautomation.py ├── deltaCalculator.py ├── x └── automation.py ├── detailedflow.png ├── extract.sh ├── requirements.txt ├── .gitignore ├── extract.meta ├── extract.meta.bk ├── README.md └── extract.py /automation/.tmp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | */ 3 | !.gitignore -------------------------------------------------------------------------------- /detailedflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bee-rickey/webScraper/HEAD/detailedflow.png -------------------------------------------------------------------------------- /automation/ocr/ocrconfig.meta: -------------------------------------------------------------------------------- 1 | startingText:Araria 2 | enableTranslation:True 3 | translationFile:br_districts.meta 4 | yInterval:0 5 | xInterval:0 6 | -------------------------------------------------------------------------------- /automation/ocr/sk_districts.meta: 
-------------------------------------------------------------------------------- 1 | East Sikkim, East Sikkim 2 | North Sikkim, North Sikkim 3 | South Sikkim, South Sikkim 4 | West Sikkim, West Sikkim 5 | -------------------------------------------------------------------------------- /extract.sh: -------------------------------------------------------------------------------- 1 | today=`date +"%d_%b_%Y"` 2 | mkdir data/$today 3 | curl https://covid19.nagaland.gov.in/ > x.html 4 | python3 extract.py $today 5 | #git pull 6 | #git add --all 7 | #git commit -m "Folder creation" 8 | #git push --all 9 | -------------------------------------------------------------------------------- /automation/ocr/nl_districts.meta: -------------------------------------------------------------------------------- 1 | Dimapur,Dimapur 2 | Kiphire,Kiphire 3 | Kohima,Kohima 4 | Longleng,Longleng 5 | Mokokchung,Mokokchung 6 | Mon,Mon 7 | Peren,Peren 8 | Phek,Phek 9 | Tuensang,Tuensang 10 | Wokha,Wokha 11 | Zunheboto,Zunheboto 12 | -------------------------------------------------------------------------------- /automation/ocr/mz_districts.meta: -------------------------------------------------------------------------------- 1 | Aizawl,Aizawl 2 | Champhai,Champhai 3 | Kolasib,Kolasib 4 | Lawngtlai,Lawngtlai 5 | Lunglei,Lunglei 6 | Mamit,Mamit 7 | Saiha,Saiha 8 | Serchhip,Serchhip 9 | Saitual,Saitual 10 | Hnahthial,Hnahthial 11 | Khawzawl,Khawzawl 12 | -------------------------------------------------------------------------------- /automation/ocr/hp_districts.meta: -------------------------------------------------------------------------------- 1 | Bilaspur,Bilaspur 2 | Chamba,Chamba 3 | Hamirpur,Hamirpur 4 | Kangra,Kangra 5 | Kinnaur,Kinnaur 6 | Kullu,Kullu 7 | Lahaul and Spiti,Lahaul and Spiti 8 | Mandi,Mandi 9 | Shimla,Shimla 10 | Sirmaur,Sirmaur 11 | Solan,Solan 12 | Una,Una 13 | -------------------------------------------------------------------------------- 
/automation/ocr/ocrconfig.meta.orig: -------------------------------------------------------------------------------- 1 | startingText:@@startingText@@ 2 | enableTranslation:@@enableTranslation@@ 3 | translationFile:@@statename@@_districts.meta 4 | yInterval:@@yInterval@@ 5 | xInterval:@@xInterval@@ 6 | houghTransform:@@houghTransform@@ 7 | configMinLineLength:@@configMinLineLength@@ 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.9.3 2 | camelot-py==0.8.2 3 | google-cloud-vision==2.0.0 4 | html5lib==1.1 5 | matplotlib==3.3.2 6 | pandas==1.1.3 7 | pdftotext==2.1.5 8 | Pillow==8.2.0 9 | requests==2.24.0 10 | opencv-python==4.4.0.40 11 | fuzzywuzzy==0.18.0 12 | python-Levenshtein==0.12.0 13 | -------------------------------------------------------------------------------- /automation/ocr/kl_districts.meta: -------------------------------------------------------------------------------- 1 | Alappuzha,Alappuzha 2 | Ernakulam,Ernakulam 3 | Idukki,Idukki 4 | Kannur,Kannur 5 | Kasaragod,Kasaragod 6 | Kollam,Kollam 7 | Kottayam,Kottayam 8 | Kozhikode,Kozhikode 9 | Malappuram,Malappuram 10 | Palakkad,Palakkad 11 | Pathanamthitta,Pathanamthitta 12 | Thiruvananthapuram,Thiruvananthapuram 13 | Thrissur,Thrissur 14 | Wayanad,Wayanad 15 | -------------------------------------------------------------------------------- /automation/ocr/ut_districts.meta: -------------------------------------------------------------------------------- 1 | Almora,Almora 2 | Bageshwar,Bageshwar 3 | Chamoli,Chamoli 4 | Champawat,Champawat 5 | Dehradun,Dehradun 6 | Haridwar,Haridwar 7 | Nainital,Nainital 8 | Pauri Garhwal,Pauri Garhwal 9 | Pithoragarh,Pithoragarh 10 | Rudraprayag,Rudraprayag 11 | Tehri Garhwal,Tehri Garhwal 12 | Udham Singh Nagar,Udham Singh Nagar 13 | Uttarkashi,Uttarkashi 14 | 
-------------------------------------------------------------------------------- /automation/ocr/mn_districts.meta: -------------------------------------------------------------------------------- 1 | Bishnupur,Bishnupur 2 | Chandel,Chandel 3 | Churachandpur,Churachandpur 4 | Imphal East,Imphal East 5 | Imphal West,Imphal West 6 | Jiribam,Jiribam 7 | Kakching,Kakching 8 | Kamjong,Kamjong 9 | Kangpokpi,Kangpokpi 10 | Noney,Noney 11 | Pherzawl,Pherzawl 12 | Senapati,Senapati 13 | Tamenglong,Tamenglong 14 | Tengnoupal,Tengnoupal 15 | Thoubal,Thoubal 16 | Ukhrul,Ukhrul 17 | -------------------------------------------------------------------------------- /automation/ocr/ap_districts.meta: -------------------------------------------------------------------------------- 1 | Anantapur,Anantapur 2 | Chittoor,Chittoor 3 | East Godavari,East Godavari 4 | Guntur,Guntur 5 | Krishna,Krishna 6 | Kurnool,Kurnool 7 | Prakasam,Prakasam 8 | S.P.S. Nellore,S.P.S. Nellore 9 | Srikakulam,Srikakulam 10 | Visakhapatnam,Visakhapatnam 11 | Vizianagaram,Vizianagaram 12 | West Godavari,West Godavari 13 | Y.S.R. Kadapa,Y.S.R. Kadapa 14 | YSR Kadapa,Y.S.R. 
Kadapa 15 | -------------------------------------------------------------------------------- /automation/ocr/ml_districts.meta: -------------------------------------------------------------------------------- 1 | East Garo Hills,East Garo Hills 2 | East Jaintia Hills,East Jaintia Hills 3 | East Khasi Hills,East Khasi Hills 4 | North Garo Hills,North Garo Hills 5 | Ribhoi,Ribhoi 6 | South Garo Hills,South Garo Hills 7 | South West Garo Hills,South West Garo Hills 8 | South West Khasi Hills,South West Khasi Hills 9 | West Garo Hills,West Garo Hills 10 | West Jaintia Hills,West Jaintia Hills 11 | West Khasi Hills,West Khasi Hills 12 | -------------------------------------------------------------------------------- /automation/ocr/jk_districts.meta: -------------------------------------------------------------------------------- 1 | Anantnag,Anantnag 2 | Budgam,Budgam 3 | Bandipora,Bandipora 4 | Baramulla,Baramulla 5 | Doda,Doda 6 | Ganderbal,Ganderbal 7 | Jammu,Jammu 8 | Kathua,Kathua 9 | Kishtwar,Kishtwar 10 | Kulgam,Kulgam 11 | Kupwara,Kupwara 12 | Mirpur,Mirpur 13 | Muzaffarabad,Muzaffarabad 14 | Pulwama,Pulwama 15 | Punch,Punch 16 | Rajouri,Rajouri 17 | Ramban,Ramban 18 | Reasi,Reasi 19 | Samba,Samba 20 | Shopiyan,Shopiyan 21 | Srinagar,Srinagar 22 | Udhampur,Udhampur 23 | -------------------------------------------------------------------------------- /automation/ocr/hr_districts.meta: -------------------------------------------------------------------------------- 1 | Ambala,Ambala 2 | Bhiwani,Bhiwani 3 | Charkhi Dadri,Charkhi Dadri 4 | Faridabad,Faridabad 5 | Fatehabad,Fatehabad 6 | Gurugram,Gurugram 7 | Hisar,Hisar 8 | Jhajjar,Jhajjar 9 | Jind,Jind 10 | Kaithal,Kaithal 11 | Karnal,Karnal 12 | Kurukshetra,Kurukshetra 13 | Mahendragarh,Mahendragarh 14 | Nuh,Nuh 15 | Palwal,Palwal 16 | Panchkula,Panchkula 17 | Panipat,Panipat 18 | Rewari,Rewari 19 | Rohtak,Rohtak 20 | Sirsa,Sirsa 21 | Sonipat,Sonipat 22 | Yamunanagar,Yamunanagar 23 | 
-------------------------------------------------------------------------------- /automation/biharIndividual.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | testingNumbersFile = open("ocr/output.txt", "r") 4 | 5 | for index, line in enumerate(testingNumbersFile): 6 | if index == 0: 7 | continue 8 | outputString = "" 9 | linesArray = line.split('|')[0].split(',') 10 | 11 | gender = "F" if linesArray[1].strip() == "FEMALE" else "M" 12 | print("{}, {}, {}, {}, {}, {}, {}, {}".format(linesArray[2].strip(), gender, linesArray[3].strip().title(), linesArray[4].strip().title(), 'Bihar', 'BR', 1, 'Hospitalized')) 13 | -------------------------------------------------------------------------------- /automation/ocr/jh_districts.meta: -------------------------------------------------------------------------------- 1 | Bokaro,Bokaro 2 | Chatra,Chatra 3 | Deoghar,Deoghar 4 | Dhanbad,Dhanbad 5 | Dumka,Dumka 6 | Garhwa,Garhwa 7 | Giridih,Giridih 8 | Godda,Godda 9 | Gumla,Gumla 10 | Hazaribagh,Hazaribagh 11 | Jamtara,Jamtara 12 | Khunti,Khunti 13 | Koderma,Koderma 14 | Latehar,Latehar 15 | Lohardaga,Lohardaga 16 | Pakur,Pakur 17 | Palamu,Palamu 18 | West Singhbhum,West Singhbhum 19 | East Singhbhum,East Singhbhum 20 | Ramgarh,Ramgarh 21 | Ranchi,Ranchi 22 | Sahibganj,Sahibganj 23 | Saraikela-Kharsawan,Saraikela-Kharsawan 24 | Simdega,Simdega 25 | West, West Singhbhum 26 | -------------------------------------------------------------------------------- /automation/ocr/pb_districts.meta: -------------------------------------------------------------------------------- 1 | Amritsar,Amritsar 2 | Barnala,Barnala 3 | Bathinda,Bathinda 4 | Faridkot,Faridkot 5 | Fatehgarh Sahib,Fatehgarh Sahib 6 | Fazilka,Fazilka 7 | Ferozepur,Ferozepur 8 | Gurdaspur,Gurdaspur 9 | Hoshiarpur,Hoshiarpur 10 | Jalandhar,Jalandhar 11 | Kapurthala,Kapurthala 12 | Ludhiana,Ludhiana 13 | Mansa,Mansa 14 | Moga,Moga 15 | Pathankot,Pathankot 16 | 
Patiala,Patiala 17 | Rupnagar,Rupnagar 18 | S.A.S. Nagar,S.A.S. Nagar 19 | Sangrur,Sangrur 20 | Shahid Bhagat Singh Nagar,Shahid Bhagat Singh Nagar 21 | Sri Muktsar Sahib,Sri Muktsar Sahib 22 | Tarn Taran,Tarn Taran 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | x.html 2 | .extract.meta.swp 3 | automation/orsite.csv 4 | *.swp 5 | *.swp 6 | automation/deltaCalculator.log 7 | automation/automated.txt 8 | automation/__pycache__/deltaCalculator.cpython-37.pyc 9 | automation/br.txt 10 | .DS_Store 11 | automation/HR.txt 12 | automation/hr.pdf.txt 13 | automation/hr.pdf 14 | automation/hr.csv 15 | *.txt 16 | *.csv 17 | *.pdf 18 | *.jpg 19 | *.png 20 | automation/ocr/ocrconfig.meta 21 | automation/ocr/ocrconfig.meta 22 | automation/deltaCalculator.log 23 | automation/ocr/ocrconfig.meta 24 | automation/ocr/ocrconfig.meta 25 | 26 | venv/ 27 | __pycache__ 28 | !requirements.txt 29 | 30 | .vscode 31 | automation/ocr/ocrconfig.meta 32 | -------------------------------------------------------------------------------- /automation/ocr/ar_districts.meta: -------------------------------------------------------------------------------- 1 | Anjaw,Anjaw 2 | Changlang,Changlang 3 | East Kameng,East Kameng 4 | East Siang,East Siang 5 | Kamle,Kamle 6 | Kra Daadi,Kra Daadi 7 | Kurung Kumey,Kurung Kumey 8 | Lepa Rada,Lepa Rada 9 | Lohit,Lohit 10 | Longding,Longding 11 | Lower Dibang Valley,Lower Dibang Valley 12 | Lower Siang,Lower Siang 13 | Lower Subansiri,Lower Subansiri 14 | Namsai,Namsai 15 | Pakke Kessang,Pakke Kessang 16 | Papum Pare,Papum Pare 17 | Shi Yomi,Shi Yomi 18 | Siang,Siang 19 | Tawang,Tawang 20 | Tirap,Tirap 21 | Upper Dibang Valley,Upper Dibang Valley 22 | Upper Siang,Upper Siang 23 | Upper Subansiri,Upper Subansiri 24 | West Kameng,West Kameng 25 | West Siang,West Siang 26 | 
-------------------------------------------------------------------------------- /automation/misc/kabulletinextractor.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pdftotext 3 | import re 4 | from bs4 import BeautifulSoup 5 | 6 | 7 | 8 | response = requests.request("GET", "https://covid19.karnataka.gov.in/new-page/Health%20Department%20Bulletin/en") 9 | soup = BeautifulSoup(response.content, 'html5lib') 10 | 11 | rows = soup.find_all("td") 12 | 13 | for row in rows: 14 | aTag = row.find("a") 15 | if aTag is not None: 16 | if 'karnataka.gov' in aTag['href']: 17 | fileNameArray = aTag['href'].split('/') 18 | fileName = fileNameArray[len(fileNameArray) - 1] 19 | r = requests.get(aTag['href'], allow_redirects=True) 20 | open("KABulletin/" + fileName, 'wb').write(r.content) 21 | -------------------------------------------------------------------------------- /automation/ocr/rj_districts.meta: -------------------------------------------------------------------------------- 1 | Ajmer,Ajmer 2 | Alwar,Alwar 3 | Banswara,Banswara 4 | Baran,Baran 5 | Barmer,Barmer 6 | Bharatpur,Bharatpur 7 | Bhilwara,Bhilwara 8 | Bikaner,Bikaner 9 | Bundi,Bundi 10 | Chittorgarh,Chittorgarh 11 | Churu,Churu 12 | Dausa,Dausa 13 | Dholpur,Dholpur 14 | Dungarpur,Dungarpur 15 | Ganganagar,Ganganagar 16 | Hanumangarh,Hanumangarh 17 | Jaipur,Jaipur 18 | Jaisalmer,Jaisalmer 19 | Jalore,Jalore 20 | Jhalawar,Jhalawar 21 | Jhunjhunu,Jhunjhunu 22 | Jodhpur,Jodhpur 23 | Karauli,Karauli 24 | Kota,Kota 25 | Nagaur,Nagaur 26 | Pali,Pali 27 | Pratapgarh,Pratapgarh 28 | Rajsamand,Rajsamand 29 | Sawai Madhopur,Sawai Madhopur 30 | Sikar,Sikar 31 | Sirohi,Sirohi 32 | Tonk,Tonk 33 | Udaipur,Udaipur 34 | -------------------------------------------------------------------------------- /automation/ocr/mh_districts.meta: -------------------------------------------------------------------------------- 1 | Ahmednagar,Ahmednagar 2 | Akola,Akola 3 | 
Amravati,Amravati 4 | Aurangabad,Aurangabad 5 | Bhandara,Bhandara 6 | Beed,Beed 7 | Buldhana,Buldhana 8 | Chandrapur,Chandrapur 9 | Dhule,Dhule 10 | Gadchiroli,Gadchiroli 11 | Gondia,Gondia 12 | Hingoli,Hingoli 13 | Jalgaon,Jalgaon 14 | Jalna,Jalna 15 | Kolhapur,Kolhapur 16 | Latur,Latur 17 | Mumbai,Mumbai 18 | Mumbai Suburban,Mumbai Suburban 19 | Nagpur,Nagpur 20 | Nanded,Nanded 21 | Nandurbar,Nandurbar 22 | Nashik,Nashik 23 | Osmanabad,Osmanabad 24 | Palghar,Palghar 25 | Parbhani,Parbhani 26 | Pune,Pune 27 | Raigad,Raigad 28 | Ratnagiri,Ratnagiri 29 | Sangli,Sangli 30 | Satara,Satara 31 | Sindhudurg,Sindhudurg 32 | Solapur,Solapur 33 | Thane,Thane 34 | Wardha,Wardha 35 | Washim,Washim 36 | Yavatmal,Yavatmal 37 | -------------------------------------------------------------------------------- /automation/ocr/ka_districts.meta: -------------------------------------------------------------------------------- 1 | Others,Others 2 | Bagalakote,Bagalakote 3 | Ballari,Ballari 4 | Belagavi,Belagavi 5 | Bengaluru Urban,Bengaluru Urban 6 | Bengaluru Rural,Bengaluru Rural 7 | Bidar,Bidar 8 | Chamarajanagara,Chamarajanagara 9 | Chikkaballapura,Chikkaballapura 10 | Chikkamagaluru,Chikkamagaluru 11 | Chitradurga,Chitradurga 12 | Dakshina Kannada,Dakshina Kannada 13 | Davanagere,Davanagere 14 | Dharwad,Dharwad 15 | Gadag,Gadag 16 | Hassan,Hassan 17 | Haveri,Haveri 18 | Kalaburagi,Kalaburagi 19 | Kodagu,Kodagu 20 | Kolar,Kolar 21 | Koppal,Koppal 22 | Mandya,Mandya 23 | Mysuru,Mysuru 24 | Raichur,Raichur 25 | Ramanagara,Ramanagara 26 | Shivamogga,Shivamogga 27 | Tumakuru,Tumakuru 28 | Udupi,Udupi 29 | Uttara Kannada,Uttara Kannada 30 | Vijayapura,Vijayapura 31 | Yadgir,Yadgir 32 | -------------------------------------------------------------------------------- /automation/ocr/as_districts.meta: -------------------------------------------------------------------------------- 1 | Baksa,Baksa 2 | Barpeta,Barpeta 3 | Biswanath,Biswanath 4 | Bongaigaon,Bongaigaon 5 | Cachar,Cachar 6 | 
Charaideo,Charaideo 7 | Chirang,Chirang 8 | Darrang,Darrang 9 | Dhemaji,Dhemaji 10 | Dhubri,Dhubri 11 | Dibrugarh,Dibrugarh 12 | Dima Hasao,Dima Hasao 13 | Goalpara,Goalpara 14 | Golaghat,Golaghat 15 | Hailakandi,Hailakandi 16 | Hojai,Hojai 17 | Jorhat,Jorhat 18 | Kamrup,Kamrup 19 | Kamrup Metropolitan,Kamrup Metropolitan 20 | Karbi Anglong,Karbi Anglong 21 | Karimganj,Karimganj 22 | Kokrajhar,Kokrajhar 23 | Lakhimpur,Lakhimpur 24 | Majuli,Majuli 25 | Morigaon,Morigaon 26 | Nagaon,Nagaon 27 | Nalbari,Nalbari 28 | Sivasagar,Sivasagar 29 | Sonitpur,Sonitpur 30 | South Salmara Mankachar,South Salmara Mankachar 31 | Tinsukia,Tinsukia 32 | Udalguri,Udalguri 33 | West Karbi Anglong,West Karbi Anglong 34 | -------------------------------------------------------------------------------- /automation/output2.out: -------------------------------------------------------------------------------- 1 | 16-03-2021, India, "", 1798467, 25709257, 5366679, 33886, 25709257, 6161488, 13993527, 11713149, 2581, 2557019, 29313725 2 | 16-03-2021, Andaman and Nicobar Islands, "", 328, 11950, 700, 7, 11950, 2876, 7391, 4559, 0, 0, 14826 3 | 16-03-2021, Andaman and Nicobar Islands, "North and Middle Andaman", 79, 2245, 300, 3, 2245, 929, 1086, 1159, 0, 0, 3174 4 | 16-03-2021, Andaman and Nicobar Islands, "South Andaman", 178, 8879, 300, 3, 8879, 1650, 5874, 3005, 0, 0, 10529 5 | 16-03-2021, Andaman and Nicobar Islands, "Nicobar", 71, 826, 100, 1, 826, 297, 431, 395, 0, 0, 1123 6 | 16-03-2021, Andhra Pradesh, "", 67824, 976078, 331083, 2204, 976078, 257997, 501439, 474547, 92, 145251, 1088824 7 | 16-03-2021, Andhra Pradesh, "Krishna", 5537, 83524, 28619, 191, 83524, 22711, 42137, 41385, 2, 23600, 82635 8 | 16-03-2021, Andhra Pradesh, "Guntur", 5117, 86069, 34150, 227, 86069, 20637, 46752, 39311, 6, 23124, 83582 9 | -------------------------------------------------------------------------------- /automation/ocr/tn_districts.meta: 
-------------------------------------------------------------------------------- 1 | Ariyalur,Ariyalur 2 | Chennai,Chennai 3 | Coimbatore,Coimbatore 4 | Cuddalore,Cuddalore 5 | Dharmapuri,Dharmapuri 6 | Dindigul,Dindigul 7 | Erode,Erode 8 | Kancheepuram,Kancheepuram 9 | Kanyakumari,Kanyakumari 10 | Karur,Karur 11 | Krishnagiri,Krishnagiri 12 | Madurai,Madurai 13 | Nagapattinam,Nagapattinam 14 | Namakkal,Namakkal 15 | Perambalur,Perambalur 16 | Pudukkottai,Pudukkottai 17 | Ramanathapuram,Ramanathapuram 18 | Salem,Salem 19 | Sivaganga,Sivaganga 20 | Thanjavur,Thanjavur 21 | Nilgiris,Nilgiris 22 | Theni,Theni 23 | Thiruvallur,Thiruvallur 24 | Thiruvarur,Thiruvarur 25 | Thoothukkudi,Thoothukkudi 26 | Tiruchirappalli,Tiruchirappalli 27 | Tirunelveli,Tirunelveli 28 | Tiruppur,Tiruppur 29 | Tiruvannamalai,Tiruvannamalai 30 | Vellore,Vellore 31 | Viluppuram,Viluppuram 32 | Virudhunagar,Virudhunagar 33 | Tenkasi,Tenkasi 34 | Chengalpattu,Chengalpattu 35 | Ranipet,Ranipet 36 | Kallakurichi,Kallakurichi 37 | Tirupathur,Tirupathur 38 | -------------------------------------------------------------------------------- /automation/ocr/mp_districts.meta: -------------------------------------------------------------------------------- 1 | Agar Malwa,Agar Malwa 2 | Alirajpur,Alirajpur 3 | Anuppur,Anuppur 4 | Ashoknagar,Ashoknagar 5 | Balaghat,Balaghat 6 | Barwani,Barwani 7 | Betul,Betul 8 | Bhind,Bhind 9 | Bhopal,Bhopal 10 | Burhanpur,Burhanpur 11 | Chhatarpur,Chhatarpur 12 | Chhindwara,Chhindwara 13 | Damoh,Damoh 14 | Datia,Datia 15 | Dewas,Dewas 16 | Dhar,Dhar 17 | Dindori,Dindori 18 | Khandwa,Khandwa 19 | Guna,Guna 20 | Gwalior,Gwalior 21 | Harda,Harda 22 | Hoshangabad,Hoshangabad 23 | Indore,Indore 24 | Jabalpur,Jabalpur 25 | Jhabua,Jhabua 26 | Katni,Katni 27 | Mandla,Mandla 28 | Mandsaur,Mandsaur 29 | Morena,Morena 30 | Narsinghpur,Narsinghpur 31 | Neemuch,Neemuch 32 | Niwari,Niwari 33 | Panna,Panna 34 | Raisen,Raisen 35 | Rajgarh,Rajgarh 36 | Ratlam,Ratlam 37 | Rewa,Rewa 38 | 
Sagar,Sagar 39 | Satna,Satna 40 | Sehore,Sehore 41 | Seoni,Seoni 42 | Shahdol,Shahdol 43 | Shajapur,Shajapur 44 | Sheopur,Sheopur 45 | Shivpuri,Shivpuri 46 | Sidhi,Sidhi 47 | Singrauli,Singrauli 48 | Tikamgarh,Tikamgarh 49 | Ujjain,Ujjain 50 | Umaria,Umaria 51 | Vidisha,Vidisha 52 | Khargone,Khargone 53 | -------------------------------------------------------------------------------- /extract.meta: -------------------------------------------------------------------------------- 1 | http://www.rajswasthya.nic.in/, Rajasthan, Districts 2 | https://statedashboard.odisha.gov.in/,Odisha, Districts 3 | https://covid19dashboard.py.gov.in/QuarantineReport,Puducherry, Districts 4 | https://dashboard.kerala.gov.in/testing-view-public.php, Kerala, Districts 5 | https://gujcovid19.gujarat.gov.in/, Gujarat, Districts 6 | http://covid19.ap.gov.in/Covid19_Admin/api/CV/DashboardCountAPI, Andhra Pradesh, Districts 7 | http://hmfw.ap.gov.in/covid_dashboard.aspx, Andhra Pradesh, 8 | http://covid19.itanagarsmartcity.in/covidstatus.php,Arunachal Pradesh, 9 | http://chdcovid19.in/, Chandigarh, 10 | https://nhm.goa.gov.in/,Goa, 11 | https://gujcovid19.gujarat.gov.in/, Gujarat, 12 | https://dashboard.kerala.gov.in/index.php, Kerala, 13 | https://corona.meghalayagov.in/, Meghalaya, 14 | https://covid19.nagaland.gov.in/,Nagaland, 15 | https://statedashboard.odisha.gov.in/, Odisha, 16 | https://covid19dashboard.py.gov.in/, Puducherry, 17 | https://covid19sikkim.org/, Sikkim, 18 | http://www.rajswasthya.nic.in/, Rajasthan, 19 | https://covid19.tripura.gov.in/Visitor/ViewStatus.aspx, Tripura, 20 | -------------------------------------------------------------------------------- /extract.meta.bk: -------------------------------------------------------------------------------- 1 | http://www.rajswasthya.nic.in/, Rajasthan, Districts 2 | https://gujcovid19.gujarat.gov.in/, Gujarat, 3 | https://gujcovid19.gujarat.gov.in/, Gujarat, Districts 4 | 
http://covid19.ap.gov.in/Covid19_Admin/api/CV/DashboardCountAPI, Andhra Pradesh, Districts 5 | https://gujcovid19.gujarat.gov.in/, Gujarat, Districts 6 | https://statedashboard.odisha.gov.in/,Odisha, Districts 7 | https://covid19dashboard.py.gov.in/QuarantineReport,Puducherry, Districts 8 | https://dashboard.kerala.gov.in/testing-view-public.php, Kerala, Districts 9 | http://hmfw.ap.gov.in/covid_dashboard.aspx, Andhra Pradesh, 10 | http://covid19.itanagarsmartcity.in/covidstatus.php,Arunachal Pradesh, 11 | http://chdcovid19.in/, Chandigarh, 12 | https://gujcovid19.gujarat.gov.in/, Gujarat, 13 | https://nhm.goa.gov.in/,Goa, 14 | https://dashboard.kerala.gov.in/index.php, Kerala, 15 | https://corona.meghalayagov.in/, Meghalaya, 16 | https://covid19.nagaland.gov.in/,Nagaland, 17 | https://statedashboard.odisha.gov.in/, Odisha, 18 | https://covid19dashboard.py.gov.in/, Puducherry, 19 | https://covid19sikkim.org/, Sikkim, 20 | http://www.rajswasthya.nic.in/, Rajasthan, 21 | https://covid19.tripura.gov.in/Visitor/ViewStatus.aspx, Tripura, 22 | -------------------------------------------------------------------------------- /automation/ocr/br_districts.meta: -------------------------------------------------------------------------------- 1 | वांका , Banka 2 | बका , Banka 3 | मधुवनी, Madhubani 4 | अररिया , Araria 5 | अरवल , Arwal 6 | औरंगाबाद , Aurangabad 7 | बांका , Banka 8 | बेगुसराय , Begusarai 9 | बेगूसराय , Begusarai 10 | वेगूसराय , Begusarai 11 | भागलपुर , Bhagalpur 12 | भोजपुर , Bhojpur 13 | बक्सर, Buxar 14 | वक्सर , Buxar 15 | दरभंगा , Darbhanga 16 | गया , Gaya 17 | गोपालगंज , Gopalganj 18 | जमुई , Jamui 19 | जहानाबाद , Jehanabad 20 | कैमूर , Kaimur 21 | कटिहार , Katihar 22 | खगड़िया , Khagaria 23 | किशनगंज , Kishanganj 24 | लखीसराय , Lakhisarai 25 | मधेपुरा , Madhepura 26 | मधुबनी , Madhubani 27 | मुंगेर , Munger 28 | मुजफ्फरपुर , Muzaffarpur 29 | नालंदा , Nalanda 30 | नालन्दा , Nalanda 31 | नवादा , Nawada 32 | पश्चिम , West Champaran 33 | पक्षिम, West Champaran 
34 | पश्चिम चंपारण, West Champaran 35 | पश्चिमी, West Champaran 36 | पटना , Patna 37 | पूर्वी चम्पारण, East Champaran 38 | पूर्वी, East Champaran 39 | पूर्वी चंपारण , East Champaran 40 | पूर्णिया , Purnia 41 | पूर्णियाँ , Purnia 42 | रोहतास , Rohtas 43 | सहरसा , Saharsa 44 | समस्तीपुर , Samastipur 45 | सारण , Saran 46 | शेखपुरा , Sheikhpura 47 | शिवहर , Sheohar 48 | सीतामढ़ी , Sitamarhi 49 | सिवान , Siwan 50 | सुपौल , Supaul 51 | सुपीत , Supaul 52 | वैशाली , Vaishali 53 | -------------------------------------------------------------------------------- /automation/ocr/ct_districts.meta: -------------------------------------------------------------------------------- 1 | दुर्ग, Durg 2 | दर्ग, Durg 3 | राजनांदगांव , Rajnandgaon 4 | राजनादगाव , Rajnandgaon 5 | राजनांदगाव , Rajnandgaon 6 | बालोद , Balod 7 | बेमेतरा , Bametara 8 | कबीरधाम , Kabeerdham 9 | रायपुर , Raipur 10 | रायगढ़ , Raigarh 11 | धमतरी , Dhamtari 12 | बलौदाबाज़ार , Baloda Bazar 13 | बलौदाबाजार , Baloda Bazar 14 | महासमुंद , Mahasamund 15 | महासमुद , Mahasamund 16 | गरियाबंद , Gariaband 17 | बिलासपुर , Bilaspur 18 | कोरबा , Korba 19 | जांजगीर - चांपा , Janjgir Champa 20 | मुंगेली , Mungeli 21 | मगेली , Mungeli 22 | मंगेली , Mungeli 23 | मगला , Mungeli 24 | गौरेला पेंड्रा मरवाही , Gaurela Pendra Marwahi 25 | गौरेला पेड़ा मरवाही , Gaurela Pendra Marwahi 26 | सरगुजा , Surguja 27 | कोरिया , Koriya 28 | सूरजपुर , Surajpur 29 | बलरामपुर , Balrampur 30 | बलासपुर , Balrampur 31 | जशपुर , Jashpur 32 | बस्तर , Bastar 33 | कोंडागांव , Kondagaon 34 | दंतेवाडा , Dakshin Bastar Dantewada 35 | दंतेवाड़ा , Dakshin Bastar Dantewada 36 | सुकमा , Sukma 37 | कांकेर , Uttar Bastar Kanker 38 | काकेर , Uttar Bastar Kanker 39 | काकर, Uttar Bastar Kanker 40 | नारायणपुर , Narayanpur 41 | बीजापुर , Bijapur 42 | बीजापूर , Bijapur 43 | अन्य , Other State 44 | अन्य राज्य , Other State 45 | -------------------------------------------------------------------------------- /automation/misc/tnfilegeneration.py: 
-------------------------------------------------------------------------------- 1 | import requests 2 | import pdftotext 3 | import re 4 | 5 | 6 | def is_number(s): 7 | try: 8 | int(s) 9 | return True 10 | except ValueError: 11 | return False 12 | 13 | url = "http://www.nhmharyana.gov.in/WriteReadData/userfiles/file/CoronaVirus/Daily%20Bulletin%20of%20COVID%2019%20as%20on%209-06-2020%20Evening.pdf" 14 | r = requests.get(url, allow_redirects=True) 15 | open('hr.pdf', 'wb').write(r.content) 16 | 17 | with open("ka.pdf", "rb") as f: 18 | pdf = pdftotext.PDF(f) 19 | 20 | recoveryKa = open("ka.pdf.txt", "w") 21 | pid = input("Enter district page:") 22 | print(pdf[int(pid)] , file = recoveryKa) 23 | recoveryKa.close() 24 | 25 | tnFile = open('ka.pdf.txt', 'r') 26 | lines = tnFile.readlines() 27 | tnOutputFile = open('ka.csv', 'w') 28 | 29 | startedReadingDistricts = False 30 | for line in lines: 31 | if len(line) == 0: 32 | continue 33 | 34 | if 'Yadagiri' in line: 35 | startedReadingDistricts = True 36 | if 'Total' in line: 37 | startedReadingDistricts = False 38 | continue 39 | if startedReadingDistricts == False: 40 | continue 41 | 42 | line = re.sub(' +', ',', re.sub('^ +', '', line)) 43 | 44 | linesArray = line.split(',') 45 | print(linesArray) 46 | 47 | tnOutputFile.close() 48 | -------------------------------------------------------------------------------- /automation/ocr/ocr_vision.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import io 4 | import pickle 5 | from google.cloud import vision 6 | 7 | fileName = "" 8 | 9 | def detect_text(path): 10 | """Detects text in the file.""" 11 | client = vision.ImageAnnotatorClient() 12 | 13 | with io.open(path, 'rb') as image_file: 14 | content = image_file.read() 15 | 16 | image = vision.Image(content=content) 17 | 18 | response = client.document_text_detection(image=image) 19 | texts = response.text_annotations 20 | print(texts) 21 | with 
io.open('poly.txt', 'w') as boundsFile: 22 | print(texts, file = boundsFile) 23 | boundsFile.close() 24 | 25 | # Save output 26 | 27 | for text in texts: 28 | vertices = (['{},{}'.format(vertex.x, vertex.y) for vertex in text.bounding_poly.vertices]) 29 | print('{}'.format(text.description), end ="|") 30 | print('bounds|{}'.format('|'.join(vertices))) 31 | 32 | if response.error.message: 33 | raise Exception( 34 | '{}\nFor more info on error messages, check: ' 35 | 'https://cloud.google.com/apis/design/errors'.format( 36 | response.error.message)) 37 | 38 | 39 | def main(): 40 | os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../../../visionapi.json" 41 | path = fileName 42 | 43 | # Do OCR 44 | detect_text(path) 45 | 46 | if __name__ == "__main__": 47 | fileName = sys.argv[1] 48 | main() 49 | -------------------------------------------------------------------------------- /automation/ocr/tg_districts.meta: -------------------------------------------------------------------------------- 1 | Adilabad,Adilabad 2 | Bhadradri Kothagudem,Bhadradri Kothagudem 3 | Bhadradri kothagudem,Bhadradri Kothagudem 4 | GHMC,Hyderabad 5 | Jagtial,Jagtial 6 | Jagityal,Jagtial 7 | Jangaon,Jangaon 8 | Jayashankar Bhupalapally,Jayashankar Bhupalapally 9 | Jayashankar bhupalpally,Jayashankar Bhupalapally 10 | Jogulamba Gadwal,Jogulamba Gadwal 11 | Jogulamba gadwal,Jogulamba Gadwal 12 | Kamareddy,Kamareddy 13 | Karimnagar,Karimnagar 14 | Khammam,Khammam 15 | Komaram Bheem,Komaram Bheem 16 | Komarambheem asifabad,Komaram Bheem 17 | Mahabubabad,Mahabubabad 18 | Mahabubnagar,Mahabubnagar 19 | Mahaboobnagar,Mahabubnagar 20 | Mancherial,Mancherial 21 | Medak,Medak 22 | Medchal Malkajgiri,Medchal Malkajgiri 23 | Medchal malkajigiri,Medchal Malkajgiri 24 | Mulugu,Mulugu 25 | Nagarkurnool,Nagarkurnool 26 | Nalgonda,Nalgonda 27 | Narayanpet,Narayanpet 28 | Nirmal,Nirmal 29 | Nizamabad,Nizamabad 30 | Peddapalli,Peddapalli 31 | Rajanna Sircilla,Rajanna Sircilla 32 | Rajanna siricilla,Rajanna Sircilla 
33 | Ranga Reddy,Ranga Reddy 34 | Rangareddy,Ranga Reddy 35 | Sangareddy,Sangareddy 36 | Siddipet,Siddipet 37 | Suryapet,Suryapet 38 | Vikarabad,Vikarabad 39 | Wanaparthy,Wanaparthy 40 | Warangal Rural,Warangal Rural 41 | Warangal rural,Warangal Rural 42 | Warangal Urban,Warangal Urban 43 | Warangal urban,Warangal Urban 44 | Yadadri Bhuvanagiri,Yadadri Bhuvanagiri 45 | Yadadri, Yadadri Bhuvanagiri 46 | Yadadri bhonigir,Yadadri Bhuvanagiri 47 | -------------------------------------------------------------------------------- /automation/automation.meta: -------------------------------------------------------------------------------- 1 | Vaccine, VC, https://api.cowin.gov.in/api/v1/reports/getPublicReports?state_id=@@state_id@@&district_id=@@district_id@@&date=@@date@@ 2 | VCMohfw, VCM,https://raw.githubusercontent.com/datameet/covid19/master/downloads/mohfw-backup/cumulative_vaccination_coverage/ 3 | Andhra Pradesh, AP, http://hmfw.ap.gov.in/covid_dashboard.aspx 4 | Arunachal Pradesh, AR, 5 | #Goa, GA, https://www.goa.gov.in/covid-19/ 6 | Odisha, OR, https://health.odisha.gov.in/js/distDtls.js 7 | Rajasthan, RJ, http://www.rajswasthya.nic.in/ 8 | Maharashtra, MH, https://www.covid19maharashtragov.in/mh-covid/dbd-cases-file?_by=District&_by=Date 9 | Telangana, TG, 10 | Gujarat, GJ,https://gujcovid19.gujarat.gov.in/DrillDownCharts.aspx/GetDistDataForLineCovidDisrtict 11 | Uttar Pradesh, UP, 12 | Bihar, BR, 13 | Chhattisgarh, CT, 14 | Nagaland, NL, https://covid19.nagaland.gov.in/health-main 15 | Mizoram, MZ, 16 | Manipur, MN, 17 | Assam, AS, https://covid19.assam.gov.in/all-districts/ 18 | Tripura, TR, https://covid19.tripura.gov.in/Visitor/ViewStatus.aspx 19 | Puducherry, PY, https://covid19dashboard.py.gov.in/Reporting/DateWise 20 | Chandigarh, CH, http://chdcovid19.in/ 21 | Kerala, KL, https://dashboard.kerala.gov.in/maps/outside.geojson 22 | KeralaDeaths, KLD, https://dashboard.kerala.gov.in/maps/outside.geojson 23 | Ladakh, LA, 
http://covid.ladakh.gov.in/#dataInsights 24 | Punjab, PB, 25 | Tamil Nadu, TN, 26 | Madhya Pradesh, MP, 27 | Karnataka, KA, 28 | West Bengal, WB, 29 | Haryana, HR, 30 | Himachal Pradesh, HP, 31 | Jammu and Kashmir, JK, 32 | Jharkhand, JH, 33 | Uttarakhand, UT, 34 | Meghalaya, ML, https://services7.arcgis.com/nzBTI19PTHBZaEPT/arcgis/rest/services/Admin_Boundary/FeatureServer/1/query?f=json&returnGeometry=false&outFields=*&where=1=1 35 | #Mizoram, MZ, https://mcovid19.mizoram.gov.in/api/home-stats 36 | -------------------------------------------------------------------------------- /automation/misc/kapatients.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pdftotext 3 | import PyPDF2 as pypdf 4 | import camelot 5 | import re 6 | import datetime 7 | import matplotlib.pyplot as plt 8 | from deltaCalculator import DeltaCalculator 9 | 10 | 11 | def is_number(s): 12 | try: 13 | int(s) 14 | return True 15 | except ValueError: 16 | return False 17 | 18 | tables = camelot.read_pdf('ka.pdf',strip_text='\n', pages="5,6,7,8", split_text = True) 19 | 20 | print(len(tables)) 21 | for index, table in enumerate(tables): 22 | tables[index].to_csv('ka' + str(index) + '.csv') 23 | 24 | kaOutputFile = open('kafull.csv', 'w') 25 | for index, table in enumerate(tables): 26 | kaFile = open('ka' + str(index) + '.csv', 'r') 27 | lines = kaFile.readlines() 28 | 29 | for line in lines: 30 | line = line.replace('\"', '') 31 | linesArray = line.split(',') 32 | if len(linesArray[7]) == 0: 33 | continue 34 | 35 | gender = "" 36 | if linesArray[6].strip() == 'Female': 37 | gender = 'F' 38 | elif linesArray[6].strip() == 'Male': 39 | gender = 'M' 40 | else: 41 | gender = 'Non-Binary' 42 | 43 | print("{},{},{},{},{},{},{},{},{},{},{}".format(linesArray[4].replace('P-', 'KA-P'), datetime.date.today().strftime("%d/%m/%Y"), linesArray[5], gender,'',linesArray[7],'Karnataka', 'KA', 1, 'Hospitalized',linesArray[8]), file = kaOutputFile) 
44 | 45 | kaOutputFile.close() 46 | 47 | ##camelot.plot(tables[0], kind = "contour") 48 | #plt.show() 49 | 50 | 51 | """ 52 | pdfobject=open('ka.pdf','rb') 53 | pdf=pypdf.PdfFileReader(pdfobject) 54 | print(pdf.extractText()) 55 | 56 | url = "http://www.nhmharyana.gov.in/WriteReadData/userfiles/file/CoronaVirus/Daily%20Bulletin%20of%20COVID%2019%20as%20on%209-06-2020%20Evening.pdf" 57 | r = requests.get(url, allow_redirects=True) 58 | open('hr.pdf', 'wb').write(r.content) 59 | 60 | with open("ka.pdf", "rb") as f: 61 | pdf = pdftotext.PDF(f) 62 | 63 | recoveryKa = open("ka.pdf.txt", "w") 64 | pid = input("Enter district page:") 65 | print(pdf[int(pid)] , file = recoveryKa) 66 | recoveryKa.close() 67 | 68 | tnFile = open('ka.pdf.txt', 'r') 69 | lines = tnFile.readlines() 70 | tnOutputFile = open('ka.csv', 'w') 71 | 72 | startedReadingDistricts = False 73 | for line in lines: 74 | if len(line) == 0: 75 | continue 76 | print(line) 77 | 78 | if 'Yadagiri' in line: 79 | startedReadingDistricts = True 80 | if 'Total' in line: 81 | startedReadingDistricts = False 82 | continue 83 | if startedReadingDistricts == False: 84 | continue 85 | 86 | line = re.sub(' +', ',', re.sub('^ +', '', line)) 87 | 88 | linesArray = line.split(',') 89 | print(linesArray) 90 | 91 | tnOutputFile.close() 92 | """ 93 | -------------------------------------------------------------------------------- /automation/ocr/up_districts.meta: -------------------------------------------------------------------------------- 1 | औरय्या , Auraiya 2 | सभल , Sambhal 3 | गोडा , Gonda 4 | गौतम बुद्ध नगर, Gautam Buddha Nagar 5 | बुध नगर , Gautam Buddha Nagar 6 | बुद्ध नगर, Gautam Buddha Nagar 7 | बनिया , Ballia 8 | अम्बेडकरनगर, Ambedkar Nagar 9 | अम्बेडकर, Ambedkar Nagar 10 | गाँडा , Gonda 11 | मयुरा , Mathura 12 | संतकबीर नगर , Sant Kabir Nagar 13 | सतकबारनगर, Sant Kabir Nagar 14 | सत कबार नगर, Sant Kabir Nagar 15 | समल , Sambhal 16 | सिद्घार्य नगर, Siddharthnagar 17 | संत कबीर, Sant Kabir Nagar 18 | सिद्धार्थनगर, 
Siddharthnagar 19 | सिद्धार्थनग, Siddharthnagar 20 | गौतम बुध नगर , Gautam Buddha Nagar 21 | संत कबीर नगर , Sant Kabir Nagar 22 | कानपुर नगर, Kanpur Nagar 23 | रायबरेली , Rae Bareli 24 | राय बरेली, Rae Bareli 25 | राय , Rae Bareli 26 | लखीमपुर , Lakhimpur Kheri 27 | लखीमपुर खीरी , Lakhimpur Kheri 28 | खीरी , Lakhimpur Kheri 29 | लखीमपुर - खीरी , Lakhimpur Kheri 30 | अमरोहा , Amroha 31 | मथुरा , Mathura 32 | आगरा , Agra 33 | मेरठ , Meerut 34 | गौतम , Gautam Buddha Nagar 35 | लखनऊ , Lucknow 36 | गाज़ियाबाद , Ghaziabad 37 | गाजियाबाद , Ghaziabad 38 | सहारनपुर , Saharanpur 39 | सहारनपर, Saharanpur 40 | वाराणसी , Varanasi 41 | रामपुर , Rampur 42 | जौनपुर , Jaunpur 43 | जौनपर , Jaunpur 44 | बस्ती ,Basti 45 | बाराबंकी , Barabanki 46 | अलीगढ , Aligarh 47 | अलीगढ़ , Aligarh 48 | अलीगढ़ , Aligarh 49 | अलीगढ़ , Aligarh 50 | कबीर नगर, Sant Kabir Nagar 51 | हापुड़, Hapur 52 | हापड , Hapur 53 | हापुड , Hapur 54 | अमेठी , Amethi 55 | बुलंदशहर , Bulandshahr 56 | बलंदशहर , Bulandshahr 57 | बुलदशहर , Bulandshahr 58 | अयोध्या , Ayodhya 59 | सिद्धार्थ नगर, Siddharthnagar 60 | सिद्धार्थ, Siddharthnagar 61 | गाजीपुर , Ghazipur 62 | गाजीपर , Ghazipur 63 | बिजनौर , Bijnor 64 | प्रयागराज , Prayagraj 65 | आजमगढ़ , Azamgarh 66 | आजमगढ , Azamgarh 67 | संभल , Sambhal 68 | बहराइच ,Bahraich 69 | सुल्तानपुर , Sultanpur 70 | संत , Sant Kabir Nagar 71 | प्रतापगढ़ , Pratapgarh 72 | गोरखपुर ,Gorakhpur 73 | कानपुर , Kanpur Nagar 74 | फिरोजाबाद , Firozabad 75 | मुरादाबाद , Moradabad 76 | मरादाबाद , Moradabad 77 | देवरिया , Deoria 78 | बरेली , Bareilly 79 | गोंडा , Gonda 80 | कौशाम्बी , Kaushambi 81 | अमेठी , Amethi 82 | मुजफ्फरनगर ,Muzaffarnagar 83 | इटावा , Etawah 84 | जालौन , Jalaun 85 | शामली , Shamli 86 | पीलीभीत , Pilibhit 87 | अम्बेडकर नगर ,Ambedkar Nagar 88 | फतेहपुर , Fatehpur 89 | महाराजगंज , Maharajganj 90 | सीतापुर , Sitapur 91 | हरदोई , Hardoi 92 | बदायूँ , Budaun 93 | बदार्य, Budaun 94 | बदायें , Budaun 95 | बदायूं , Budaun 96 | बलरामपुर , Balrampur 97 | कन्नौज , Kannauj 98 | झाँसी , 
Jhansi 99 | झांसी , Jhansi 100 | बलिया , Ballia 101 | मिर्जापुर , Mirzapur 102 | बागपत , Baghpat 103 | चित्रकूट , Chitrakoot 104 | श्रावस्ती ,Shrawasti 105 | भदोही , Bhadohi 106 | उन्नाव , Unnao 107 | मैनपुरी , Mainpuri 108 | फर्रुखाबाद ,Farrukhabad 109 | फरुखाबाद , Farrukhabad 110 | बाँदा , Banda 111 | बांदा , Banda 112 | औरैय्या ,Auraiya 113 | हाथरस , Hathras 114 | चंदौली , Chandauli 115 | चंदाली , Chandauli 116 | शाहजहांपुर , Shahjahanpur 117 | शाहजहापुर , Shahjahanpur 118 | एटा , Etah 119 | कासगंज , Kasganj 120 | मऊ , Mau 121 | कानपुर देहात , Kanpur Dehat 122 | देहात , Kanpur Dehat 123 | कुशीनगर , Kushinagar 124 | महोबा , Mahoba 125 | हमीरपुर , Hamirpur 126 | सोनभद्र , Sonbhadra 127 | ललितपुर , Lalitpur 128 | -------------------------------------------------------------------------------- /automation/nameMapping.meta: -------------------------------------------------------------------------------- 1 | Himachal Pradesh, Sirmour, Sirmaur 2 | Himachal Pradesh, L & Spiti, Lahaul and Spiti 3 | Himachal Pradesh, & Spiti, Lahaul and Spiti 4 | Arunachal Pradesh, Kessang, Pakke Kessang 5 | Arunachal Pradesh, Kumey, Kurung Kumey 6 | Arunachal Pradesh, Leparada, Lepa Rada 7 | Arunachal Pradesh, Dibang Valley, Upper Dibang Valley 8 | Arunachal Pradesh, Capital Region, Papum Pare 9 | Arunachal Pradesh, Capital Complex, Papum Pare 10 | Arunachal Pradesh, Kra - Daadi, Kra Daadi 11 | Arunachal Pradesh, Lower Dibang, Lower Dibang Valley 12 | Madhya Pradesh, Betull, Betul 13 | Jammu and Kashmir, Shopian, Shopiyan 14 | Jammu and Kashmir, Poonch, Punch 15 | Andhra Pradesh, Kadapa, Y.S.R. Kadapa 16 | Andhra Pradesh, Nellore, S.P.S. 
Nellore 17 | Andhra Pradesh, lOther States *, Other State 18 | Andhra Pradesh, lOther Countries **, Foreign Evacuees 19 | Andhra Pradesh, Other States *, Other State 20 | Andhra Pradesh, Other Countries **, Foreign Evacuees 21 | Andhra Pradesh, Other States, Other State 22 | Andhra Pradesh, Other Countries, Foreign Evacuees 23 | Andhra Pradesh, Ananthapur, Anantapur 24 | Odisha,Sonepur,Subarnapur 25 | Odisha,Khurdha,Khordha 26 | Odisha,Sundergarh,Sundargarh 27 | Odisha,Keonjhar,Kendujhar 28 | Odisha,Baragarh,Bargarh 29 | Odisha,Others *,Others 30 | Odisha,Nawarangpur,Nabarangapur 31 | Odisha,Bolangir,Balangir 32 | Odisha,Jagatsinghapur,Jagatsinghpur 33 | Odisha,Nabarangpur,Nabarangapur 34 | Odisha,Tstate Pool,State Pool 35 | Maharashtra,Ahmadnagar,Ahmednagar 36 | Maharashtra,Ahmadnagar,Ahmednagar 37 | Maharashtra,Amaravati,Amravati 38 | Maharashtra,Other States,Other State 39 | Maharashtra,Other states/country, Other State 40 | Mizoram,Siaha,Saiha 41 | Rajasthan,S. Madhopur, Sawai Madhopur 42 | Gujarat, Devbhoomi Dwarka, Devbhumi Dwarka 43 | Assam, Kamrup Metro, Kamrup Metropolitan 44 | Tripura, Sepahijala, Sipahijala 45 | Tripura, Unakoti, Unokoti 46 | Puducherry, Pondicherry, Puducherry 47 | Tamil Nadu, Pudukottai, Pudukkottai 48 | Tamil Nadu, Sivagangai, Sivaganga 49 | Tamil Nadu, Thirupathur, Tirupathur 50 | Tamil Nadu, Thiruvannamalai, Tiruvannamalai 51 | Tamil Nadu, Thoothukudi, Thoothukkudi 52 | Tamil Nadu, Trichy, Tiruchirappalli 53 | Tamil Nadu, Villupuram, Viluppuram 54 | Punjab, SBS Nagar, Shahid Bhagat Singh Nagar 55 | Punjab, Muktsar, Sri Muktsar Sahib 56 | Punjab, FG Sahib, Fatehgarh Sahib 57 | Punjab, SAS Nagar, S.A.S. 
Nagar 58 | Punjab, Ropar, Rupnagar 59 | Rajasthan, S.Madhopur, Sawai Madhopur 60 | Jharkhand, Saraikela, Saraikela-Kharsawan 61 | Jharkhand, Sahebgani, Sahebganj 62 | Jharkhand, Sahebganj, Sahibganj 63 | Haryana, Foreign returnee (USA), Foreign Evacuees 64 | Haryana, Charkhi, Charkhi Dadri 65 | Haryana, CharkhiDadri, Charkhi Dadri 66 | Haryana, Sonepat, Sonipat 67 | Haryana, Mohindergarh, Mahendragarh 68 | Haryana, Mahindergarh, Mahendragarh 69 | Karnataka, Gadaga, Gadag 70 | Karnataka, Bengaluru, Bengaluru Urban 71 | Karnataka, Vijayapur, Vijayapura 72 | Karnataka, Chikballapura, Chikkaballapura 73 | Karnataka, Chamarajanagar, Chamarajanagara 74 | Karnataka, Chikkamgaluru, Chikkamagaluru 75 | Karnataka, Davangere, Davanagere 76 | Karnataka, Mysore, Mysuru 77 | Karnataka, Uttar Kannada, Uttara Kannada 78 | Karnataka, Others, Other State 79 | Karnataka, Dakshin Kannada, Dakshina Kannada 80 | Karnataka, Yadagiri, Yadgir 81 | Karnataka, Yadgiri, Yadgir 82 | Karnataka, Koppala, Koppal 83 | Karnataka, Dharawada, Dharwad 84 | Karnataka, Dharwada, Dharwad 85 | Karnataka, Bagalakote, Bagalkote 86 | Karnataka, Bagalkot, Bagalkote 87 | Karnataka, Raichuru, Raichur 88 | Karnataka, Kolara, Kolar 89 | Karnataka, Hassana, Hassan 90 | Karnataka, Others*, Other State 91 | Karnataka, Bellary, Ballari 92 | Karnataka, Ramanagar, Ramanagara 93 | Karnataka, Kalaburgi, Kalaburagi 94 | Karnataka, Kalburgi, Kalaburagi 95 | Karnataka, Bangalore Urban, Bengaluru Urban 96 | Karnataka, Bangalore Rural, Bengaluru Rural 97 | Karnataka, Tumkur, Tumakuru 98 | Kerala, Kasargod, Kasaragod 99 | Kerala, Kozhikkode, Kozhikode 100 | West Bengal,Coochbehar,Cooch Behar 101 | West Bengal,Bankura*,Bankura 102 | West Bengal,Purba Bardhaman*, Purba Bardhaman 103 | Meghalaya, South -West Garo Hills, South West Garo Hills 104 | Meghalaya, Ri Bhoi, Ribhoi 105 | Meghalaya, Ri - Bhoi District, Ribhoi 106 | Meghalaya, RiBhoi, Ribhoi 107 | Uttarakhand, U.S. 
Nagar, Udham Singh Nagar 108 | Uttarakhand, Chamoll, Chamoli 109 | -------------------------------------------------------------------------------- /automation/ocr/ocr.sh: -------------------------------------------------------------------------------- 1 | customiseMetaConfig() { 2 | stateCode=$( echo $1 ) 3 | replacementLine=$( echo $2 ) 4 | sedString=$( echo $3 ) 5 | parameterStateCode=$( echo $3 | cut -d':' -f1 ) 6 | parametersToReplace=$( echo $3 | cut -d':' -f2 ) 7 | if [ "$stateCode" = "$parameterStateCode" ] 8 | then 9 | for param in $(echo $parametersToReplace | sed "s/,/ /g") 10 | do 11 | parameterToReplace=$( echo $param | cut -d'=' -f1 ) 12 | value=$( echo $param | cut -d'=' -f2 ) 13 | replacementSubString=$( echo "$replacementSubString;s/\\\$$parameterToReplace/$value/g" ) 14 | done 15 | fi 16 | echo $replacementLine | sed "$replacementSubString" 17 | } 18 | 19 | 20 | 21 | if (( $# != 4 && $# != 5 )) 22 | then 23 | echo "Usage: ./ocr.sh [Starting String] " 24 | exit 25 | fi 26 | 27 | format="ocr" 28 | 29 | skipOcr=0 30 | skipTable=0 31 | skipAutomation=0 32 | individualRecords=0 33 | 34 | if (( $# == 5 )) 35 | then 36 | for i in $(echo $5 | sed "s/,/ /g") 37 | do 38 | option=`echo $i |awk '{print tolower($0)}'` 39 | 40 | case $option in 41 | "all") 42 | ;; 43 | "ocr") 44 | skipOcr=1 45 | echo "**** Skipping OCR Generation ****" 46 | ;; 47 | "table") 48 | echo "**** Skipping CSV Generation ****" 49 | skipTable=1 50 | ;; 51 | "automation") 52 | echo "**** Skipping Automation ****" 53 | skipAutomation=1 54 | ;; 55 | "ocr,table") 56 | echo "**** Skipping OCR, CSV Generation ****" 57 | skipOcr=1 58 | skipTable=1 59 | ;; 60 | "individual") 61 | individualRecords=1 62 | ;; 63 | "f1") 64 | echo "**** Using format type 1 for UP ****" 65 | format="ocr1" 66 | ;; 67 | "f2") 68 | echo "**** Using format type 2 for UP ****" 69 | format="ocr2" 70 | ;; 71 | esac 72 | done 73 | fi 74 | 75 | stateCode="" 76 | case $2 in 77 | "Bihar") 78 | stateCode="br" 79 | ;; 80 | 
"Uttar Pradesh") 81 | stateCode="up" 82 | ;; 83 | "Madhya Pradesh") 84 | stateCode="mp" 85 | ;; 86 | "Jharkhand") 87 | stateCode="jh" 88 | ;; 89 | "Rajasthan") 90 | stateCode="rj" 91 | ;; 92 | "Punjab") 93 | stateCode="pb" 94 | ;; 95 | "Jammu and Kashmir") 96 | stateCode="jk" 97 | ;; 98 | "Haryana") 99 | stateCode="hr" 100 | ;; 101 | "Andhra Pradesh") 102 | stateCode="ap" 103 | ;; 104 | "Maharashtra") 105 | stateCode="mh" 106 | ;; 107 | "Himachal Pradesh") 108 | stateCode="hp" 109 | ;; 110 | "Chhattisgarh") 111 | stateCode="ct" 112 | ;; 113 | "Uttarakhand") 114 | stateCode="ut" 115 | ;; 116 | "Arunachal Pradesh") 117 | stateCode="ar" 118 | ;; 119 | "Gujarat") 120 | stateCode="gj" 121 | ;; 122 | "Tamil Nadu") 123 | stateCode="tn" 124 | ;; 125 | "Nagaland") 126 | stateCode="nl" 127 | ;; 128 | "Telangana") 129 | stateCode="tg" 130 | ;; 131 | "Karnataka") 132 | stateCode="ka" 133 | ;; 134 | "Sikkim") 135 | stateCode="sk" 136 | ;; 137 | "Mizoram") 138 | stateCode="mz" 139 | ;; 140 | "Meghalaya") 141 | stateCode="ml" 142 | ;; 143 | "Kerala") 144 | stateCode="kl" 145 | ;; 146 | "Assam") 147 | stateCode="as" 148 | ;; 149 | "Manipur") 150 | stateCode="mn" 151 | ;; 152 | 153 | *) 154 | stateCode="invalid" 155 | esac 156 | 157 | echo -e "\n********************* If you want to see the ocr data, cat output.txt *********************\n" 158 | 159 | if (( $skipOcr != 1 )) 160 | then 161 | echo -e "\n******** Calling google vision api *******" 162 | python3 ocr_vision.py $1 > bounds.txt 163 | fi 164 | 165 | if (( $skipTable != 1 )) 166 | then 167 | replacementLine="s/@@statename@@/\$stateCode/g;s/@@yInterval@@/\$yInterval/g;s/@@xInterval@@/\$xInterval/g;s/@@houghTransform@@/\$houghTransform/g;s/@@enableTranslation@@/\$enableTranslation/g;s/@@startingText@@/\$startingText/g;s/@@configMinLineLength@@/\$configMinLineLength/g;" 168 | 169 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "hp:houghTransform=False,yInterval=5" ) 170 | replacementLine=$( 
customiseMetaConfig $stateCode $replacementLine "br:houghTransform=False" ) 171 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "mp:houghTransform=False" ) 172 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "ap:configMinLineLength=300" ) 173 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "tn:configMinLineLength=500" ) 174 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "tg:enableTranslation=True" ) 175 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "mz:houghTransform=False" ) 176 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "ml:configMinLineLength=250" ) 177 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "ut:houghTransform=False" ) 178 | replacementLine=$( customiseMetaConfig $stateCode $replacementLine "nl:configMinLineLength=250" ) 179 | 180 | configMinLineLength=400 181 | enableTranslation=`echo $4` 182 | startingText=`echo $3` 183 | houghTransform="True" 184 | yInterval=0 185 | xInterval=0 186 | 187 | finalReplacementString=$( echo $replacementLine | sed "s/\$stateCode/$stateCode/g; s/\$yInterval/$yInterval/g; s/\$xInterval/$xInterval/g; s/\$houghTransform/$houghTransform/g; s/\$enableTranslation/$enableTranslation/g; s/\$startingText/$startingText/g; s/\$configMinLineLength/$configMinLineLength/g" ) 188 | 189 | echo $finalReplacementString 190 | 191 | sed "$finalReplacementString" ocrconfig.meta.orig > ocrconfig.meta 192 | 193 | echo -e "\n******** Using ocrconfig.meta, change ocrconfig.meta.orig for x and y intervals ******* " 194 | cat ocrconfig.meta 195 | echo -e "******** ++++++++ *******" 196 | python3 googlevision.py ocrconfig.meta $1 197 | fi 198 | 199 | cp output.txt ../.tmp/$stateCode.txt 200 | 201 | if (( $skipAutomation != 1 && $individualRecords != 1 )) 202 | then 203 | cd .. 
204 | echo -e "\n******** Calling automation.py for $2 ******* " 205 | python3 ./automation.py "$2" "full" $format 206 | fi 207 | -------------------------------------------------------------------------------- /automation/kaautomation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | import csv 4 | import requests 5 | import camelot 6 | import re 7 | import datetime 8 | from deltaCalculator import DeltaCalculator 9 | 10 | deltaCalculator = DeltaCalculator(True) 11 | category = "d" 12 | 13 | def readPDF(): 14 | global category 15 | """ 16 | r = requests.get(sys.argv[1], allow_redirects=True) 17 | print("URL: " + sys.argv[1]) 18 | open(".tmp/ka.pdf", 'wb').write(r.content) 19 | """ 20 | 21 | print(10*"-" + " Deceased details (IGNORE THE FIRST TWO LINES) " + 10*"-") 22 | if len(sys.argv) == 4: 23 | category = sys.argv[1] 24 | startPid = sys.argv[2] 25 | endPid = sys.argv[3] 26 | else: 27 | category = input("Enter c/r/d : ") 28 | startPid = input("Enter start page number: ") 29 | endPid = input("Enter end page number: ") 30 | 31 | pages = "" 32 | for i in range(int(startPid), int(endPid) + 1): 33 | pages = pages + "," + str(i) if len(pages) != 0 else str(i) 34 | print(f"Processing pages {pages}") 35 | 36 | tables = camelot.read_pdf('.tmp/KA.pdf',strip_text='\n', pages=pages, split_text = True) 37 | 38 | for index, table in enumerate(tables): 39 | tables[index].to_csv('.tmp/ka' + str(index) + '.csv') 40 | 41 | processTmpFiles(tables) 42 | 43 | 44 | def processTmpFiles(tables): 45 | kaOutputFile = open('kaconfirmed.csv', 'w') 46 | csvWriter = csv.writer(kaOutputFile, delimiter=',', quotechar='"') 47 | linesToWrite = [] 48 | lineNumber = 0 49 | for index, table in enumerate(tables): 50 | kaFile = open('.tmp/ka' + str(index) + '.csv', 'r') 51 | with open('.tmp/ka' + str(index) + '.csv', newline='') as kaFile: 52 | rowReader = csv.reader(kaFile, delimiter=',', quotechar='"') 53 | for row in 
rowReader: 54 | line = '|'.join(row) 55 | line = re.sub('^\|', '', line) 56 | if len(re.sub('^\|+', '', line)) == 0: 57 | continue 58 | if 'Page' in line: 59 | continue 60 | #line = re.sub('\|$', '', re.sub('^\|+', '', line.replace('\"', '').replace(',,', ','))) 61 | 62 | linesArray = line.split('|') 63 | 64 | if category == "c": 65 | confirmedFileWriter(linesArray, linesToWrite) 66 | 67 | if category == "r": 68 | recoveredFileWriter(linesArray, linesToWrite) 69 | 70 | if category == "d": 71 | deceasedFileWriter(linesArray, linesToWrite) 72 | 73 | kaFile.close() 74 | for row in linesToWrite: 75 | csvWriter.writerow(row) 76 | kaOutputFile.close() 77 | 78 | 79 | def is_number(s): 80 | try: 81 | int(s) 82 | return True 83 | except ValueError: 84 | return False 85 | 86 | 87 | def confirmedFileWriter(linesArray, linesToWrite): 88 | ''' 89 | if len(linesArray) != 8 or len(linesArray[5]) == 0: 90 | print("Ignoring {}".format(linesArray)) 91 | return "" 92 | ''' 93 | 94 | gender = "" 95 | if linesArray[5].strip() == 'Female': 96 | gender = 'F' 97 | elif linesArray[5].strip() == 'Male': 98 | gender = 'M' 99 | else: 100 | gender = 'Non-Binary' 101 | 102 | districtName = "" 103 | districtName = deltaCalculator.getNameMapping('Karnataka', linesArray[6]) 104 | 105 | if len(linesArray[3]) == 0 and len(linesToWrite) != 0: 106 | print("Processing: {}".format(linesArray)) 107 | for index, cellValue in enumerate(linesArray): 108 | if len(cellValue) > 0 and index == 4: 109 | linesToWrite[len(linesToWrite) - 1][2] = str(linesToWrite[len(linesToWrite) - 1][2]) + " " + str(cellValue) 110 | if len(cellValue) > 0 and index == 7: 111 | linesToWrite[len(linesToWrite) - 1][11] = str(linesToWrite[len(linesToWrite) - 1][11]) + " " + str(cellValue) 112 | if len(cellValue) > 0 and index == 6: 113 | linesToWrite[len(linesToWrite) - 1][5] = linesToWrite[len(linesToWrite) - 1][5] + " " + str(cellValue) 114 | return 115 | patientNumber = linesArray[3].replace('P-', 'KA-P') if 'P' in linesArray[3] 
else "KA-P" + str(linesArray[3]) 116 | 117 | linesToWrite.append([patientNumber, datetime.date.today().strftime("%d/%m/%Y"), linesArray[4], gender, '', districtName, 'Karnataka', 'KA', 1, 'Hospitalized','', linesArray[7]]) 118 | 119 | def recoveredFileWriter(linesArray, linesToWrite): 120 | """ 121 | if len(linesArray) < 3: 122 | print("Ignoring {} ".format(linesArray)) 123 | return "" 124 | """ 125 | 126 | districtName = linesArray[1].split('(')[0].strip() 127 | districtName = deltaCalculator.getNameMapping('Karnataka', districtName) 128 | 129 | patientIds = re.sub('\.', '', re.sub('&', ',', re.sub(' +', ',', linesArray[3]))) 130 | patientIdArray = patientIds.split(',') 131 | 132 | if len(linesArray[2]) == 0 and len(linesToWrite) != 0 and len(patientIdArray) > 0: 133 | districtName = linesToWrite[len(linesToWrite) - 1][5] 134 | 135 | for item in patientIdArray: 136 | if len(item) == 0 or item == "0": #or is_number(item) or '(' in item: 137 | continue 138 | if item == "\n": 139 | continue 140 | patientNumber = item.replace('P-', '').replace('\n', '') if 'P' in item else str(item) 141 | linesToWrite.append([patientNumber, datetime.date.today().strftime("%d/%m/%Y"), '', '','',districtName,'Karnataka', 'KA', 1, 'Recovered']) 142 | #csvWriter.writerow([item.replace('P-', 'KA-P').replace('\n', ''), datetime.date.today().strftime("%d/%m/%Y"), '', '','',districtName,'Karnataka', 'KA', 1, 'Recovered']) 143 | 144 | 145 | def deceasedFileWriter(linesArray, linesToWrite): 146 | """ 147 | if len(linesArray) < 8 or len(linesArray[1]) == 0: 148 | print("Ignoring {} ".format(linesArray)) 149 | return "" 150 | """ 151 | if len(linesArray[0]) == 0: 152 | linesArray.pop(0) 153 | 154 | #print(linesArray) 155 | districtName = linesArray[1].strip() 156 | districtName = deltaCalculator.getNameMapping('Karnataka', districtName) 157 | description = "" 158 | if len(linesArray) < 5: 159 | return 160 | for index, data in enumerate(linesArray): 161 | if index < 5: 162 | continue 163 | else: 
164 | description = description + ";" + data if len(description) > 0 else data 165 | #csvWriter.writerow(["KA-P" + str(linesArray[2]), datetime.date.today().strftime("%d/%m/%Y"), linesArray[3], linesArray[4], '', districtName, 'Karnataka', 'KA', 1, 'Deceased', '', description]) 166 | if len(linesArray) > 3 and len(linesArray[2]) == 0 and len(linesToWrite) != 0: 167 | for index, cellValue in enumerate(linesArray): 168 | if len(cellValue) > 0 and index == 3: 169 | linesToWrite[len(linesToWrite) - 1][2] = str(linesToWrite[len(linesToWrite) - 1][2]) + " " + str(cellValue) 170 | if len(cellValue) > 0 and index == 4: 171 | linesToWrite[len(linesToWrite) - 1][3] = str(linesToWrite[len(linesToWrite) - 1][3]) + " " + str(cellValue) 172 | return 173 | linesToWrite.append([str(linesArray[2]), datetime.date.today().strftime("%d/%m/%Y"), linesArray[3], linesArray[4], '', districtName, 'Karnataka', 'KA', 1, 'Deceased', '', description]) 174 | 175 | readPDF() 176 | -------------------------------------------------------------------------------- /automation/deltaCalculator.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import logging 4 | import re 5 | import sys 6 | import csv 7 | 8 | logging.basicConfig(filename='deltaCalculator.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) 9 | 10 | class DeltaCalculator: 11 | def __init__(self, lightLoad = False): 12 | if lightLoad == False: 13 | self.buildJson() 14 | 15 | self.nameMapping = {} 16 | self.loadMetaData() 17 | 18 | def buildJson(self): 19 | self.covidDashboardData = {} 20 | self.modifiedDashboardData = requests.request("get", "https://api.covid19india.org/csv/latest/district_wise.csv") 21 | decoded_content = self.modifiedDashboardData.content.decode('utf-8') 22 | 23 | cr = csv.reader(decoded_content.splitlines(), delimiter=',') 24 | rows = list(cr) 25 | for index, row in enumerate(rows): 26 | if index == 0: 27 | 
continue 28 | if row[2] not in self.covidDashboardData: 29 | self.covidDashboardData[row[2]] = {} 30 | 31 | if 'statecode' not in self.covidDashboardData[row[2]]: 32 | self.covidDashboardData[row[2]]['statecode'] = row[1] 33 | 34 | if 'districtData' not in self.covidDashboardData[row[2]]: 35 | self.covidDashboardData[row[2]]['districtData'] = {} 36 | 37 | if row[4] not in self.covidDashboardData[row[2]]['districtData']: 38 | self.covidDashboardData[row[2]]['districtData'][row[4]] = {} 39 | 40 | self.covidDashboardData[row[2]]['districtData'][row[4]]['confirmed'] = int(row[5]) 41 | self.covidDashboardData[row[2]]['districtData'][row[4]]['confirmed'] = int(row[5]) 42 | self.covidDashboardData[row[2]]['districtData'][row[4]]['recovered'] = int(row[7]) 43 | self.covidDashboardData[row[2]]['districtData'][row[4]]['migratedother'] = int(row[9]) 44 | self.covidDashboardData[row[2]]['districtData'][row[4]]['deceased'] = int(row[8]) 45 | self.covidDashboardData[row[2]]['districtData'][row[4]]['active'] = int(row[6]) 46 | 47 | def getNameMapping(self, stateName, districtName): 48 | mappedDistrict = "" 49 | try: 50 | nameMapping = self.nameMapping[stateName] 51 | mappedDistrict = nameMapping[districtName] 52 | except KeyError: 53 | mappedDistrict = districtName 54 | 55 | return mappedDistrict 56 | 57 | def isDistrictPresent(self, stateName, districtName): 58 | try: 59 | self.covidDashboardData[stateName][districtName] 60 | return True 61 | except KeyError: 62 | return False 63 | 64 | def loadMetaData(self): 65 | with open("nameMapping.meta", "r") as metaFile: 66 | lineArray = [] 67 | for line in metaFile: 68 | lineArray = line.split(',') 69 | districtMapping = {} 70 | 71 | currentDictionary = {} 72 | if lineArray[0] not in self.nameMapping: 73 | self.nameMapping[lineArray[0]] = {} 74 | 75 | currentDictionary = self.nameMapping[lineArray[0]] 76 | currentDictionary[lineArray[1].strip()] = re.sub('\n', '', lineArray[2].strip()) 77 | self.nameMapping[lineArray[0]] = 
currentDictionary 78 | 79 | 80 | def getStateDataFromSite(self, stateName, stateDataFromStateDashboard, options): 81 | logging.info(stateDataFromStateDashboard) 82 | stateData = self.covidDashboardData[stateName]['districtData'] 83 | stateCode = self.covidDashboardData[stateName]['statecode'] 84 | print("\n" + '*' * 20 + stateName + '*' * 20) 85 | try: 86 | nameMapping = self.nameMapping[stateName] 87 | except KeyError: 88 | nameMapping = {} 89 | 90 | confirmedDeltaArray = [] 91 | recoveredDeltaArray = [] 92 | deceasedDeltaArray = [] 93 | activeDeltaArray = [] 94 | migratedDeltaArray = [] 95 | districts = [] 96 | stateTotalFromStateDashboard = {'confirmed': 0, 'recovered': 0, 'deceased': 0} 97 | siteTotalFromStateDashboard = {'confirmed': 0, 'recovered': 0, 'deceased': 0} 98 | errorArray = [] 99 | districtMap = {} 100 | 101 | for districtDetails in stateDataFromStateDashboard: 102 | try: 103 | districtName = nameMapping[districtDetails['districtName']] if districtDetails['districtName'] in nameMapping else districtDetails['districtName'] 104 | outputString = "" 105 | 106 | stateTotalFromStateDashboard['confirmed'] += districtDetails['confirmed'] if districtDetails['confirmed'] != -999 else 0 107 | stateTotalFromStateDashboard['recovered'] += districtDetails['recovered'] if districtDetails['recovered'] != -999 else 0 108 | stateTotalFromStateDashboard['deceased'] += districtDetails['deceased'] if districtDetails['deceased'] != -999 else 0 109 | 110 | siteTotalFromStateDashboard['confirmed'] += stateData[districtName]['confirmed'] 111 | siteTotalFromStateDashboard['recovered'] += stateData[districtName]['recovered'] 112 | siteTotalFromStateDashboard['deceased'] += stateData[districtName]['deceased'] 113 | 114 | confirmedDelta = districtDetails['confirmed'] - stateData[districtName]['confirmed'] if districtDetails['confirmed'] != -999 else "NA" 115 | recoveredDelta = districtDetails['recovered'] - stateData[districtName]['recovered'] if districtDetails['recovered'] != 
-999 else "NA" 116 | deceasedDelta = districtDetails['deceased'] - stateData[districtName]['deceased'] if districtDetails['deceased'] != -999 else "NA" 117 | activeDelta = 0 118 | migratedDelta = 0 119 | 120 | if 'migrated' in districtDetails.keys(): 121 | migratedDelta = districtDetails['migrated'] - stateData[districtName]['migratedother'] 122 | 123 | if 'active' in districtDetails.keys(): 124 | activeDelta = districtDetails['active'] - (stateData[districtName]['confirmed'] - stateData[districtName]['deceased'] - stateData[districtName]['recovered']) 125 | except KeyError: 126 | errorArray.append("--> ERROR: Failed to find key mapping for district: {}, state: {}".format(districtName, stateName)) 127 | continue 128 | 129 | if not options: 130 | outputString = districtName + ", " + str(confirmedDelta) + ", " + str(recoveredDelta) + ", " + str(deceasedDelta) 131 | print(outputString) 132 | if options == "detailed" or options == "full" or options == "fullActive": 133 | districts.append(districtName) 134 | confirmedDeltaArray.append(confirmedDelta) 135 | recoveredDeltaArray.append(recoveredDelta) 136 | deceasedDeltaArray.append(deceasedDelta) 137 | activeDeltaArray.append(activeDelta) 138 | migratedDeltaArray.append(migratedDelta) 139 | 140 | stateConfirmedDelta = stateTotalFromStateDashboard['confirmed'] - siteTotalFromStateDashboard['confirmed'] 141 | stateRecoveredDelta = stateTotalFromStateDashboard['recovered'] - siteTotalFromStateDashboard['recovered'] 142 | stateDeceasedDelta = stateTotalFromStateDashboard['deceased'] - siteTotalFromStateDashboard['deceased'] 143 | 144 | if options == "detailed": 145 | districts.append('Total') 146 | confirmedDeltaArray.append(stateConfirmedDelta) 147 | recoveredDeltaArray.append(stateRecoveredDelta) 148 | deceasedDeltaArray.append(stateDeceasedDelta) 149 | 150 | self.printDistricts(self.printDeltas(confirmedDeltaArray, "Confirmed"), districts) 151 | self.printDistricts(self.printDeltas(recoveredDeltaArray, "Recovered"), 
districts) 152 | self.printDistricts(self.printDeltas(deceasedDeltaArray, "Deceased"), districts) 153 | elif options == "full": 154 | self.printFullDetails(confirmedDeltaArray, "Hospitalized", stateName, stateCode, districts) 155 | self.printFullDetails(recoveredDeltaArray, "Recovered", stateName, stateCode, districts) 156 | self.printFullDetails(deceasedDeltaArray, "Deceased", stateName, stateCode, districts) 157 | if 'migrated' in districtDetails.keys(): 158 | self.printFullDetails(migratedDeltaArray, "Migrated_Other", stateName, stateCode, districts) 159 | elif options == "fullActive": 160 | self.printFullDetails(activeDeltaArray, "Active", stateName, stateCode, districts) 161 | return 162 | else: 163 | print("Total delta, {}, {}, {}".format(stateConfirmedDelta, stateRecoveredDelta, stateDeceasedDelta)) 164 | 165 | print("StateTotal, {}, {}, {}".format(stateTotalFromStateDashboard['confirmed'], stateTotalFromStateDashboard['recovered'], stateTotalFromStateDashboard['deceased'])) 166 | print("SiteTotal, {}, {}, {}".format(siteTotalFromStateDashboard['confirmed'], siteTotalFromStateDashboard['recovered'], siteTotalFromStateDashboard['deceased'])) 167 | 168 | if len(errorArray) > 0: 169 | for error in errorArray: 170 | print(error) 171 | 172 | def printFullDetails(self, deltaArray, category, stateName, stateCode, districts): 173 | with open("output2.txt", "w+") as f: 174 | for index, data in enumerate(deltaArray): 175 | if data != 0 and data != "NA": 176 | print("{},{},{},{},{}".format(districts[index], stateName, stateCode, data, category), file=f) 177 | print("{},{},{},{},{}".format(districts[index], stateName, stateCode, data, category)) 178 | 179 | def printDeltas(self, deltaArray, category): 180 | print('-' * 20 + category + '-' * 20) 181 | printIndex = [] 182 | for index, data in enumerate(deltaArray): 183 | if data != 0 and data != "NA": 184 | print(data) 185 | printIndex.append(index) 186 | 187 | return printIndex 188 | 189 | def printDistricts(self, 
printIndex, districts): 190 | for data in printIndex: 191 | print(districts[data]) 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ***A newer, cleaner version is being written [here](https://github.com/bee-rickey/covid_bulletin_automator). WIP*** 2 | 3 | ***A scrapper for multiple covid state websites. Triedcatched's ghost!*** 4 | 5 | ***Used by www.covid19india.org admin teams. Not for general consumption :P :D*** 6 | 7 | 8 | # Usage 9 | Currently there are three types of bulletins: 10 | 11 | 1. Images - AP, AR, BR, CT, JH, JK, HP, MH, MP, NL, PB, RJ, TG, TN, UK, UP 12 | 2. PDFs - HR, KA, KL, PB, TN, WB 13 | 3. Dashboards - CH, GJ, ML, OR, PY, TR, Vaccines 14 | 15 | For all those where ocr is supported (optical character recognition using google vision api), the command to run is: 16 | ```./ocr.sh "fully qualified path to image" "State Name" "starting text, ending text" "True/False" "ocr/table/automation"``` 17 | 18 | Parameter description: 19 | 1. "fully qualified path to image": Example "./home/covid/mh.jpg" The path cannot be relative path but it should have the fully qualified path. 20 | 2. "State Name": This is the state for which the image is being passed. Example: "Andhra Pradesh". 21 | 3. "starting text, ending text": This is the starting text of an image which considered to be the begining of a bulletin. In case you want auto detection to kick in, use "auto,auto". In some of the cases, if the bulletin has a text above the table with district names, consider cropping the image to have only the table with district data. 22 | 4. "True/False": This parameter is used in case you want to translate the district name (True: yes, please translate. False: No, do not translate). As of now this is applicable only to UP and BR bulletins. 23 | 5. 
"ocr/table/automation": This is an option provided where in case you want to skip one or more of the steps (ocr, table creation or automation.py run), you can provide those steps in comma separated manner. Example: "ocr,automation" will skip both ocr step and the automation step. "ocr,table" will skip image reading and table creation, but will run automation.py step to compute the delta. 24 | 25 | 26 | # How does ocr.sh work? 27 | ![Detailed Flow of ocr.sh](detailedflow.png) 28 | 29 | For any bulletin to be parsed, we use Google Vision API free tier. All the steps are called via ocr.sh. 30 | 1. First, [google vision api is called](https://github.com/bee-rickey/webScraper/blob/68441dbbd0aff5980b8984bcd2cee701950e96c9/automation/ocr/ocr.sh#L159) on the image to generate bounds.txt file. This is a direct output from the Google Vision API. This needs to be parsed to figure out the tablular structure. 31 | 2. Next, "ocrconfig.meta.orig" [file is parsed](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L188) and this generates ocrconfig.meta file. This file is used to tweak the way a table is interpreted in a bulletin. 32 | 3. ocr.sh internaly [invokes googlevision.py](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L193) file as well. This file is responsible for using ocrconfig.meta file and read the output generated by the Google Vision API (bounds.txt). The output of this step is generation of an output.txt file. This file has textually converted data of the image passed (basically a csv file with a row for each district given in the image). 33 | 4. 
In the last step, [the bounds.txt file is copied](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L196) into automation/.tmp/stateCode.txt and [automation.py is invoked](https://github.com/bee-rickey/webScraper/blob/fbb055addff6bfefcee45853e34e238b25f3092e/automation/ocr/ocr.sh#L202) to generate the delta values for the state across all districts. 34 | 35 | NOTES: 36 | - Since output.txt is an intermediate file, in case there are issues wrt bulletin being converted into text, then, the feature of skipping ocr and table generation can be used after correcting values in output.txt. Example: *./ocr.sh auto,auto False "ocr,table"*. 37 | - OCR is heavily dependent on how good the image quality is. If the quality of image is bad, the output of google vision api might not be good enough to generate data. 38 | - Since googlevision.py script tries to auto identify the bulletin table, it always searches for district names and assumes the line with the first occurrence of a district name is the starting of the table. Hence, in case there are notes above a table with district names, the image has to be cropped to remove the text above the table. 39 | 40 | 41 | # How does googlevision.py work? 42 | 1. Google Vision API gives each text that it recognises and coordinates of a rectangle around the text it matches. 43 | Example: 44 | ``` 45 | 9248|bounds|245,326|281,326|281,343|245,343 46 | ``` 47 | This shows 9248 was found with bottom left coordinate of 245,326, bottom right of 281,326, top right of 281,343 and top left of 245,343 48 | 49 | The idea is to use this information to figure out which all texts in an image fall on the same lines and same columns. 50 | 2. googlevision.py uses the bounds.txt file which contains this information to generate an internal class per text.
This class has the following [definition](https://github.com/bee-rickey/webScraper/blob/47182b314849e1f99ea48a3c537c3a1104513560/automation/ocr/googlevision.py#L38): 51 | ``` 52 | class cellItem: 53 | def __init__(self, value, x, y, lbx, lby, w, h, col, row, index): 54 | self.value = value 55 | self.x = x 56 | self.y = y 57 | self.col = col 58 | self.row = row 59 | self.index = index 60 | self.lbx = lbx 61 | self.lby = lby 62 | self.h = h 63 | self.w = w 64 | ``` 65 | Definitions: 66 | ``` 67 | x - mid point of the text in x direction 68 | y - mid point of the text in y direction 69 | col - a column number assigned to the text (all texts that fall with same x coordinate with a given tolerance will have the same col number) 70 | row - a row number assigned to the text (all texts that fall with same y coordinate with a given tolerance will have the same row number) 71 | index - a unique number identifying each text 72 | lbx - the left bottom x coordinate of the text (used for drawing a rectangle around the text) 73 | lby - the left bottom y coordinate of the text (used for drawing a rectangle around the text) 74 | h - height of the text (calculated using left top y - left bottom y) 75 | w - width of the text (calculated using right bottom x - left bottom x) 76 | ``` 77 | 3. For each text found in the image, using it's rectangular coordinates, the mid points are calculated. 78 | 4. The next steps involve figuring out the row and column numbers. For this the logic is simple: 79 | - If the x coordinates are same, then the lie on the same column (in case hough transformation is used, all texts within the bounds of two consecutive lines should have the same column number). 80 | - If the y coordinates are same, then they lie on the same row. 81 | However, [a tolerance](https://github.com/bee-rickey/webScraper/blob/47182b314849e1f99ea48a3c537c3a1104513560/automation/ocr/googlevision.py#L321) is considered while arriving at col and row numbers. 82 | 5. 
In order to arrive at the rows (lines) that matter, the starting text and the ending text parameters are used. The moment a line with the starting text is encountered, it is assumed to be the first line of the table. If the starting text is kept as "auto", in that case, the code checks for the first line containing a district name as the starting of the table. 83 | 6. Next step is to print all those with the same row in the same line but with a sorting on the x coordinate (column value). While printing some of the corner scenarios like district names with space need to be considered and handled. 84 | 7. In case the district names are in Hindi, then before printing, the text has to be converted into English using a translation dictionary which is used. 85 | 8. The output is put into a file named output.txt. This file will have a 1-1 conversion of the bulletin table that has districts information. 86 | 87 | # How does automation.py work? 88 | 1. automation.py uses api endpoint at covid19india.org to figure out the difference per district from bulletin to the api endpoint. 89 | 2. automation.py has different modes of operation - ocr, pdf, dashboard. 90 | - For ocr, the .tmp/statecode.txt file is used to compute the delta (this comes from ocr.sh run). 91 | - For pdfs, pdftotext and camelot are used to convert a pdf into a csv file and then use it for delta calculation. 92 | - For dashboards, beautifulsoup or sometimes plain json pulls are used to get the information to calculate the delta. 93 | 3. In case of pdfs, there's an option to specify which page number to read and parse. The format is: 94 | ``` 95 | ./automation.py "statename" full pdf== 96 | ./automation.py "stateName" full pdf== (this in case you manually place the pdf as .tmp/stateCode.pdf) 97 | ``` 98 | 4. For dashboards, a meta file automation.meta has the dashboard endpoint from which to read and parse the data. 99 | 5. 
For each state, there has to be an entry in automation.meta file (even if it's driven by ocr). The meta file has the stateCode to consider for picking up the file from .tmp folder. The state code also allows for standardization of code. Each state has a GetData() function which acts as the entry point for the calculations. Example: 100 | ``` 101 | def TRGetData(): 102 | response = requests.request("GET", metaDictionary['Tripura'].url) 103 | soup = BeautifulSoup(response.content, 'html.parser') 104 | table = soup.find("tbody").find_all("tr") 105 | 106 | districtArray = [] 107 | for index, row in enumerate(table): 108 | dataPoints = row.find_all("td") 109 | 110 | districtDictionary = {} 111 | districtDictionary['districtName'] = dataPoints[1].get_text().strip() 112 | districtDictionary['confirmed'] = int(dataPoints[8].get_text().strip()) 113 | districtDictionary['recovered'] = int(dataPoints[10].get_text().strip()) 114 | districtDictionary['deceased'] = int(dataPoints[12].get_text().strip()) 115 | districtArray.append(districtDictionary) 116 | 117 | deltaCalculator.getStateDataFromSite("Tripura", districtArray, option) 118 | ``` 119 | 120 | 121 | # How does this code sit in the grand scheme of automation at covid19india.org? 122 | botto.png![image](https://user-images.githubusercontent.com/63364562/130059654-f5257e6a-6ed3-412b-b728-641d39794203.png) 123 | Essentially, the idea is that volunteers send the request over a telegram bot that is then [configured to trigger the script](https://github.com/covid19india/automation-bot/blob/master/src/ocr_functions.py) when a command is required. 
124 | -------------------------------------------------------------------------------- /automation/ocr/googlevision.py: -------------------------------------------------------------------------------- 1 | import re 2 | import cv2 3 | import os 4 | import sys 5 | import json 6 | from PIL import Image 7 | import numpy as np 8 | from matplotlib import pyplot as plt 9 | import matplotlib.patches as patches 10 | from matplotlib.patches import Circle 11 | 12 | dataDictionary = {} 13 | dataDictionaryArray = [] 14 | translationDictionary = {} 15 | xInterval = 0 16 | xStartThreshold = 0 17 | yStartThreshold = 0 18 | xEndThreshold = 0 19 | yEndThreshold = 0 20 | configxInterval = 0 21 | configyInterval = 0 22 | yInterval = 0 23 | startingText = "" 24 | endingText = "" 25 | enableTranslation = False 26 | translationFile = "" 27 | fileName = "" 28 | xWidthTotal = 0 29 | configMinLineLength = 600 30 | 31 | def is_number(s): 32 | try: 33 | int(s) 34 | return True 35 | except ValueError: 36 | return False 37 | 38 | class cellItem: 39 | def __init__(self, value, x, y, lbx, lby, w, h, col, row, index): 40 | self.value = value 41 | self.x = x 42 | self.y = y 43 | self.col = col 44 | self.row = row 45 | self.index = index 46 | self.lbx = lbx 47 | self.lby = lby 48 | self.h = h 49 | self.w = w 50 | 51 | class ColumnHandler: 52 | def __init__(self): 53 | self.columnList = [] 54 | self.rowList = [] 55 | self.pointList = [] 56 | 57 | def addPoint(self, x, y): 58 | self.pointList.append(LinePoints(x, y)) 59 | 60 | def prepareRow(self): 61 | rowNumber = 1 62 | self.pointList.sort(key=lambda y: y.y) 63 | for index, col in enumerate(self.pointList): 64 | if index % 2 == 1: 65 | continue 66 | if index == 0: 67 | previousX = col.x 68 | previousY = col.y 69 | continue 70 | 71 | if col.y - previousY < 10: 72 | continue 73 | self.rowList.append(ColumnAndRow(previousX, previousY, col.x, col.y, rowNumber)) 74 | previousX = col.x 75 | previousY = col.y 76 | rowNumber += 1 77 | 78 | def 
class ColumnHandler:
    """Groups Hough-line endpoints into table column and row bands.

    addPoint() collects line-segment endpoints (added in pairs, one per end);
    prepareColumn()/prepareRow() sort them by x resp. y and pair consecutive
    even-indexed points into ColumnAndRow bands, which later map a text's
    midpoint to a column/row number.
    """

    def __init__(self):
        self.columnList = []
        self.rowList = []
        self.pointList = []

    def addPoint(self, x, y):
        # Endpoints arrive in pairs: both ends of each detected line segment.
        self.pointList.append(LinePoints(x, y))

    def prepareRow(self):
        """Build horizontal bands from the collected points, top to bottom.

        Odd indices are skipped because points come in pairs; points closer
        than 10px vertically are treated as the same grid line.
        """
        rowNumber = 1
        self.pointList.sort(key=lambda p: p.y)
        for index, point in enumerate(self.pointList):
            if index % 2 == 1:
                continue
            if index == 0:
                previousX = point.x
                previousY = point.y
                continue

            if point.y - previousY < 10:
                continue
            self.rowList.append(ColumnAndRow(previousX, previousY, point.x, point.y, rowNumber))
            previousX = point.x
            previousY = point.y
            rowNumber += 1

    def prepareColumn(self):
        """Build vertical bands from the collected points, left to right.

        Mirror of prepareRow() with a 5px tolerance on x.
        """
        columnNumber = 1
        self.pointList.sort(key=lambda p: p.x)
        for index, point in enumerate(self.pointList):
            if index % 2 == 1:
                continue
            if index == 0:
                previousX = point.x
                previousY = point.y
                continue

            if point.x - previousX < 5:
                continue
            self.columnList.append(ColumnAndRow(previousX, previousY, point.x, point.y, columnNumber))
            previousX = point.x
            previousY = point.y
            columnNumber += 1

    def printColumnsAndCoordinates(self):
        """Debug dump of the detected column/row bands."""
        print("Column No ... x1,y1 --> x2,y2")
        for column in self.columnList:
            print("c{} ... {},{} --> {},{}".format(column.number, column.x1, column.y1, column.x2, column.y2))
        for row in self.rowList:
            print("r{} ... {},{} --> {},{}".format(row.number, row.x1, row.y1, row.x2, row.y2))

    def getNearestLineToTheLeft(self, xCoordinate):
        """Return the x of the column boundary just left of xCoordinate, or 0."""
        for col in self.columnList:
            if xCoordinate > int(col.x1) and xCoordinate < int(col.x2):
                return col.x1
        return 0

    def getColumnNumber(self, cell):
        """Return the column band number containing cell.x.

        Returns None when the midpoint falls outside every band (e.g. exactly
        on a boundary) — callers treat that cell as column-less.
        """
        for col in self.columnList:
            if cell.x > col.x1 and cell.x < col.x2:
                return col.number
        return None


class ColumnAndRow:
    """A band between two grid lines: (x1,y1)-(x2,y2) with a sequence number."""

    def __init__(self, x1, y1, x2, y2, number):
        self.x1 = x1
        self.y1 = y1
        self.x2 = x2
        self.y2 = y2
        self.number = number


class LinePoints:
    """A single 2-D point (one endpoint of a detected line segment)."""

    def __init__(self, x, y):
        self.x = x
        self.y = y


def buildCellsV2():
    """No-op placeholder (its body was commented out); kept because main()
    still calls it, preserving call-site compatibility."""
    global xInterval
    global yInterval
    global startingText
    global endingText
    global yStartThreshold
    global xStartThreshold
    global configxInterval
    global configyInterval
    global xWidthTotal


def detectLines():
    """Detect table grid lines via Canny + probabilistic Hough transform and
    feed their endpoints into the module-level ColumnHandler.

    Reads the image at the module-level ``fileName``. The original also
    computed a grayscale conversion that was never used (Canny runs directly
    on the BGR image); that dead work is removed.
    NOTE(review): theta step np.pi/135 is unusual (np.pi/180 is the standard
    1-degree step) — kept as-is since changing it alters detection; confirm intent.
    """
    global columnHandler
    global configMinLineLength
    img = cv2.imread(fileName)
    edges = cv2.Canny(img, 50, 150)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 135, configMinLineLength, maxLineGap=250)
    columnHandler = ColumnHandler()
    for line in lines:
        x1, y1, x2, y2 = line[0]
        columnHandler.addPoint(x1, y1)
        columnHandler.addPoint(x2, y2)

    columnHandler.prepareColumn()
    columnHandler.prepareRow()
    columnHandler.printColumnsAndCoordinates()
def buildCells():
    """Parse Google Vision bounds.txt into module-level cellItem objects.

    Each usable line has the shape
        text|bounds|llx,lly|lrx,lry|urx,ury|ulx,uly
    For every text the bounding-box midpoint is computed; the largest
    half-width/half-height seen become the x/y matching tolerances. "auto"
    starting/ending text resolves to the top-most / bottom-most word matching
    a district name in translationDictionary; explicit starting/ending texts
    are matched by value (comma-separated aliases come from
    buildTranslationDictionary()).
    """
    global xInterval
    global yInterval
    global startingText
    global endingText
    global xStartThreshold
    global yStartThreshold
    global xEndThreshold
    global yEndThreshold
    global configxInterval
    global configyInterval
    global xWidthTotal

    startingMatchFound = False
    endingMatchFound = False

    autoEndingText = endingText
    autoStartingText = startingText

    with open("bounds.txt", "r") as testingNumbersFile:
        for index, line in enumerate(testingNumbersFile):
            lineArray = line.split('|')
            if len(lineArray) != 6:
                continue

            if not lineArray[0] or not lineArray[2] or not lineArray[4] or not lineArray[5]:
                continue

            value = lineArray[0]

            lowerLeft = lineArray[2].split(',')
            lowerRight = lineArray[3].split(',')
            upperRight = lineArray[4].split(',')
            upperLeft = lineArray[5].split(',')

            if len(lowerLeft) != 2 or len(lowerRight) != 2 or len(upperRight) != 2 or len(upperLeft) != 2:
                continue

            # Midpoint of the bounding box around this text.
            xMean = (int(lowerLeft[0]) + int(lowerRight[0])) / 2
            yMean = (int(lowerLeft[1]) + int(upperLeft[1])) / 2

            if startingText == "auto":
                # Track the top-most word that looks like a district name.
                if len(value.title()) > 1 and any(value.title() in district for district in list(translationDictionary.keys())):
                    if xStartThreshold == 0:
                        xStartThreshold = xMean
                        autoStartingText = value
                    if yStartThreshold == 0:
                        yStartThreshold = yMean

                    if yMean < yStartThreshold:
                        # BUGFIX: was "xStartTreshold = xMean" (typo) — the
                        # assignment created a dead local, so the x threshold
                        # never followed the top-most district match.
                        xStartThreshold = xMean
                        yStartThreshold = yMean
                        autoStartingText = value

            if endingText == "auto":
                # Track the bottom-most word that looks like a district name.
                if len(value.title()) > 1 and any(value.title() in district for district in list(translationDictionary.keys())):
                    if xEndThreshold == 0:
                        xEndThreshold = xMean
                    if yEndThreshold == 0:
                        yEndThreshold = yMean
                        autoEndingText = value

                    if yMean > yEndThreshold:
                        xEndThreshold = xMean
                        yEndThreshold = yMean
                        autoEndingText = value

            if ',' in startingText:
                # Aliases (translated spellings) appended by buildTranslationDictionary().
                if value.title() in startingText.split(','):
                    startingMatchFound = True
                    xStartThreshold = xMean
                    yStartThreshold = yMean
            else:
                if value.title() == startingText and startingMatchFound == False:
                    startingMatchFound = True
                    xStartThreshold = xMean
                    yStartThreshold = yMean

            if ',' in endingText:
                if value.title() in endingText.split(','):
                    endingMatchFound = True
                    xEndThreshold = xMean
                    yEndThreshold = yMean
            else:
                if value.title() == endingText and endingMatchFound == False:
                    endingMatchFound = True
                    xEndThreshold = xMean
                    yEndThreshold = yMean

            # Keep the largest half-extent seen so far as the matching tolerance.
            xInterval = (int(lowerRight[0]) - int(lowerLeft[0])) / 2 if (int(lowerRight[0]) - int(lowerLeft[0])) / 2 > xInterval else xInterval
            yInterval = (int(upperLeft[1]) - int(lowerLeft[1])) / 2 if (int(upperLeft[1]) - int(lowerLeft[1])) / 2 > yInterval else yInterval
            xWidthTotal = xWidthTotal + int(lowerRight[0]) - int(lowerLeft[0])
            dataDictionaryArray.append(cellItem(value, xMean, yMean, lowerLeft[0], lowerLeft[1], (float(lowerRight[0]) - float(lowerLeft[0])), (float(upperLeft[1]) - float(lowerLeft[1])), 0, 0, index + 1))

    # NOTE(review): raises ZeroDivisionError when bounds.txt yields no usable
    # rows — assumed to be intentional fail-fast; confirm.
    xWidthTotal = xWidthTotal / len(dataDictionaryArray)
    startingText = autoStartingText
    endingText = autoEndingText


def buildReducedArray():
    """Drop texts above / left of the detected table region and recompute the
    x/y tolerances from the widest and tallest surviving cell."""
    global endingText
    global xInterval
    global yInterval
    global dataDictionaryArray
    global columnHandler

    tempDictionaryArray = []
    maxWidth = 0
    maxHeight = 0

    # Ignoring texts left of / above the starting text improves output accuracy.
    print("Starting text: {} ... Ending text: {}".format(startingText, endingText))
    xLimit = columnHandler.getNearestLineToTheLeft(xStartThreshold) if houghTransform == True else xStartThreshold - 20
    for cell in dataDictionaryArray:
        if cell.y < yStartThreshold - 10 or (xLimit is not None and cell.x < xLimit):
            continue

        if len(endingText) != 0 and (cell.y > yEndThreshold + 10):
            continue

        tempDictionaryArray.append(cell)
        maxWidth = cell.w if cell.w > maxWidth else maxWidth
        maxHeight = cell.h if cell.h > maxHeight else maxHeight

    xInterval = maxWidth / 2
    yInterval = maxHeight / 2

    dataDictionaryArray = tempDictionaryArray


def assignRowsAndColumns():
    """Assign row/col numbers to every cell.

    Cells whose midpoints coincide within the x/y tolerance share a column/row;
    with Hough enabled, column numbers come from the detected grid lines.
    """
    global yInterval
    global xInterval
    global configyInterval
    global configxInterval

    # Explicit config overrides win over computed tolerances.
    if configxInterval != 0:
        xInterval = configxInterval
    if configyInterval != 0:
        yInterval = configyInterval

    print("Using computed yInterval: {}, xInterval: {}".format(yInterval, xInterval))
    for rowIndex, currentCell in enumerate(dataDictionaryArray):
        if currentCell.row == 0:
            currentCell.row = rowIndex + 1

        # Hoisted out of the inner loop (it was loop-invariant in the original).
        if currentCell.col == 0:
            if houghTransform == True:
                currentCell.col = columnHandler.getColumnNumber(currentCell)
            else:
                # Without grid lines each unassigned cell seeds its own column id.
                currentCell.col = rowIndex + 1

        for restOfTheCells in dataDictionaryArray:
            if restOfTheCells.index == currentCell.index:
                continue

            yUpperBound = currentCell.y + yInterval
            yLowerBound = currentCell.y - yInterval
            # Same y coordinate (within tolerance) -> same row.
            if restOfTheCells.row == 0:
                if yLowerBound <= restOfTheCells.y <= yUpperBound:
                    restOfTheCells.row = rowIndex + 1

            xUpperBound = currentCell.x + xInterval
            xLowerBound = currentCell.x - xInterval

            # Same x coordinate (within tolerance) -> same column.
            if restOfTheCells.col == 0:
                if houghTransform == True:
                    restOfTheCells.col = columnHandler.getColumnNumber(restOfTheCells)
                elif xLowerBound <= restOfTheCells.x <= xUpperBound:
                    restOfTheCells.col = currentCell.col
def buildTranslationDictionary():
    """Load the district translation meta file into translationDictionary.

    Each non-comment line is "localName,englishName". If the configured
    starting/ending text equals an English name, its local-language alias is
    appended comma-separated so buildCells() can match either spelling.
    """
    global startingText
    global endingText

    originalStartingText = startingText
    originalEndingText = endingText

    with open(translationFile, "r") as metaFile:
        for line in metaFile:
            if line.startswith('#'):
                continue
            lineArray = line.strip().split(',')
            if len(startingText) != 0:
                if originalStartingText.strip() == lineArray[1].strip():
                    startingText = startingText + "," + lineArray[0].strip()

            if len(endingText) != 0:
                if originalEndingText.strip() == lineArray[1].strip():
                    endingText = endingText + "," + lineArray[0].strip()

            translationDictionary[lineArray[0].strip()] = lineArray[1].strip()


def printOutput():
    """Emit the reconstructed table to output.txt (one CSV row per table row)
    and save/show a debug image with rectangles around every used text.

    Texts sharing a row number are sorted by x; adjacent non-numeric texts in
    the same column are merged (multi-word district names). With translation
    enabled, the district name is mapped through translationDictionary (with a
    fuzzy fallback); unmapped rows are dropped with a warning.
    """
    global enableTranslation

    image = np.array(Image.open(fileName), dtype=np.uint8)
    fig, ax = plt.subplots(1)
    if houghTransform == True:
        for point in columnHandler.pointList:
            # Highlight endpoints on the column boundary nearest the start text.
            if columnHandler.getNearestLineToTheLeft(xStartThreshold) - 5 <= point.x <= columnHandler.getNearestLineToTheLeft(xStartThreshold) + 5:
                circ = Circle((point.x, point.y), 5, color='r')
            else:
                circ = Circle((point.x, point.y), 4)
            ax.add_patch(circ)

    with open('output.txt', 'w') as outputFile:
        # Row numbers are assigned 1..len(dataDictionaryArray); the original
        # iterated range(0, len(...)) which could skip the highest row number
        # (and wasted an iteration on the impossible row 0).
        for i in range(1, len(dataDictionaryArray) + 1):
            outputString = []
            for cell in dataDictionaryArray:
                if cell.row == i:
                    outputString.append(cell)
            outputString.sort(key=lambda c: c.x)

            output = ""
            previousCol = -999
            mergedValue = ""
            columnList = ""
            # Merge texts that share a column value due to proximity (district
            # names split by spaces) while walking the row left to right.
            for index, value in enumerate(outputString):
                value.value = re.sub(r"\.", "", re.sub(",", "", value.value))
                if index == 0:
                    mergedValue = value.value
                    previousCol = value.col
                    columnList = str(value.col)
                    rect = patches.Rectangle((int(value.lbx), int(value.lby)), value.w, value.h, linewidth=0.75, edgecolor='r', facecolor='none')
                    ax.add_patch(rect)
                    continue

                if value.col == previousCol and is_number(value.value) == False:
                    mergedValue = mergedValue + " " + value.value if len(mergedValue) != 0 else value.value
                    if index == len(outputString) - 1:
                        output += mergedValue if len(output) == 0 else " , " + mergedValue
                else:
                    if index == len(outputString) - 1:
                        mergedValue = mergedValue + ", " + value.value if len(mergedValue) != 0 else value.value
                        output += mergedValue if len(output) == 0 else " , " + mergedValue
                    previousCol = value.col
                    mergedValue = value.value
                    columnList = columnList + ", " + str(value.col) if len(columnList) != 0 else str(value.col)
                    rect = patches.Rectangle((int(value.lbx), int(value.lby)), value.w, value.h, linewidth=0.75, edgecolor='r', facecolor='none')
                    ax.add_patch(rect)

            if len(output) > 0:
                if enableTranslation == False:
                    print("{} | {}".format(output, columnList), file=outputFile)
                else:
                    outputArray = output.split(',')
                    # If the first column is a serial number the district name
                    # sits in column 1, otherwise in column 0.
                    if is_number(outputArray[0]):
                        districtName = outputArray[1].strip()
                        districtIndex = 1  # BUGFIX: was misspelled "distrinctIndex"
                    else:
                        districtName = outputArray[0].strip()
                        districtIndex = 0  # BUGFIX: was misspelled "distrinctIndex"

                    # Look up the district; fall back to fuzzy match, else drop
                    # the record with a message.
                    try:
                        translatedValue = translationDictionary[districtName]
                        outputString = translatedValue
                        for index, value in enumerate(outputArray):
                            if index > districtIndex:
                                outputString += "," + value.strip()
                    except KeyError:
                        try:
                            fuzzyDistrict = fuzzyLookup(translationDictionary, districtName)
                            translatedValue = translationDictionary[fuzzyDistrict]
                        except:
                            print(f"Failed to find lookup for {districtName}")
                            continue

                        outputString = translatedValue
                        for index, value in enumerate(outputArray):
                            if index > districtIndex:
                                outputString += "," + value.strip()
                    print("{} | {}".format(outputString, columnList), file=outputFile)

    ax.imshow(image)
    plt.savefig("image.png", dpi=300)
    plt.show()


def fuzzyLookup(translationDictionary, districtName):
    """Map a misread district name to the closest dictionary key via fuzzy match.

    NOTE: process.extractOne returns None when no key reaches the cutoff, so
    the [0] subscript raises TypeError — the caller treats any exception from
    here as "not found".
    """
    from fuzzywuzzy import process
    # Score cut-off of 90 seems to work well for UP bulletins.
    district = process.extractOne(
        districtName,
        translationDictionary.keys(),
        score_cutoff=90)[0]
    print(f"WARN : {districtName} mapped to {district} using Fuzzy Lookup")
    return district
def parseConfigFile(fileName):
    """Load ocrconfig.meta-style "key:value" settings into module globals.

    Recognised keys: startingText (optionally "start,end"), enableTranslation,
    translationFile, xInterval, yInterval, houghTransform, configMinLineLength.
    Lines without a ':' and unknown keys are ignored.
    """
    global startingText
    global endingText
    global enableTranslation
    global translationFile
    global configyInterval
    global configxInterval
    global houghTransform
    global configMinLineLength

    # "with" fixes the file-handle leak in the original (handle never closed).
    with open(fileName, "r") as configFile:
        for line in configFile:
            # NOTE: split(':') keeps only the first segment after the key, so a
            # value containing ':' would be truncated — none of the known keys do.
            lineArray = line.split(':')
            if len(lineArray) < 2:
                continue

            key = lineArray[0].strip()
            value = lineArray[1].strip()

            if key == "startingText":
                if ',' in value:
                    startingText = value.split(',')[0]
                    endingText = value.split(',')[1]
                else:
                    startingText = value
            elif key == "enableTranslation":
                # Replaces eval(value): only the documented "True" literal
                # enables the flag; eval on config text was unsafe.
                enableTranslation = value == "True"
            elif key == "translationFile":
                translationFile = value
            elif key == "xInterval":
                configxInterval = int(value)
            elif key == "yInterval":
                configyInterval = int(value)
            elif key == "houghTransform":
                houghTransform = value == "True"
            elif key == "configMinLineLength":
                # Replaces eval(value) with an explicit integer parse.
                configMinLineLength = int(value)


def main():
    """CLI entry point: argv[1] = config file, argv[2] = bulletin image."""
    global startingText
    global endingText
    global enableTranslation
    global houghTransform
    global fileName

    # Default; the config file may override it.
    houghTransform = False
    if len(sys.argv) > 1:
        parseConfigFile(sys.argv[1])
        # NOTE(review): raises IndexError when only the config path is given —
        # assumed callers (ocr.sh) always pass both arguments; confirm.
        fileName = sys.argv[2]

    buildTranslationDictionary()

    buildCells()
    buildCellsV2()
    if houghTransform == True:
        print("Using houghTransform to figure out columns. Set houghTransform:False in ocrconfig.meta.orig to disable this")
        detectLines()

    if len(startingText) != 0 or len(endingText) != 0:
        buildReducedArray()

    assignRowsAndColumns()

    printOutput()


if __name__ == '__main__':
    main()
Set houghTransform:False in ocrconfig.meta.orig to disable this") 526 | detectLines() 527 | 528 | if len(startingText) != 0 or len(endingText) != 0: 529 | buildReducedArray() 530 | 531 | assignRowsAndColumns() 532 | 533 | printOutput() 534 | if __name__ == '__main__': 535 | main() 536 | -------------------------------------------------------------------------------- /extract.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import requests 3 | import sys 4 | import json 5 | import re 6 | import datetime 7 | folderName = "data/" + sys.argv[1] + "/" 8 | 9 | #url = "https://dashboard.kerala.gov.in/index.php" 10 | #response = requests.request("GET", url) 11 | #cookie=(response.headers['Set-Cookie']).split(';')[0] 12 | # 13 | #url = "https://dashboard.kerala.gov.in/testing-view-public.php" 14 | # 15 | #payload = {} 16 | #headers = { 17 | # 'Host': 'www.dashboard.kerala.gov.in', 18 | # 'Connection': 'keep-alive', 19 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 20 | # 'Accept-Encoding': 'gzip, deflate, br', 21 | # 'Accept-Language': 'en-US,en;q=0.5', 22 | # 'Referer': 'https://dashboard.kerala.gov.in/index.php', 23 | # 'Cookie': cookie 24 | #} 25 | # 26 | #response = requests.request("GET", url, headers=headers, data = payload) 27 | #soup = BeautifulSoup(response.content, 'html5lib') 28 | #table = soup.find("table") 29 | #rows=table.find_all("tr") 30 | # 31 | #for row in rows: 32 | # data = row.find_all("td") 33 | # if len(data) > 0 : 34 | # print(data[0].get_text() + "," + data[1].get_text() + "," + data[2].get_text()) 35 | # 36 | #url = "http://hmfw.ap.gov.in/covid_dashboard.aspx" 37 | #response = requests.request("GET", url) 38 | #soup = BeautifulSoup(response.content, 'html5lib') 39 | #samplesTested = soup.find("span", id="lblSamples") 40 | #samplesNegative = soup.find("span", id="lblNegative") 41 | # 42 | #print("AP: Samples tested: " + 
metaArray = []


class ExtractMeta:
    """One row of extract.meta: a state dashboard URL plus whether
    district-level extraction is required (third CSV field non-empty)."""

    def __init__(self, url, stateName, district):
        # Any non-empty district field switches on district-level extraction;
        # bool("") is False, matching the original ==""/else logic exactly.
        self.districtRequired = bool(district)
        self.url = url
        self.stateName = stateName


# Module-level side effect (kept for backward compatibility): extract.meta is
# parsed at import time into metaArray. Lines starting with '#' are comments.
with open("extract.meta", "r") as metaFile:
    for line in metaFile:
        if line.startswith('#'):
            continue
        lineArray = line.strip().split(',')
        if len(lineArray) < 3:
            # Robustness: blank or malformed rows previously raised IndexError.
            continue
        metaArray.append(ExtractMeta(lineArray[0].strip(), lineArray[1].strip(), lineArray[2].strip()))


def getDataForStates():
    """Dispatch every metaArray entry to the district- or state-level extractor
    and write the accumulated state-level rows to data/<date>/summary.csv."""
    outputToWrite = []
    # BUGFIX: "Discharged" was previously misspelled "Dischargedl" in the header.
    header = "State, Last Updated, Samples Tested, Samples Positive, Samples Negative, Results Awaited, Total Confirmed, Total Active, Total Discharged, url\n"
    outputToWrite.append(header)

    for metaObject in metaArray:
        if metaObject.districtRequired:
            districtDetailsExtractor(metaObject)
        else:
            stateDetailsExtractor(metaObject, outputToWrite)

    writeToOutputCsv("summary.csv", outputToWrite)


def writeToOutputCsv(fileName, dataToWrite):
    """Write the accumulated CSV lines under the per-date output folder
    (module-level folderName, derived from argv[1])."""
    # Context manager guarantees the handle is closed even on write errors.
    with open(folderName + fileName, "w") as outFile:
        outFile.writelines(dataToWrite)
testingNumbersFile.writelines(dataToWrite) 100 | testingNumbersFile.close() 101 | 102 | 103 | def stateDetailsExtractor(metaObject, outputString): 104 | print("Data fetching for " + metaObject.stateName + ", " + metaObject.url) 105 | url = metaObject.url 106 | try: 107 | response = requests.request("GET", url) 108 | except: 109 | print("Error occurred while doing a request to " + url) 110 | return False 111 | soup = BeautifulSoup(response.content, 'html5lib') 112 | 113 | if metaObject.stateName == "Andhra Pradesh": 114 | samplesTested = soup.find("span", id = "lblSamples").get_text() 115 | samplesNegative = soup.find("span", id = "lblNegative").get_text() 116 | confirmed = soup.find("span", id = "lblConfirmed").get_text() 117 | active = soup.find("span", id = "lblActive").get_text() 118 | discharged = soup.find("span", id = "lblDischarged").get_text() 119 | lastUpdated = datetime.datetime.strptime(soup.find("span", id = "lblLast_Update").get_text(), "%d-%m-%Y %I:%M:%S %p") 120 | outputString.append("Andhra Pradesh, " + lastUpdated.strftime("%d/%m/%Y") + ", " + samplesTested + ", " + confirmed +","+ samplesNegative + ",,"+ confirmed +","+ active +", "+ discharged + "," + url + "\n") 121 | 122 | if metaObject.stateName == "Arunachal Pradesh": 123 | row = soup.find("tbody").find("tr") 124 | for index, data in enumerate(row.find_all("td")): 125 | if index == 0: 126 | lastUpdated = data.get_text() 127 | if index == 1: 128 | samplesTested = data.get_text() 129 | if index == 4: 130 | samplesNegative = data.get_text() 131 | if index == 5: 132 | samplesPositive = data.get_text() 133 | if index == 3: 134 | resultsAwaited = data.get_text() 135 | if index == 7: 136 | active = data.get_text() 137 | if index == 6: 138 | cured = data.get_text() 139 | 140 | outputString.append("Arunachal Pradesh, " + lastUpdated + ", " + samplesTested + ", " + samplesPositive + ", " + samplesNegative +","+ resultsAwaited +"," + samplesPositive + "," + active +"," + cured + "," + url + "\n") 141 | 
142 | if metaObject.stateName == "Chandigarh": 143 | divs = soup.find("div", {"class": "col-lg-8 col-md-9 form-group pt-10"}).find_all("div", {"class": "col-md-3"}) 144 | 145 | dataDictionary = {} 146 | for div in divs: 147 | innerDiv = div.find("div", {'class': 'stats'}).find_all('div') 148 | dataDictionary[innerDiv[0].get_text()] = innerDiv[1].get_text() 149 | 150 | rowString = "Chandigarh, " + datetime.date.today().strftime("%d/%m/%Y") 151 | orderArray = ['Total Sampled', 'Confirmed', 'Negative Cases', 'Result Awaited', 'Confirmed', '', 'Recovered'] 152 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 153 | 154 | outputString.append(rowString) 155 | 156 | if metaObject.stateName == "Gujarat": 157 | divs = soup.find_all("div", {"class": "dashboard-status"}) 158 | date = soup.find("span", id="ctl00_body_lblDate").get_text() 159 | dataDictionary = {} 160 | 161 | for div in divs: 162 | value = div.find("h3") 163 | key = div.find_all("h5") 164 | dataDictionary[key[len(key)-1].get_text().strip()] = value.get_text() 165 | 166 | rowString = "Gujarat, " + date 167 | orderArray = ['Cases Tested for COVID19', '', '', '', '', '', 'Patients Recovered', 'People Under Quarantine'] 168 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 169 | outputString.append(rowString) 170 | 171 | if metaObject.stateName == "Kerala": 172 | table = soup.find('table', {"class": "table-bordered"}).find_all("tr") 173 | date = soup.find("small").get_text() 174 | dataDictionary = {} 175 | keys = table[0].find_all("td") 176 | values = table[1].find_all("td") 177 | for index, value in enumerate(values): 178 | dataDictionary[keys[index].get_text().strip()] = value.get_text().strip() 179 | 180 | keys = soup.find('section', {"class": "content"}).find("div", {"class": "container-fluid"}).find("div", {"class": "row"}).find_all("p") 181 | values = soup.find('section', {"class": "content"}).find("div", {"class": "container-fluid"}).find("div", {"class": 
"row"}).find_all("h3") 182 | 183 | for index, value in enumerate(values): 184 | if '(' in keys[index].get_text().strip(): 185 | key = keys[index].get_text().strip().split('(')[0] 186 | else: 187 | key = keys[index].get_text().strip() 188 | dataDictionary[key] = value.get_text().strip() 189 | 190 | rowString = "Kerala, " + date 191 | orderArray = ['Total Sent', 'Tested Positive', 'Tested Negative', 'Result Awaiting', 'Total Confirmed', 'Active Cases ', 'Recovered '] 192 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 193 | outputString.append(rowString) 194 | 195 | if metaObject.stateName == "Nagaland": 196 | keys = soup.find("div", {"class": "row"}).find_all('p') 197 | values = soup.find("div", {"class": "row"}).find_all(['h1', 'h3']) 198 | 199 | dataDictionary = {} 200 | for index, value in enumerate(values): 201 | dataDictionary[keys[index].get_text().strip()] = value.get_text().strip() 202 | 203 | print(dataDictionary) 204 | 205 | rowString = "Nagaland, " + datetime.date.today().strftime("%d/%m/%Y") 206 | orderArray = ['', '', '', '', 'CONFIRMED', 'ACTIVE', 'RECOVERED'] 207 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 208 | 209 | outputString.append(rowString) 210 | 211 | if metaObject.stateName == "Odisha": 212 | divs = soup.find_all("div", {"class": "info-box"}) 213 | date = soup.find("div", {"class": "toplink-section d-flex justify-content-center align-items-center"}).find("small").get_text() 214 | dataDictionary = {} 215 | for div in divs: 216 | key = re.sub(' +', '', div.find("p").get_text().strip()) 217 | value = re.sub(',', '', re.sub(' +\[.*', '', div.find("h5").get_text().strip())) 218 | dataDictionary[key] = value 219 | 220 | rowString = "Odisha, " + date 221 | orderArray = ['TotalTestsDone', 'PositiveResult', 'NegativeResult', '', 'Confirmed', 'Active', 'Recovered'] 222 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 223 | outputString.append(rowString) 224 | 225 | header = 
"State, Last Updated, Samples Tested, Samples Positive, Samples Negative, Results Awaited, Total Confirmed, Total Active, Total Discharged\n" 226 | if metaObject.stateName == "Puducherry": 227 | divRows = soup.find_all("div", {"class": "row"}) 228 | divs = divRows[0].find_all("div", {"class": "card-body"}) 229 | date = soup.find("footer").find("div", {"class": "col-6 text-left"}).get_text() 230 | dataDictionary = {} 231 | for div in divs: 232 | value = div.find("span").get_text() 233 | div.find("span").decompose() 234 | key = div.get_text().split('(')[0].strip() if '(' in div.get_text() else div.get_text() 235 | dataDictionary[key] = value 236 | 237 | divs = divRows[2].find_all("div", {"class": "col-xl-6"})[1].find("table") 238 | keys = divs.find_all("th") 239 | values = divs.find_all("td") 240 | 241 | for index, value in enumerate(values): 242 | dataDictionary[keys[index].get_text().strip()] = value.get_text().strip() 243 | 244 | 245 | rowString = "Puducherry, " + date 246 | 247 | order = ['Total Samples Sent', 'Total Positive', 'Total Negative', 'Result Awaiting', 'Total Reported', 'Active Case', 'Cured'] 248 | rowString = buildRowString(url, order, rowString, dataDictionary) 249 | outputString.append(rowString) 250 | 251 | header = "State, Last Updated, Samples Tested, Samples Positive, Samples Negative, Results Awaited, Total Confirmed, Total Active, Total Discharged\n" 252 | if metaObject.stateName == "Rajasthan": 253 | table = soup.find("table").find_all("tr")[1].find_all("table")[3].find_all("div") 254 | date = re.sub(' +', ' ', re.sub('\n', ' ', soup.find("table").find_all("tr")[1].find_all("div", {"align": "right"})[1].get_text().strip())) 255 | keys = [] 256 | values = [] 257 | dataDictionary = {} 258 | for index, row in enumerate(table): 259 | if row.find("br") != None: 260 | row.find("br").decompose() 261 | valuesArray = re.sub(' +', '', row.get_text().strip()).split('\n') 262 | value = valuesArray.pop(0) 263 | dataDictionary[' '.join(valuesArray)] = 
value 264 | 265 | rowString = "Rajasthan, " + date 266 | orderArray = ['TotalSample Collected', 'Positive Cases', 'Negative Cases', 'Report Awaited', 'Positive Cases', '', 'Cured/Recovered'] 267 | rowString = buildRowString(url, orderArray, rowString, dataDictionary) 268 | outputString.append(rowString) 269 | 270 | 271 | 272 | 273 | def buildRowString(url, orderArray, rowString, dataDictionary): 274 | 275 | for key in orderArray: 276 | rowString = rowString + "," + dataDictionary[key] if len(key) > 0 else rowString + "," 277 | rowString += "," + url + "\n" 278 | return rowString 279 | 280 | 281 | 282 | def nagalandTableExtractor(soupObject, districtDictionary, firstPass): 283 | for index, row in enumerate(soupObject): 284 | if index == 0: 285 | dataElements = row.find_all("th") 286 | else: 287 | dataElements = row.find_all("td") 288 | 289 | rowString="" 290 | currentDistrict = "" 291 | for data in dataElements: 292 | if len(rowString) == 0: 293 | currentDistrict = data.get_text() 294 | rowString = data.get_text() if len(rowString) == 0 else rowString + "," + data.get_text() 295 | 296 | rowString = rowString.replace('-', '/') 297 | if firstPass == False: 298 | rowString = rowString + "\n" 299 | districtDictionary[currentDistrict] = rowString if firstPass == True else districtDictionary[currentDistrict] + "," + rowString 300 | 301 | def readAllEntriesForATable(table, outputString, itemToSearch, itemsToAppend, itemsToRemove): 302 | for index, row in enumerate(table): 303 | data = row.find_all(itemToSearch) 304 | 305 | if len(itemsToRemove) != 0: 306 | for sub in data('font'): 307 | sub.decompose() 308 | 309 | rowString = "" 310 | for value in data: 311 | rowString = str(value.get_text()).strip() if len(rowString) == 0 else rowString + "," + str(value.getText()).strip() 312 | rowString = re.sub('\n', '', rowString) 313 | rowString = re.sub(' +', ' ', rowString) 314 | 315 | if len(rowString) > 0: 316 | rowString = rowString + "," + str(itemsToAppend) + "\n" if 
len(itemsToAppend) > 0 else rowString + "\n" 317 | outputString.append(rowString) 318 | 319 | 320 | 321 | def districtDetailsExtractor(metaObject): 322 | print("Data fetching for " + metaObject.stateName + ", " + metaObject.url) 323 | outputString = [] 324 | url = metaObject.url 325 | 326 | try: 327 | response = requests.request("GET", url) 328 | except: 329 | print("Error occurred while doing a request to " + url) 330 | return False 331 | 332 | soup = BeautifulSoup(response.content, 'html5lib') 333 | 334 | if metaObject.stateName == "Nagaland": 335 | table = soup.find("table").find_all("tr") 336 | districtDictionary = {} 337 | nagalandTableExtractor(table, districtDictionary, True) 338 | 339 | response = requests.request("GET", "https://covid19.nagaland.gov.in/") 340 | soup = BeautifulSoup(response.content, 'html5lib') 341 | table = soup.find("div", id="case-data").find("table").find_all("tr") 342 | nagalandTableExtractor(table, districtDictionary, False) 343 | 344 | outputString.append(districtDictionary['District']) 345 | for k, v in districtDictionary.items(): 346 | if str(k) != 'District': 347 | outputString.append(str(v)) 348 | outputString.append("\n" + url) 349 | 350 | writeToOutputCsv("nagalandDistrict.csv", outputString) 351 | 352 | if metaObject.stateName == 'Odisha': 353 | url = "https://statedashboard.odisha.gov.in/ajax/heatMapHospital?type=Current" 354 | try: 355 | response = requests.request("GET", url) 356 | except: 357 | print("Error occurred while doing a request to " + url) 358 | return False 359 | outputString.append("DistrictName,NoOfHospitals,NoOfBeds,NoOfICU\n") 360 | for data in response.json(): 361 | dataString = data['vchDistrctName'] + "," + str(data['intNoOfHospital']) + "," + str(data['intNoOfBed']) + "," + str(data['intNoOfICU']) + "\n" 362 | outputString.append(dataString) 363 | outputString.append(url) 364 | writeToOutputCsv("OdishaDistrictBeds.csv", outputString) 365 | 366 | 367 | if metaObject.stateName == 'Puducherry': 368 | div = 
soup.find_all("div", {"class": "col-md-6"}) 369 | date = div[1].find("h5").get_text().replace('-', '/') 370 | table = div[1].find("table").find_all("tr") 371 | 372 | readAllEntriesForATable(table, outputString, "th", date, '') 373 | readAllEntriesForATable(table, outputString, "td", date, '') 374 | 375 | outputString.append(url) 376 | writeToOutputCsv("Puducherry.csv", outputString) 377 | 378 | 379 | if metaObject.stateName == "Gujarat": 380 | div = soup.find("div", {"class": "card-body p-1"}) 381 | date = soup.find("span", id="ctl00_body_lblDate").get_text() 382 | table = div.find("table").find_all("tr") 383 | 384 | tempOutputString = [] 385 | readAllEntriesForATable(table, tempOutputString, "th", 'Last Updated', '') 386 | 387 | 388 | table = div.find("table").find_all("tr") 389 | readAllEntriesForATable(table, tempOutputString, "span", date, '') 390 | 391 | districtNames = [] 392 | 393 | for row in table: 394 | data = row.find("td") 395 | if data is not None: 396 | districtNames.append(data.get_text().strip()) 397 | 398 | for index, value in enumerate(tempOutputString): 399 | if index == 0: 400 | outputString.append(value) 401 | else: 402 | districtString = districtNames[index - 1] + "," + value 403 | outputString.append(districtString) 404 | 405 | outputString.append(url) 406 | writeToOutputCsv("GujaratDistrict.csv", outputString) 407 | 408 | if metaObject.stateName == 'Andhra Pradesh': 409 | response = requests.request("POST", url).json() 410 | try: 411 | response = requests.request("POST", url).json() 412 | except: 413 | print("Error occurred while doing a request to " + url) 414 | return False 415 | districtDictionary = {} 416 | 417 | districtDictionary['District'] = "Cases,Active,Recovered,Death,Total Samples,Total Positive,Total Negative,Total Inprogress, Total, Beds, Hall, Rooms" 418 | for cases in (response['cases_district']): 419 | districtDictionary[cases['district_name']] = cases['cases'] +","+ cases['active'] +","+ cases['recovered'] +","+ 
cases['death'] 420 | 421 | for cases in (response['samples_district']): 422 | districtDictionary[cases['district_name']] = districtDictionary[cases['district_name']] + "," + cases['total'] +","+ cases['positive'] +","+ cases['negitive'] +","+ cases['inprogress'] 423 | 424 | for cases in (response['infra_district']): 425 | districtDictionary[cases['district_name']] = districtDictionary[cases['district_name']] + "," + cases['total'] +","+ cases['beds'] +","+ cases['hall'] +","+ cases['rooms'] 426 | 427 | 428 | for k, v in districtDictionary.items(): 429 | outputString.append(str(k) + "," + str(v) + "\n") 430 | 431 | outputString.append(url) 432 | writeToOutputCsv("APDistrict.csv", outputString) 433 | 434 | if metaObject.stateName == 'Rajasthan': 435 | table = soup.find('blockquote').find('table').find_all('tr') 436 | 437 | tempOutputString = [] 438 | readAllEntriesForATable(table, tempOutputString, "font", '', '') 439 | 440 | 441 | for index, row in enumerate(tempOutputString): 442 | if 'Discharged' in row: 443 | row = "SR. 
No, District - Country,Total Sample Received,Todays Positive,Cumulative Positive,Recovered,Discharged\n" 444 | if 'Other District' in row: 445 | row = "," + row 446 | if 'Total,' in row: 447 | rowValue = row.split(',') 448 | rowString = ""; 449 | for headerIndex, data in enumerate(rowValue): 450 | if headerIndex%2 == 0: 451 | rowString = rowString + "," + data 452 | row = rowString + "\n" 453 | if 'Grand Total' in row: 454 | row = "," + row 455 | if 'BSF' in row: 456 | row = ",," + row 457 | if 'Evacuees' in row: 458 | row = ",," 459 | if 'Italy' in row: 460 | row = ",," 461 | 462 | outputString.append(row) 463 | 464 | outputString.append(url) 465 | writeToOutputCsv("Rajasthan.csv", outputString) 466 | 467 | getDataForStates() 468 | 469 | 470 | 471 | 472 | -------------------------------------------------------------------------------- /automation/x: -------------------------------------------------------------------------------- 1 | Using pageId: 2 | [[{'id': '5f395a2f0deffa1bd752be5b', 'tagId': '5dd152552fc63e490ca55adb', 'header': False, 'name': 'East Khasi Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 73617, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 36632, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 36632, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 915}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 1253}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 35267}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 37435}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', 
'5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5d', 'tagId': '5dd152552fc63e490ca55add', 'header': False, 'name': 'RiBhoi', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 17995, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 9119, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 9119, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 76}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 249}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 
8797}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 9122}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5c', 'tagId': '5dd152552fc63e490ca55ae1', 'header': False, 'name': 'West Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 
'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 17628, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 8852, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 8852, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 71}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 78}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 8704}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 8853}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': 
'608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5f', 'tagId': '5dd152552fc63e490ca55ae2', 'header': False, 'name': 'West Jaintia Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 12075, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 6133, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 6133, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 115}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 189}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 5828}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 6132}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': 
'5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be62', 'tagId': '5dd152552fc63e490ca55ae3', 'header': False, 'name': 'West Khasi Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 7295, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 3802, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 3802, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 57}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 309}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 3436}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 3802}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': 
'5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be5e', 'tagId': '5dd152552fc63e490ca55ada', 'header': False, 'name': 'East Jaintia Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 4627, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 2336, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 2336, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 30}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 45}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 2261}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 2336}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': 
'5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be66', 'tagId': '5dd152552fc63e490ca55ae0', 'header': False, 'name': 'South West Khasi Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 4480, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 2300, 'meta': None}, {'headerId': 'ID', 'Operator': 
'COUNT_DISTINCT', 'value': 2300, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 14}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 120}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 2166}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 2300}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': 
'60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be60', 'tagId': '5dd152552fc63e490ca55adf', 'header': False, 'name': 'South West Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 3663, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1841, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1841, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 16}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 19}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1806}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1841}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': 
'5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be65', 'tagId': '5dd152552fc63e490ca55ad9', 'header': False, 'name': 'East Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 3618, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1820, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1820, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 9}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 22}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1789}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1820}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', 
'5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be61', 'tagId': '5dd152552fc63e490ca55adc', 'header': False, 'name': 'North Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 3309, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1658, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1658, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 8}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 7}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1643}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1658}], 'mappedDbs': {'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': 
'5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}, {'id': '5f395a2f0deffa1bd752be63', 'tagId': '5dd152552fc63e490ca55ade', 'header': False, 'name': 'South Garo Hills', 'inputType': 'LOV', 'headerId': '5f395a260deffa1bd752be54', 'Operator': 'COUNT_ROWS', 'value': [{'rowcount': 2406, 'headerId': None, 'mean': 0, 'std': 0, 'var': 0}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1205, 'meta': None}, {'headerId': 'ID', 'Operator': 'COUNT_DISTINCT', 'value': 1205, 'meta': None}, {'formulaId': '5f395dd80deffa1bd752bef5', 'value': 12}, {'formulaId': '5f395d9a0deffa1bd752beef', 'value': 4}, {'formulaId': '5f395dba0deffa1bd752bef2', 'value': 1189}, {'formulaId': '5f395d6f0deffa1bd752bee8', 'value': 1205}], 'mappedDbs': 
{'5dd037df2fc63e490ca559b6': '5dd037df2fc63e490ca559b6', '5ede407f90996a6151457855': '5ede407f90996a6151457855', '5ecffef990996a6151455379': '5ecffef990996a6151455379', '5ee8a2ff5fdbc42575880d99': '5ee8a2ff5fdbc42575880d99', '5dcd090b93f35e567f731e99': '5dcd090b93f35e567f731e99', '5e834589d8f38d5e485ee2c8': '5e834589d8f38d5e485ee2c8', '5ece64b190996a61514550b9': '5ece64b190996a61514550b9', '5dc00e5bd1120d4329e6a9cc': '5dc00e5bd1120d4329e6a9cc', '5ec9544690996a6151454ad2': '5ec9544690996a6151454ad2', '5ecd353890996a6151454bcb': '5ecd353890996a6151454bcb', '5eeb47055af12620876678a2': '5eeb47055af12620876678a2', '5ecf8cc490996a6151455150': '5ecf8cc490996a6151455150', '5ed0019a90996a6151455ba8': '5ed0019a90996a6151455ba8', '5edccdf390996a6151456cbc': '5edccdf390996a6151456cbc', '5f509748ccac542e7d96d39e': '5f509748ccac542e7d96d39e', '5e75c53ad8f38d5e485ed22d': '5e75c53ad8f38d5e485ed22d', '5f6b2860433ce111b922d90d': '5f6b2860433ce111b922d90d', '5e834587d8f38d5e485ee298': '5e834587d8f38d5e485ee298', '5f395a260deffa1bd752be4e': '5f395a260deffa1bd752be4e', '5f17e27cb9cf725ea8af2223': '5f17e27cb9cf725ea8af2223', '5f3b97480deffa1bd7530269': '5f3b97480deffa1bd7530269', '5f9fc2e1d6b6d23ecb12e88b': '5f9fc2e1d6b6d23ecb12e88b', '5fd99f97ac425f6f85a76796': '5fd99f97ac425f6f85a76796', '5ecd353690996a6151454b88': '5ecd353690996a6151454b88', '5f4f9979ccac542e7d96cbc7': '5f4f9979ccac542e7d96cbc7', '6048951108de571997a34279': '6048951108de571997a34279', '608fc6215e310145e9f93842': '608fc6215e310145e9f93842', '60c884989ea5a9402aba5190': '60c884989ea5a9402aba5190', '60c8924d9ea5a9402aba53f4': '60c8924d9ea5a9402aba53f4', '60c990319ea5a9402aba7225': '60c990319ea5a9402aba7225', '60bdb81c6b008b09eabdb52c': '60bdb81c6b008b09eabdb52c'}}]] 3 | 4 | ********************Meghalaya******************** 5 | East Khasi Hills,Meghalaya,ML,2309,Recovered 6 | Ribhoi,Meghalaya,ML,669,Recovered 7 | West Garo Hills,Meghalaya,ML,976,Recovered 8 | West Jaintia Hills,Meghalaya,ML,562,Recovered 9 | West Khasi 
Hills,Meghalaya,ML,480,Recovered 10 | East Jaintia Hills,Meghalaya,ML,179,Recovered 11 | South West Khasi Hills,Meghalaya,ML,313,Recovered 12 | South West Garo Hills,Meghalaya,ML,77,Recovered 13 | East Garo Hills,Meghalaya,ML,98,Recovered 14 | North Garo Hills,Meghalaya,ML,117,Recovered 15 | South Garo Hills,Meghalaya,ML,91,Recovered 16 | East Khasi Hills,Meghalaya,ML,814,Deceased 17 | Ribhoi,Meghalaya,ML,61,Deceased 18 | West Garo Hills,Meghalaya,ML,64,Deceased 19 | West Jaintia Hills,Meghalaya,ML,103,Deceased 20 | West Khasi Hills,Meghalaya,ML,36,Deceased 21 | East Jaintia Hills,Meghalaya,ML,25,Deceased 22 | South West Khasi Hills,Meghalaya,ML,12,Deceased 23 | South West Garo Hills,Meghalaya,ML,15,Deceased 24 | East Garo Hills,Meghalaya,ML,9,Deceased 25 | North Garo Hills,Meghalaya,ML,8,Deceased 26 | South Garo Hills,Meghalaya,ML,10,Deceased 27 | StateTotal, 76504, 72886, 1323 28 | SiteTotal, 76504, 67015, 166 29 | Dashboard url: https://services7.arcgis.com/nzBTI19PTHBZaEPT/arcgis/rest/services/Admin_Boundary/FeatureServer/1/query?f=json&returnGeometry=false&outFields=*&where=1=1 30 | -------------------------------------------------------------------------------- /automation/automation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import datetime 3 | import csv 4 | import requests 5 | import json 6 | import pdftotext 7 | import sys 8 | import os 9 | import re 10 | import logging 11 | import camelot 12 | from bs4 import BeautifulSoup 13 | import html5lib 14 | from deltaCalculator import DeltaCalculator 15 | 16 | 17 | ''' 18 | To add a new state: 19 | 20 | Make an entry into automation.meta file. 21 | Write a function GetData() 22 | Inside this function fetch/read files and prepare an array of hashes. 
23 | Each hash should be of the format: 24 | { 25 | "districtName": nameOfTheDistrict, 26 | "confirmed": TotalConfirmedCount, 27 | "recovered": TotalRecoveredCount, 28 | "deceased": TotalDeceasedCount 29 | } 30 | In case any of the values is unknown, pass -999 as the value. All keys are mandatory. 31 | 32 | Pass these values to the deltaCalculator.getStateDataFromSite function with the state name. 33 | Eg: deltaCalculator.getStateDataFromSite("Arunachal Pradesh", districtArray, option). The value for options are: full/detailed/. These values are passed via command line. 34 | 35 | The deltaCalculator object will return the valules to be added for today for the three categories across all districts mentioned. 36 | 37 | In case there are name mappings required, i.e, if the district name in the bulletin and the district name in the site are different, make entries in nameMapping.meta file. 38 | This file has , , as the format for each line. 39 | 40 | For any pdf reading, refer to readFileFromURLV2 function. This needs to be called from within the GetData() function. 41 | ''' 42 | 43 | 44 | 45 | logging.basicConfig(filename='deltaCalculator.log', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) 46 | deltaCalculator = DeltaCalculator() 47 | metaDictionary = {} 48 | option = "" 49 | typeOfAutomation = "url" 50 | pdfUrl = "" 51 | pageId = "" 52 | 53 | ''' This class holds the data from automation.meta file. 
class AutomationMeta:
    """One row of automation.meta: a state's name, code and dashboard URL."""

    def __init__(self, stateName, stateCode, url):
        self.stateName = stateName    # full state name, e.g. "Andhra Pradesh"
        self.stateCode = stateCode    # short code used to build the <code>GetData() entry point name
        self.url = url                # dashboard / bulletin URL for the state


def fetchData(stateName):
    """Run the scraper for one state, or for every state that has a URL configured.

    Per-state entry points follow the naming convention <stateCode>GetData(),
    so the call is assembled from the meta entry and executed.
    NOTE(review): eval() on meta-file content executes arbitrary code if
    automation.meta is ever tampered with; the file is treated as trusted here.
    """
    if stateName == "All States":
        for key, metaObject in metaDictionary.items():
            if len(metaObject.url.strip()) > 0:
                logging.info("Calling delta calculator for: " + metaObject.stateCode)
                eval(metaObject.stateCode + "GetData()")
                print("Dashboard url: " + metaObject.url)
    else:
        try:
            logging.info("Calling delta calculator for: " + metaDictionary[stateName].stateCode)
            eval(metaDictionary[stateName].stateCode + "GetData()")
            print("Dashboard url: " + metaDictionary[stateName].url)
        except KeyError:
            print("No entry found for state {} in automation.meta file".format(stateName))


def loadMetaData():
    """Populate metaDictionary from automation.meta (CSV: name,code,url; '#' starts a comment)."""
    with open("automation.meta", "r") as metaFile:
        for line in metaFile:
            if line.startswith('#'):
                continue
            lineArray = line.strip().split(',')
            metaObject = AutomationMeta(lineArray[0].strip(), lineArray[1].strip(), lineArray[2].strip())
            metaDictionary[lineArray[0].strip()] = metaObject
    # BUGFIX: dropped the redundant metaFile.close() -- the with-block already closes the file.


'''
def getAllColumnValues():
    columnSet = set()
    with open(".tmp/ct.txt", "r") as upFile:
        for line in upFile:
            for col in line.split('|')[1].split(','):
                columnSet.add(re.sub('\n', '', col.strip()))
    return sorted(columnSet)
'''


def CTGetData():
    """Chhattisgarh: parse .tmp/ct.txt OCR output ('values|column-markers' lines)."""
    districtArray = []
    '''columnNumbers = getAllColumnValues()'''
    with open(".tmp/ct.txt", "r") as upFile:
        for line in upFile:
            linesArray = line.split('|')[0].split(',')
            availableColumns = line.split('|')[1].split(',')

            districtDictionary = {}
            districtDictionary['deceased'] = 0
            confirmedFound = False
            recoveredFound = False
            deceasedFound = False
            # Column markers ("2", "4", "9", "12") identify which OCR column
            # holds which metric; deceased may appear more than once, hence +=.
            for index, data in enumerate(linesArray):
                if availableColumns[index].strip() == "2":
                    districtDictionary['districtName'] = data.strip()
                if availableColumns[index].strip() == "4":
                    districtDictionary['confirmed'] = int(data.strip())
                    confirmedFound = True
                if availableColumns[index].strip() == "9":
                    districtDictionary['recovered'] = int(data.strip())
                    recoveredFound = True
                if availableColumns[index].strip() == "12":
                    districtDictionary['deceased'] += int(data.strip())
                    deceasedFound = True

            if recoveredFound == False or confirmedFound == False:
                print("--> Issue with {}".format(linesArray))
                continue
            districtArray.append(districtDictionary)
    # BUGFIX: dropped the redundant upFile.close() inside the with-block.

    deltaCalculator.getStateDataFromSite("Chhattisgarh", districtArray, option)


def APGetData():
    """Andhra Pradesh: dispatch on the automation mode chosen on the command line."""
    if typeOfAutomation == "ocr":
        APGetDataByOCR()
    elif typeOfAutomation == "pdf":
        APGetDataByPdf()
    else:
        APGetDataByUrl()


def APGetDataByPdf():
    """Andhra Pradesh via PDF: convert the bulletin, then parse .tmp/ap.csv.

    Expected CSV columns: district, confirmed, recovered, deceased.
    """
    linesArray = []
    districtDictionary = {}
    districtArray = []
    readFileFromURLV2(metaDictionary['Andhra Pradesh'].url, "Andhra Pradesh", "Anantapur", "")
    try:
        with open(".tmp/ap.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                # Last column may be blank (trailing newline only) -> treat as 0.
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub('\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Andhra Pradesh", districtArray, option)
    except FileNotFoundError:
        print("ap.csv missing. Generate through pdf or ocr and rerun.")
def APGetDataByOCR():
    """Andhra Pradesh via OCR: parse .tmp/ap.txt ('fields|markers' lines).

    Expected fields per row: district, _, confirmed, _, recovered, deceased.
    """
    districtArray = []
    with open(".tmp/ap.txt", "r") as upFile:
        for line in upFile:
            if 'Total' in line:
                continue

            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 6:
                print("--> Issue with {}".format(linesArray))
                continue

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[2])
            districtDictionary['recovered'] = int(linesArray[4])
            # BUGFIX: the emptiness guard previously checked linesArray[3]
            # while converting linesArray[5]; inspect the field actually used.
            districtDictionary['deceased'] = int(linesArray[5]) if len(re.sub('\n', '', linesArray[5])) != 0 else 0
            districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Andhra Pradesh", districtArray, option)


def ARGetDataByOcr():
    """Arunachal Pradesh via OCR: parse .tmp/ar.txt.

    'Capital Complex' rows are folded into 'Papum Pare' before the totals are
    handed to the delta calculator.
    """
    districtArray = []
    additionalDistrictInfo = {}
    additionalDistrictInfo['districtName'] = 'Papum Pare'
    additionalDistrictInfo['confirmed'] = 0
    additionalDistrictInfo['recovered'] = 0
    additionalDistrictInfo['deceased'] = 0

    with open(".tmp/ar.txt", "r") as upFile:
        for line in upFile:
            if 'Total' in line:
                continue

            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 14:
                print("--> Issue with {}".format(linesArray))
                continue

            if linesArray[0].strip() == "Capital Complex" or linesArray[0].strip() == "Papum Pare":
                additionalDistrictInfo['confirmed'] += int(linesArray[5])
                additionalDistrictInfo['recovered'] += int(linesArray[12])
                additionalDistrictInfo['deceased'] += int(linesArray[13]) if len(re.sub('\n', '', linesArray[13])) != 0 else 0
                continue

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[5])
            districtDictionary['recovered'] = int(linesArray[12])
            # Last column may carry only the trailing newline -> treat as 0.
            districtDictionary['deceased'] = int(linesArray[13]) if len(re.sub('\n', '', linesArray[13])) != 0 else 0
            districtArray.append(districtDictionary)
    districtArray.append(additionalDistrictInfo)

    deltaCalculator.getStateDataFromSite("Arunachal Pradesh", districtArray, option)


def ARGetData():
    """Arunachal Pradesh: OCR input when requested, dashboard JSON otherwise."""
    if typeOfAutomation == "ocr":
        ARGetDataByOcr()
        return
    stateDashboard = requests.request("get", metaDictionary['Arunachal Pradesh'].url).json()
    districtArray = []
    for districtDetails in stateDashboard:
        if districtDetails['district'] == 'Total':
            continue
        districtDictionary = {}
        districtDictionary['districtName'] = districtDetails['district']
        districtDictionary['confirmed'] = int(districtDetails['confirmed'])
        districtDictionary['recovered'] = int(districtDetails['recovered'])
        districtDictionary['deceased'] = int(districtDetails['deceased'])

        districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Arunachal Pradesh", districtArray, option)


def APGetDataByUrl():
    """Andhra Pradesh via HTML: scrape the second table of the dashboard page."""
    response = requests.request("GET", metaDictionary['Andhra Pradesh'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find_all("table")[1].find_all("tr")

    districtArray = []
    for index, row in enumerate(table):
        data = row.find_all("td")
        # Skip the header row and the grand-total row.
        if 'Total' in data[0].get_text() or 'District' in data[0].get_text():
            continue

        districtDictionary = {}
        districtDictionary['districtName'] = data[0].get_text()
        districtDictionary['confirmed'] = int(data[1].get_text())
        districtDictionary['recovered'] = int(data[2].get_text())
        districtDictionary['deceased'] = int(data[3].get_text())
        districtArray.append(districtDictionary)

    """
    stateDashboard = requests.request("post", metaDictionary['Andhra Pradesh'].url).json()

    for districtDetails in (stateDashboard['cases_district']):
        districtDictionary = {}
        districtDictionary['districtName'] = districtDetails['district_name']
        districtDictionary['confirmed'] = int(districtDetails['cases'])
        districtDictionary['recovered'] = int(districtDetails['recovered'])
        districtDictionary['deceased'] = int(districtDetails['death'])

        districtArray.append(districtDictionary)
    """
    deltaCalculator.getStateDataFromSite("Andhra Pradesh", districtArray, option)


def ORGetData():
    """Odisha: extract the dashboard's embedded JSON via curl/sed and parse it.

    The dashboard embeds its data as a JSON.stringify(...) call inside a script
    tag; the shell pipeline strips the JS wrapper and keeps the first match.
    """
    os.system("curl -sk https://statedashboard.odisha.gov.in/ | grep -i string | grep -v legend | sed 's/var result = JSON.stringify(//' |sed 's/);//' | head -1 > orsite.csv")

    districtArray = []
    districtsData = []
    with open("orsite.csv", "r") as metaFile:
        for line in metaFile:
            districtsData = json.loads(line)
            for data in districtsData:
                districtDictionary = {}
                districtDictionary['districtName'] = data['vchDistrictName']
                districtDictionary['confirmed'] = int(data['intConfirmed'])
                districtDictionary['recovered'] = int(data['intRecovered'])
                # State reports "other" deaths separately; fold them in.
                districtDictionary['deceased'] = int(data['intDeceased']) + int(data['intOthDeceased'])
                districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Odisha", districtArray, option)


def MHGetData():
    """Maharashtra: OCR input when requested, dashboard JSON otherwise."""
    if typeOfAutomation == "ocr":
        MHGetDataByOcr()
    else:
        MHGetDataByUrl()
"1" 307 | 308 | readFileFromURLV2(metaDictionary['VCMohfw'].url + fileName, "VCMohfw", "A & N Islands", "") 309 | dadra = {'firstDose': 0, 'secondDose': 0, 'totalDose': 0} 310 | 311 | try: 312 | with open(".tmp/vcm.csv", "r") as upFile: 313 | for line in upFile: 314 | if "Dadra" in line or "Daman" in line: 315 | dadra['firstDose'] += int(line.split(',')[1]) 316 | dadra['secondDose'] += int(line.split(',')[2]) 317 | dadra['totalDose'] += int(line.split(',')[3]) 318 | continue 319 | print(today + "," + line, end = "") 320 | 321 | print("{}, DnH, {}, {}, {}".format(today, dadra['firstDose'], dadra['secondDose'], dadra['totalDose'])) 322 | except FileNotFoundError: 323 | print("br.txt missing. Generate through pdf or ocr and rerun.") 324 | 325 | def VCGetData(): 326 | today = (datetime.date.today() - datetime.timedelta(days = 1)).strftime("%Y-%m-%d") 327 | #proxy = {"https":"http://159.65.153.14:8080"} 328 | #vaccineDashboardNation = requests.request("get", "https://api.cowin.gov.in/api/v1/reports/getPublicReports?state_id=&district_id=&date=2021-03-01").json() 329 | stateKeys = { 330 | '36': 'West Bengal', 331 | '7': 'Chhattisgarh', 332 | '31': 'Tamil Nadu', 333 | '20': 'Madhya Pradesh', 334 | '13': 'Himachal Pradesh', 335 | '4': 'Assam', 336 | '15': 'Jharkhand', 337 | '11': 'Gujarat', 338 | '28': 'Punjab', 339 | '17': 'Kerala', 340 | '32': 'Telangana', 341 | '33': 'Tripura', 342 | '10': 'Goa', 343 | '14': 'Jammu and Kashmir', 344 | '34': 'Uttar Pradesh', 345 | '29': 'Rajasthan', 346 | '5': 'Bihar', 347 | '21': 'Maharashtra', 348 | '2': 'Andhra Pradesh', 349 | '16': 'Karnataka', 350 | '35': 'Uttarakhand', 351 | '26': 'Odisha', 352 | '12': 'Haryana', 353 | '3': 'Arunachal Pradesh', 354 | '9': 'Delhi', 355 | '1': 'Andaman and Nicobar Islands', 356 | '24': 'Mizoram', 357 | '23': 'Meghalaya', 358 | '27': 'Puducherry', 359 | '18': 'Ladakh', 360 | '30': 'Sikkim', 361 | '25': 'Nagaland', 362 | '37': 'Daman and Diu', 363 | '22': 'Manipur', 364 | '39': 'Himachal', 365 | '6': 
'Chandigarh', 366 | '8': 'Dadra and Nagar Haveli', 367 | '19': 'Lakshadweep', 368 | '0': 'India' 369 | } 370 | 371 | lookback = int(pageId) if len(pageId) != 0 else 0 372 | lookbackMaxDate = datetime.date(2021, 5, 21) 373 | if datetime.date.today() - datetime.timedelta(days = lookback) < lookbackMaxDate: 374 | lookback = (datetime.date.today() - lookbackMaxDate).days 375 | print("------------ Data beyond 21st May has different data ranges hence defaulting max lookback to max {} days--------- ".format(lookback)) 376 | print("date, state, district, daily vaccine count, beneficiaries, sessions, sites, vaccines given, vaccines given dose two, male, female, others, covaxin, covishield, sputnik, aefi, 18-45, 45-60, 60+") 377 | for day in range (lookback, -1, -1): 378 | today = (datetime.date.today() - datetime.timedelta(days = day)).strftime("%Y-%m-%d") 379 | todayStr = (datetime.date.today() - datetime.timedelta(days = day)).strftime("%d-%m-%Y") 380 | if option == "V2": 381 | metaDictionary['Vaccine'].url = "https://api.cowin.gov.in/api/v1/reports/v2/getPublicReports?state_id=@@state_id@@&district_id=@@district_id@@&date=@@date@@" 382 | url = re.sub('@@date@@', today, metaDictionary['Vaccine'].url) 383 | url_nation = re.sub('@@district_id@@', '', re.sub('@@state_id@@', '', url)) 384 | 385 | districtCount = 1 386 | 387 | if option == "V2": 388 | districtArray = getAndPrintVaccineDataV2(url_nation, '0', todayStr, stateKeys, '') 389 | else: 390 | districtArray = getAndPrintVaccineDataV1(url_nation, '0', todayStr, stateKeys, '') 391 | 392 | 393 | for state_code in range(1, 38, 1): 394 | url_state = re.sub('@@district_id@@', '', re.sub('@@state_id@@', str(state_code), url)) 395 | districtArray = [] 396 | 397 | if option == "V2": 398 | districtArray = getAndPrintVaccineDataV2(url_state, state_code, todayStr, stateKeys, '') 399 | else: 400 | districtArray = getAndPrintVaccineDataV1(url_state, state_code, todayStr, stateKeys, '') 401 | 402 | if not districtArray: 403 | continue 
404 | for district in districtArray: 405 | url_district = re.sub('@@district_id@@', str(district['district_id']), re.sub('@@state_id@@', str(state_code), url)) 406 | if option == "V2": 407 | getAndPrintVaccineDataV2(url_district, state_code, todayStr, stateKeys, district['district_name']) 408 | else: 409 | getAndPrintVaccineDataV1(url_district, state_code, todayStr, stateKeys, district['district_name']) 410 | 411 | 412 | 413 | def getAndPrintVaccineDataV1(url, state_code, todayStr, stateKeys, districtName): 414 | 415 | vaccineDashboard = requests.request("get", url) 416 | if vaccineDashboard.status_code != 200: 417 | while True: 418 | vaccineDashboard = requests.request("get", url) 419 | if vaccineDashboard.status_code == 200: 420 | break 421 | vaccineDashboard = vaccineDashboard.json() 422 | if not vaccineDashboard: 423 | return 424 | gender = {'male': 0, 'female': 0, 'others': 0} 425 | #print(vaccineDashboard) 426 | for i in range (0, 3, 1): 427 | if vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['gender_label'].lower() == 'male': 428 | gender['male'] = vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['count'] 429 | if vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['gender_label'].lower() == 'female': 430 | gender['female'] = vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['count'] 431 | if vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['gender_label'].lower() == 'others': 432 | gender['others'] = vaccineDashboard['vaccinatedBeneficiaryByGender'][i]['count'] 433 | 434 | typeOfVaccine = {'covaxin': 0, 'covishield': 0} 435 | for i in range (0, 2, 1): 436 | if vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['material_name'].lower() == 'covaxin': 437 | typeOfVaccine['covaxin'] = vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['count'] 438 | if vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['material_name'].lower() == 'covishield': 439 | typeOfVaccine['covishield'] = 
vaccineDashboard['vaccinatedBeneficiaryByMaterial'][i]['count'] 440 | 441 | print("{}, {}, '{}', {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". \ 442 | format(todayStr, \ 443 | stateKeys[str(state_code)], \ 444 | districtName, \ 445 | vaccineDashboard['dailyVaccineData']['vaccine_given'], \ 446 | vaccineDashboard['overAllReports']['Beneficiaries'], \ 447 | vaccineDashboard['overAllReports']['Sessions'], \ 448 | vaccineDashboard['overAllReports']['Sites'], \ 449 | vaccineDashboard['overAllReports']['Vaccine Given'], \ 450 | vaccineDashboard['overAllReports']['Vaccine Given Dose Two'], \ 451 | gender['male'], \ 452 | gender['female'], \ 453 | gender['others'], \ 454 | typeOfVaccine['covaxin'], \ 455 | typeOfVaccine['covishield'] 456 | )) 457 | with open('output.out','a') as file: 458 | print("{}, {}, '{}', {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". \ 459 | format(todayStr, \ 460 | stateKeys[str(state_code)], \ 461 | districtName, \ 462 | vaccineDashboard['dailyVaccineData']['vaccine_given'], \ 463 | vaccineDashboard['overAllReports']['Beneficiaries'], \ 464 | vaccineDashboard['overAllReports']['Sessions'], \ 465 | vaccineDashboard['overAllReports']['Sites'], \ 466 | vaccineDashboard['overAllReports']['Vaccine Given'], \ 467 | vaccineDashboard['overAllReports']['Vaccine Given Dose Two'], \ 468 | gender['male'], \ 469 | gender['female'], \ 470 | gender['others'], \ 471 | typeOfVaccine['covaxin'], \ 472 | typeOfVaccine['covishield'] 473 | ), file = file) 474 | return vaccineDashboard['getBeneficiariesGroupBy'] 475 | 476 | def getAndPrintVaccineDataV2(url, state_code, todayStr, stateKeys, districtName): 477 | vaccineDashboard = requests.request("get", url) 478 | if vaccineDashboard.status_code != 200: 479 | while True: 480 | vaccineDashboard = requests.request("get", url) 481 | if vaccineDashboard.status_code == 200: 482 | break 483 | vaccineDashboard = vaccineDashboard.json() 484 | if not vaccineDashboard: 485 | return 486 | 487 | 488 | category = 
vaccineDashboard['topBlock']['vaccination'] 489 | if 'vaccinationByAge' in vaccineDashboard.keys(): 490 | category = vaccineDashboard['vaccinationByAge'] 491 | 492 | print("{}, {}, \"{}\", {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". \ 493 | format(todayStr, \ 494 | stateKeys[str(state_code)], \ 495 | districtName, \ 496 | vaccineDashboard['topBlock']['vaccination']['today'], \ 497 | vaccineDashboard['topBlock']['vaccination']['total'], \ 498 | vaccineDashboard['topBlock']['sessions']['total'], \ 499 | vaccineDashboard['topBlock']['sites']['total'], \ 500 | vaccineDashboard['topBlock']['vaccination']['tot_dose_1'], \ 501 | vaccineDashboard['topBlock']['vaccination']['tot_dose_2'], \ 502 | vaccineDashboard['topBlock']['vaccination']['male'], \ 503 | vaccineDashboard['topBlock']['vaccination']['female'], \ 504 | vaccineDashboard['topBlock']['vaccination']['others'], \ 505 | vaccineDashboard['topBlock']['vaccination']['covaxin'], \ 506 | vaccineDashboard['topBlock']['vaccination']['covishield'], \ 507 | vaccineDashboard['topBlock']['vaccination']['sputnik'], \ 508 | vaccineDashboard['topBlock']['vaccination']['aefi'], \ 509 | category['vac_18_45'], \ 510 | category['vac_45_60'], \ 511 | category['above_60'] 512 | ) 513 | ) 514 | 515 | 516 | with open('output2.out','a') as file: 517 | print("{}, {}, \"{}\", {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {} ". 
\ 518 | format(todayStr, \ 519 | stateKeys[str(state_code)], \ 520 | districtName, \ 521 | vaccineDashboard['topBlock']['vaccination']['today'], \ 522 | vaccineDashboard['topBlock']['vaccination']['total'], \ 523 | vaccineDashboard['topBlock']['sessions']['total'], \ 524 | vaccineDashboard['topBlock']['sites']['total'], \ 525 | vaccineDashboard['topBlock']['vaccination']['tot_dose_1'], \ 526 | vaccineDashboard['topBlock']['vaccination']['tot_dose_2'], \ 527 | vaccineDashboard['topBlock']['vaccination']['male'], \ 528 | vaccineDashboard['topBlock']['vaccination']['female'], \ 529 | vaccineDashboard['topBlock']['vaccination']['others'], \ 530 | vaccineDashboard['topBlock']['vaccination']['covaxin'], \ 531 | vaccineDashboard['topBlock']['vaccination']['covishield'], \ 532 | ), file = file) 533 | return vaccineDashboard['getBeneficiariesGroupBy'] 534 | 535 | 536 | def MHGetDataByOcr(): 537 | linesArray = [] 538 | districtDictionary = {} 539 | districtArray = [] 540 | try: 541 | with open(".tmp/mh.txt", "r") as upFile: 542 | isIgnoreFlagSet = False 543 | for line in upFile: 544 | linesArray = line.split('|')[0].split(',') 545 | if 'Total' in line or isIgnoreFlagSet == True: 546 | isIgnoreFlagSet = True 547 | print("--> Ignoring {} ".format(line)) 548 | if len(linesArray) != 6: 549 | print("--> Ignoring due to invalid length: {}".format(linesArray)) 550 | continue 551 | districtDictionary = {} 552 | try: 553 | if is_number(linesArray[0].strip()): 554 | print("--> Ignoring: {}".format(linesArray)) 555 | continue 556 | 557 | districtDictionary['districtName'] = linesArray[0].strip().title() 558 | districtDictionary['confirmed'] = int(linesArray[1]) 559 | districtDictionary['recovered'] = int(linesArray[2]) 560 | districtDictionary['deceased'] = int(linesArray[3]) 561 | districtDictionary['migrated'] = int(linesArray[4]) 562 | districtArray.append(districtDictionary) 563 | except ValueError: 564 | print("--> Ignoring: {}".format(linesArray)) 565 | continue 566 | 567 | 
def MHGetDataByUrl():
    """Pull Maharashtra district counts from the state dashboard JSON feed."""
    stateDashboard = requests.request("get", metaDictionary['Maharashtra'].url).json()

    districtArray = []
    for districtDetails in stateDashboard:
        districtArray.append({
            'districtName': districtDetails['District'],
            'confirmed': districtDetails['Positive Cases'],
            'recovered': districtDetails['Recovered'],
            'deceased': districtDetails['Deceased'],
        })

    deltaCalculator.getStateDataFromSite("Maharashtra", districtArray, option)


def HPGetData():
    """Parse OCR output in .tmp/hp.txt into Himachal Pradesh district counts.

    Expects 11 comma-separated columns: col 0 = district, col 1 = confirmed,
    col 8 = recovered, col 9 = deceased (col 10 = migrated, unused).
    """
    districtArray = []
    try:
        with open(".tmp/hp.txt", "r") as upFile:
            for line in upFile:
                # Strip OCR asterisk noise before splitting.
                line = re.sub(r'\*', '', line)
                linesArray = line.split('|')[0].split(',')

                if len(linesArray) != 11:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1].strip())
                districtDictionary['recovered'] = int(linesArray[8].strip())
                # Asterisks were stripped above already; keep the defensive sub.
                districtDictionary['deceased'] = int(re.sub(r'\*', '', linesArray[9].strip()).strip())
                #districtDictionary['migrated'] = int(linesArray[10].strip())

                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Himachal Pradesh", districtArray, option)
    except FileNotFoundError:
        print("hp.txt missing. Generate through pdf or ocr and rerun.")


def RJGetDataUsingUrl():
    """Deprecated HTML scrape of the Rajasthan dashboard.

    NOTE(review): the guard below is always True (no value equals both
    "ocr" and "pdf"), so this function always exits at the print.  Kept
    as-is because the method is declared deprecated; the author probably
    meant `and` if it must ever run again.
    """
    if typeOfAutomation != "ocr" or typeOfAutomation != "pdf":
        print("RJ Getdata using url is deprecated")
        return
    response = requests.request("GET", metaDictionary['Rajasthan'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('blockquote').find('table').find_all('tr')

    districtArray = []
    for index, rowContent in enumerate(table):
        dataPoints = rowContent.find_all("td")
        # Skip the header row and anything that is not a 7-column data row.
        if index == 0 or len(dataPoints) != 7:
            continue
        districtName = re.sub(' +', ' ', re.sub(r'\n', ' ', dataPoints[1].get_text().strip()))

        districtDictionary = {}
        districtDictionary['districtName'] = districtName
        districtDictionary['confirmed'] = int(dataPoints[4].get_text().strip())
        districtDictionary['recovered'] = int(dataPoints[5].get_text().strip())
        districtDictionary['deceased'] = -999   # deaths not published on this page
        districtArray.append(districtDictionary)

    print(districtArray)
    deltaCalculator.getStateDataFromSite("Rajasthan", districtArray, option)


def GJGetData():
    """Scrape the Gujarat dashboard HTML table into district counts."""
    response = requests.request("GET", metaDictionary['Gujarat'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("div", {"class": "table-responsive"}).find_all("tr")

    districtArray = []
    for index, row in enumerate(table):
        # Last row is the state total.
        if index == len(table) - 1:
            continue

        dataPoints = row.find_all("td")
        if len(dataPoints) != 6:
            continue

        districtDictionary = {}
        districtDictionary['districtName'] = dataPoints[0].get_text()
        districtDictionary['confirmed'] = int(dataPoints[1].get_text().strip())
        districtDictionary['recovered'] = int(dataPoints[3].get_text().strip())
        districtDictionary['deceased'] = int(dataPoints[5].get_text().strip())
        districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Gujarat", districtArray, option)
def TGGetData():
    """Print Telangana per-district confirmed counts from OCR output.

    GHMC (Greater Hyderabad) is reported under district 'Hyderabad'.
    """
    with open(".tmp/tg.txt", "r") as tgFile:
        for line in tgFile:
            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 2:
                print("--> Issue with {}".format(linesArray))
                continue
            if linesArray[0].strip().capitalize() == "Ghmc":
                linesArray[0] = "Hyderabad"
            print("{},Telangana,TG,{},Hospitalized".format(linesArray[0].strip().title(), linesArray[1].strip()))


def UPGetData():
    """Parse OCR output in .tmp/up.txt into Uttar Pradesh district counts.

    Two OCR layouts exist: "ocr1" (7 columns) and "ocr2" (8 columns).
    When more than 10 lines fail to parse, the other layout is tried by
    flipping typeOfAutomation and re-running this function once.
    """
    errorCount = 0
    districtArray = []
    lengthOfArray = 7
    activeIndex = 6
    recoveredIndex = 3
    deceasedIndex = 5
    global typeOfAutomation

    if typeOfAutomation == "ocr1":
        lengthOfArray = 7
        activeIndex = 6
        recoveredIndex = 3
        deceasedIndex = 5
    else:
        typeOfAutomation = "ocr2"
        lengthOfArray = 8
        activeIndex = 7
        recoveredIndex = 4
        deceasedIndex = 6
    print("--> Using format {}".format(typeOfAutomation))

    try:
        with open(".tmp/up.txt", "r") as upFile:
            for line in upFile:
                splitArray = re.sub(r'\n', '', line.strip()).split('|')
                linesArray = splitArray[0].split(',')

                if errorCount > 10:
                    # Too many malformed lines: assume the other OCR layout
                    # and restart from scratch.
                    errorCount = 0
                    if typeOfAutomation == "ocr1":
                        typeOfAutomation = "ocr2"
                    else:
                        typeOfAutomation = "ocr1"
                    print("--> Switching to version {}. Error count breached.".format(typeOfAutomation))
                    UPGetData()
                    return

                if len(linesArray) != lengthOfArray:
                    print("--> Issue with {}".format(linesArray))
                    errorCount += 1
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                # The bulletin does not print cumulative confirmed; derive it
                # as recovered + deceased + active.
                districtDictionary['confirmed'] = int(linesArray[recoveredIndex]) + int(linesArray[deceasedIndex]) + int(linesArray[activeIndex])
                districtDictionary['recovered'] = int(linesArray[recoveredIndex])
                districtDictionary['deceased'] = int(linesArray[deceasedIndex])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Uttar Pradesh", districtArray, option)
    except FileNotFoundError:
        print("up.txt missing. Generate through pdf or ocr and rerun.")


def UTGetData():
    """Parse OCR output in .tmp/ut.txt into Uttarakhand district counts.

    Everything from the 'Total' row onward is a summary and is skipped.
    """
    districtArray = []
    ignoreLines = False
    try:
        with open(".tmp/ut.txt", "r") as upFile:
            for line in upFile:
                if ignoreLines == True:
                    continue

                if 'Total' in line:
                    ignoreLines = True
                    continue

                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 6:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[4])
                districtDictionary['migrated'] = int(linesArray[5])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Uttarakhand", districtArray, option)
    except FileNotFoundError:
        # Was "br.txt" — copy/paste slip; this reader uses ut.txt.
        print("ut.txt missing. Generate through pdf or ocr and rerun.")


def BRGetData():
    """Parse OCR output in .tmp/br.txt into Bihar district counts."""
    districtArray = []
    try:
        with open(".tmp/br.txt", "r") as upFile:
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 5:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0]
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Bihar", districtArray, option)
    except FileNotFoundError:
        print("br.txt missing. Generate through pdf or ocr and rerun.")
def JHGetData():
    """Dispatch Jharkhand extraction based on the automation mode."""
    if typeOfAutomation == "ocr":
        JHGetDataByOCR()
    else:
        JHGetDataByURL2()

def JHGetDataByURL():
    """Legacy Jharkhand fetch from the map-data endpoint.

    Superseded by JHGetDataByURL2 (which the dispatcher calls); kept for
    reference.
    """

    url = "https://covid19dashboard.jharkhand.gov.in/Home/mapData?action=\"total\"&district_id=0"

    payload="action=total&district_id=0"
    headers = {
        'Origin': 'https://covid19dashboard.jharkhand.gov.in',
        'Referer': 'https://covid19dashboard.jharkhand.gov.in/',
        'Host': 'covid19dashboard.jharkhand.gov.in',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Content-Length': '26',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Cookie': 'ci_session=9n007bqlfk2q8joa9igknjpq6vpl2gmm'
    }

    response = requests.request("POST", url, headers=headers, data=payload).json()

    districtArray = []
    for data in response:
        districtDictionary = {}
        districtDictionary['districtName'] = data['district']
        districtDictionary['confirmed'] = int(data['positive_cases'])
        districtDictionary['recovered'] = int(data['recovered_cases'])
        districtDictionary['deceased'] = int(data['deaths'])
        print(districtDictionary)
        districtArray.append(districtDictionary)

    deltaCalculator.getStateDataFromSite("Jharkhand", districtArray, option)

def JHGetDataByURL2():
    """Scrape the Jharkhand bulletin table and print per-district deltas.

    Rows before the 'Bokaro' row are table headers and are skipped.
    Zero counts are suppressed from the output.
    """
    # NOTE(review): the query-string date looks stale; today's date is sent
    # in the POST payload below — presumably the server honours the payload.
    # Confirm before cleaning up the URL.
    url = "https://covid19dashboard.jharkhand.gov.in/Bulletin/GetTestCaseData?date=2021-03-25"

    payload="date=" + (datetime.date.today() - datetime.timedelta(days = 0)).strftime("%Y-%m-%d")
    headers = {
        'Host': 'covid19dashboard.jharkhand.gov.in',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        # The original dict listed 'Accept' twice; Python keeps only the last
        # duplicate key, so this single entry is behavior-identical.
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Content-Length': '15',
        'Cookie': 'ci_session=i6qt39o41i7gsopt23ipm083hla6994c'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    soup = BeautifulSoup(response.content, 'html.parser')
    districts = soup.find("table").find_all("tr")

    districtStart = False
    for district in districts:

        if "Bokaro" in district.get_text() and districtStart == False:
            districtStart = True

        if districtStart == False:
            continue

        data = district.find_all("td")

        if int(data[3].get_text()) != 0:
            print("{},Jharkhand,JH,{},Hospitalized".format(data[1].get_text(), data[3].get_text()))
        if int(data[4].get_text()) != 0:
            print("{},Jharkhand,JH,{},Recovered".format(data[1].get_text(), data[4].get_text()))
        if int(data[6].get_text()) != 0:
            print("{},Jharkhand,JH,{},Deceased".format(data[1].get_text(), data[6].get_text()))


def JHGetDataByOCR():
    """Parse OCR output in .tmp/jh.txt; each metric is split over two
    columns that are summed per district."""
    districtArray = []
    try:
        with open(".tmp/jh.txt", "r") as upFile:
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 8:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[4]) + int(linesArray[5])
                districtDictionary['recovered'] = int(linesArray[2]) + int(linesArray[6])
                districtDictionary['deceased'] = int(linesArray[3]) + int(linesArray[7])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Jharkhand", districtArray, option)
    except FileNotFoundError:
        print("jh.txt missing. Generate through pdf or ocr and rerun.")

def RJGetData():
    """Parse OCR output in .tmp/rj.txt into Rajasthan district counts.

    Rows from 'Other' onward (other-state entries etc.) are skipped.
    """
    districtArray = []
    skipValues = False
    try:
        with open(".tmp/rj.txt", "r") as upFile:
            for line in upFile:
                if 'Other' in line:
                    skipValues = True
                    continue
                if skipValues == True:
                    continue

                linesArray = line.split('|')[0].split(',')

                if len(linesArray) != 9:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip().title()
                districtDictionary['confirmed'] = int(linesArray[3])
                districtDictionary['recovered'] = int(linesArray[7])
                districtDictionary['deceased'] = int(linesArray[5])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Rajasthan", districtArray, option)
    except FileNotFoundError:
        print("rj.txt missing. Generate through pdf or ocr and rerun.")
def MPGetData():
    """Parse OCR output in .tmp/mp.txt into Madhya Pradesh district counts.

    Everything from the 'Total' row onward is a summary and is skipped.
    """
    districtArray = []
    try:
        with open(".tmp/mp.txt", "r") as upFile:
            isIgnoreFlagSet = False
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if 'Total' in line or isIgnoreFlagSet == True:
                    isIgnoreFlagSet = True
                    print("--> Ignoring {} ".format(line))
                    # Fix: actually skip the line as announced; previously a
                    # well-formed row after 'Total' still fell through and
                    # could be appended.
                    continue
                if len(linesArray) != 8:
                    print("--> Ignoring due to invalid length: {}".format(linesArray))
                    continue
                districtDictionary = {}
                try:
                    if is_number(linesArray[0].strip()):
                        print("--> Ignoring: {}".format(linesArray))
                        continue

                    districtDictionary['districtName'] = linesArray[0].strip().title()
                    districtDictionary['confirmed'] = int(linesArray[2])
                    districtDictionary['recovered'] = int(linesArray[6])
                    districtDictionary['deceased'] = int(linesArray[4])
                    districtArray.append(districtDictionary)
                except ValueError:
                    print("--> Ignoring: {}".format(linesArray))
                    continue

        deltaCalculator.getStateDataFromSite("Madhya Pradesh", districtArray, option)
    except FileNotFoundError:
        # Was "rj.txt" — copy/paste slip; this reader uses mp.txt.
        print("mp.txt missing. Generate through pdf or ocr and rerun.")

def JKGetData():
    """Parse OCR output in .tmp/jk.txt into Jammu & Kashmir district counts."""
    districtArray = []
    try:
        with open(".tmp/jk.txt", "r") as upFile:
            for line in upFile:
                linesArray = line.split('|')[0].split(',')
                if len(linesArray) != 11:
                    print("--> Ignoring due to invalid length: {}".format(linesArray))
                    continue
                districtDictionary = {}
                try:
                    # Numeric first column means a stray OCR artefact row.
                    if is_number(linesArray[0].strip()):
                        print("--> Ignoring: {}".format(linesArray))
                        continue

                    districtDictionary['districtName'] = linesArray[0].strip().title()
                    districtDictionary['confirmed'] = int(linesArray[6])
                    districtDictionary['recovered'] = int(linesArray[9])
                    districtDictionary['deceased'] = int(linesArray[10])
                    districtArray.append(districtDictionary)
                except ValueError:
                    print("--> Ignoring: {}".format(linesArray))
                    continue

        deltaCalculator.getStateDataFromSite("Jammu and Kashmir", districtArray, option)
    except FileNotFoundError:
        # Was "rj.txt" — copy/paste slip; this reader uses jk.txt.
        print("jk.txt missing. Generate through pdf or ocr and rerun.")
def WBGetData():
    """Convert the West Bengal bulletin (via readFileFromURLV2) and report
    per-district counts from the generated .tmp/wb.csv."""
    districtArray = []
    readFileFromURLV2(metaDictionary['West Bengal'].url, "West Bengal", "Alipurduar", "TOTAL")
    try:
        with open(".tmp/wb.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                # Last column may be blank in the PDF; treat blank as 0.
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("West Bengal", districtArray, option)
    except FileNotFoundError:
        # Was "wb.txt" — this path reads the generated wb.csv.
        print("wb.csv missing. Generate through pdf or ocr and rerun.")

def PBGetDataThroughPdf():
    """Convert the Punjab bulletin (via readFileFromURLV2) and report
    per-district counts from the generated .tmp/pb.csv."""
    districtArray = []
    readFileFromURLV2(metaDictionary['Punjab'].url, "Punjab", "Ludhiana", "Total")
    try:
        with open(".tmp/pb.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 5:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[3])
                # Fix: the blank-column guard tested column 3 while the value
                # converted is column 4 — an off-by-one that crashed on a
                # blank deceased column.
                districtDictionary['deceased'] = int(linesArray[4]) if len(re.sub(r'\n', '', linesArray[4])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Punjab", districtArray, option)
    except FileNotFoundError:
        # Was "pb.txt" — this path reads the generated pb.csv.
        print("pb.csv missing. Generate through pdf or ocr and rerun.")

def PBGetData():
    """Dispatch Punjab extraction based on the automation mode."""
    if typeOfAutomation == "pdf":
        PBGetDataThroughPdf()
    else:
        PBGetDataThroughOcr()

def PBGetDataThroughOcr():
    """Parse OCR output in .tmp/pb.txt into Punjab district counts."""
    districtArray = []
    try:
        with open(".tmp/pb.txt", "r") as upFile:
            for line in upFile:
                splitArray = re.sub(r'\n', '', line.strip()).split('|')
                linesArray = splitArray[0].split(',')

                if len(linesArray) != 5:
                    print("--> Issue with {}".format(linesArray))
                    continue
                if linesArray[0].strip() == "Total":
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[3])
                districtDictionary['deceased'] = int(linesArray[4])
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Punjab", districtArray, option)
    except FileNotFoundError:
        print("pb.txt missing. Generate through pdf or ocr and rerun.")
def KAGetData():
    """Dispatch Karnataka extraction based on the automation mode."""
    if typeOfAutomation == "ocr":
        KAGetDataByOCR()
    else:
        KAGetDataByUrl()


def KAGetDataByOCR():
    """Parse OCR output in .tmp/ka.txt into Karnataka district counts."""
    districtArray = []
    with open(".tmp/ka.txt") as kaFile:
        for line in kaFile:
            # Scrub OCR noise characters seen in the bulletin table.
            line = line.replace('"', '').replace('*', '').replace('#', '').replace('$', '')
            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 9:
                print("--> Issue with {}".format(linesArray))
                continue

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[2])
            districtDictionary['recovered'] = int(linesArray[4])
            districtDictionary['deceased'] = int(linesArray[7]) if len(re.sub(r'\n', '', linesArray[7])) != 0 else 0
            districtArray.append(districtDictionary)
    deltaCalculator.getStateDataFromSite("Karnataka", districtArray, option)

def KAGetDataByUrl():
    """Download the Karnataka bulletin (optionally rewriting a Google Drive
    share link) and report district counts from .tmp/ka.csv; can also kick
    off the separate deceased-extraction flow."""
    global pdfUrl
    global pageId
    districtArray = []
    runDeceased = False
    startId = 0
    endId = 0

    # pageId may carry "page,start,end" — the extra ids drive the deceased
    # extractor run at the end.
    if ',' in pageId:
        startId = pageId.split(',')[1]
        endId = pageId.split(',')[2]
        pageId = pageId.split(',')[0]
        runDeceased = True

    if len(pdfUrl) != 0:
        # Rewrite a Google Drive "file/d/<id>" share link into a direct
        # download URL.
        # NOTE(review): if the link has no "file/d/<id>" part, fileId stays
        # unbound and the next line raises NameError — confirm inputs.
        urlArray = pdfUrl.split('/')
        for index, parts in enumerate(urlArray):
            if parts == "file":
                if urlArray[index + 1] == "d":
                    fileId = urlArray[index + 2]
                    break
        pdfUrl = "https://docs.google.com/uc?export=download&id=" + fileId
        print("--> Downloading using: {}".format(pdfUrl))
    readFileFromURLV2('', "Karnataka", "Bagalakote", "Total")
    try:
        with open(".tmp/ka.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Karnataka", districtArray, option)

        if runDeceased == True:
            os.system("python3 kaautomation.py d " + str(startId) + " " + str(endId) + " && cat kaconfirmed.csv")

    except FileNotFoundError:
        # Was "ka.txt" — this path reads the generated ka.csv.
        print("ka.csv missing. Generate through pdf or ocr and rerun.")

def HRGetData():
    """Report Haryana district counts from the bulletin (.tmp/hr.csv)."""
    districtArray = []
    if typeOfAutomation == "pdf":
        readFileFromURLV2(metaDictionary['Haryana'].url, "Haryana", "Gurugram", "Total")
    try:
        with open(".tmp/hr.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue

                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Haryana", districtArray, option)
    except FileNotFoundError:
        print("hr.csv missing. Generate through pdf or ocr and rerun.")

def TNGetData():
    """Report Tamil Nadu district counts from OCR text or the converted PDF."""
    if typeOfAutomation == "ocr":
        getTNDataThroughOcr()
        return
    else:
        convertTnPDFToCSV()
    districtArray = []
    try:
        with open(".tmp/tn.csv", "r") as upFile:
            for line in upFile:
                linesArray = line.split(',')
                if len(linesArray) != 4:
                    print("--> Issue with {}".format(linesArray))
                    continue
                linesArray[3] = linesArray[3].replace('$', '')
                districtDictionary = {}
                districtDictionary['districtName'] = linesArray[0].strip()
                districtDictionary['confirmed'] = int(linesArray[1])
                districtDictionary['recovered'] = int(linesArray[2])
                districtDictionary['deceased'] = int(linesArray[3]) if len(re.sub(r'\n', '', linesArray[3])) != 0 else 0
                districtArray.append(districtDictionary)

        deltaCalculator.getStateDataFromSite("Tamil Nadu", districtArray, option)
    except FileNotFoundError:
        # Was "tn.txt" — this path reads the generated tn.csv.
        print("tn.csv missing. Generate through pdf or ocr and rerun.")
def getTNDataThroughOcr():
    """Parse OCR output in .tmp/tn.txt into Tamil Nadu district counts.

    All 'Airport' rows are folded into a single 'Airport Quarantine'
    pseudo-district; 'Railway' rows are renamed 'Railway Quarantine'.
    """
    districtArray = []
    airportDictionary = {'districtName': 'Airport Quarantine', "confirmed": 0, "recovered": 0, "deceased": 0}
    with open(".tmp/tn.txt") as tnFile:
        for line in tnFile:
            # Scrub OCR noise characters.
            line = line.replace('"', '').replace('*', '').replace('#', '').replace('$', '')
            linesArray = line.split('|')[0].split(',')
            if len(linesArray) != 5:
                print("--> Issue with {}".format(linesArray))
                continue

            if 'Airport' in line:
                airportDictionary['confirmed'] += int(linesArray[1])
                airportDictionary['recovered'] += int(linesArray[2])
                airportDictionary['deceased'] += int(linesArray[4]) if len(re.sub(r'\n', '', linesArray[4])) != 0 else 0
                continue

            if 'Railway' in line:
                linesArray[0] = 'Railway Quarantine'

            districtDictionary = {}
            districtDictionary['districtName'] = linesArray[0].strip()
            districtDictionary['confirmed'] = int(linesArray[1])
            districtDictionary['recovered'] = int(linesArray[2])
            districtDictionary['deceased'] = int(linesArray[4]) if len(re.sub(r'\n', '', linesArray[4])) != 0 else 0
            districtArray.append(districtDictionary)

    districtArray.append(airportDictionary)
    deltaCalculator.getStateDataFromSite("Tamil Nadu", districtArray, option)



def NLGetData():
    """Parse OCR output in .tmp/nl.txt into Nagaland district counts."""
    districtArray = []
    if typeOfAutomation == "ocr":
        try:
            with open(".tmp/nl.txt", "r") as upFile:
                for line in upFile:
                    linesArray = line.split('|')[0].split(',')
                    if len(linesArray) != 13:
                        print("--> Issue with {}".format(linesArray))
                        continue

                    districtDictionary = {}
                    districtDictionary['districtName'] = linesArray[0].strip()
                    districtDictionary['confirmed'] = int(linesArray[12])
                    districtDictionary['recovered'] = int(linesArray[7])
                    districtDictionary['migrated'] = int(linesArray[11])
                    districtDictionary['deceased'] = int(linesArray[8]) if len(re.sub(r'\n', '', linesArray[8])) != 0 else 0
                    districtArray.append(districtDictionary)

            deltaCalculator.getStateDataFromSite("Nagaland", districtArray, option)
        except FileNotFoundError:
            # Was "hr.csv" — copy/paste slip; this reader uses nl.txt.
            print("nl.txt missing. Generate through pdf or ocr and rerun.")

def GAGetData():
    """Stub: dump the Goa dashboard tiles; district mapping is not
    implemented yet."""
    response = requests.request("GET", metaDictionary['Goa'].url)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find_all("div", {"class": "vc_col-md-2"})

    districtArray = []
    for index, row in enumerate(table):
        print(row.get_text())

        districtDictionary = {}
        districtArray.append(districtDictionary)

    # Intentionally bails out before reporting: the tile text above is not
    # yet parsed into district counts.
    return
    deltaCalculator.getStateDataFromSite("Goa", districtArray, option)


def ASGetDataThroughOCR():
    """Print Assam per-district new-case counts from OCR output.

    Only districts with a positive count in the last column are printed.
    """
    try:
        with open(".tmp/as.txt", "r") as upFile:
            for line in upFile:
                splitArray = re.sub(r'\n', '', line.strip()).split('|')
                linesArray = splitArray[0].split(',')
                if int(linesArray[len(linesArray) - 1]) > 0:
                    print("{},Assam,AS,{},Hospitalized".format(linesArray[0].strip(), linesArray[len(linesArray) - 1].strip()))

    except FileNotFoundError:
        # Was "pb.txt" — copy/paste slip; this reader uses as.txt.
        print("as.txt missing. Generate through pdf or ocr and rerun.")
Generate through pdf or ocr and rerun.") 1342 | 1343 | def ASGetData(): 1344 | if typeOfAutomation == "ocr": 1345 | ASGetDataThroughOCR() 1346 | return 1347 | response = requests.request("GET", metaDictionary['Assam'].url) 1348 | soup = BeautifulSoup(response.content, 'html.parser') 1349 | table = soup.find("tbody").find_all("tr") 1350 | 1351 | districtArray = [] 1352 | for index, row in enumerate(table): 1353 | dataPoints = row.find_all("td") 1354 | 1355 | districtDictionary = {} 1356 | districtDictionary['districtName'] = dataPoints[0].get_text().strip() 1357 | districtDictionary['confirmed'] = int(dataPoints[1].get_text().strip()) if '-' not in dataPoints[1].get_text().strip() else 0 1358 | districtDictionary['recovered'] = int(dataPoints[3].get_text().strip()) if '-' not in dataPoints[3].get_text().strip() else 0 1359 | districtDictionary['deceased'] = int(dataPoints[4].get_text().strip()) if '-' not in dataPoints[4].get_text().strip() else 0 1360 | districtArray.append(districtDictionary) 1361 | 1362 | deltaCalculator.getStateDataFromSite("Assam", districtArray, option) 1363 | 1364 | def TRGetData(): 1365 | response = requests.request("GET", metaDictionary['Tripura'].url) 1366 | soup = BeautifulSoup(response.content, 'html.parser') 1367 | table = soup.find("tbody").find_all("tr") 1368 | 1369 | districtArray = [] 1370 | for index, row in enumerate(table): 1371 | dataPoints = row.find_all("td") 1372 | 1373 | districtDictionary = {} 1374 | districtDictionary['districtName'] = dataPoints[1].get_text().strip() 1375 | districtDictionary['confirmed'] = int(dataPoints[8].get_text().strip()) 1376 | districtDictionary['recovered'] = int(dataPoints[10].get_text().strip()) 1377 | districtDictionary['deceased'] = int(dataPoints[12].get_text().strip()) 1378 | districtArray.append(districtDictionary) 1379 | 1380 | deltaCalculator.getStateDataFromSite("Tripura", districtArray, option) 1381 | 1382 | def PYGetData(): 1383 | response = requests.request("GET", 
metaDictionary['Puducherry'].url) 1384 | soup = BeautifulSoup(response.content, 'html.parser') 1385 | table = soup.find_all("tbody")[1].find_all("tr") 1386 | 1387 | districtArray = [] 1388 | for index, row in enumerate(table): 1389 | dataPoints = row.find_all("td") 1390 | 1391 | districtDictionary = {} 1392 | districtDictionary['districtName'] = dataPoints[0].get_text().strip() 1393 | districtDictionary['confirmed'] = int(dataPoints[1].get_text().strip()) 1394 | districtDictionary['recovered'] = int(dataPoints[2].get_text().strip()) 1395 | districtDictionary['deceased'] = int(dataPoints[4].get_text().strip()) 1396 | districtArray.append(districtDictionary) 1397 | 1398 | deltaCalculator.getStateDataFromSite("Puducherry", districtArray, option) 1399 | 1400 | def CHGetData(): 1401 | response = requests.request("GET", metaDictionary['Chandigarh'].url) 1402 | soup = BeautifulSoup(response.content, 'html.parser') 1403 | divs = soup.find("div", {"class": "col-lg-8 col-md-9 form-group pt-10"}).find_all("div", {"class": "col-md-3"}) 1404 | 1405 | districtDictionary = {} 1406 | districtArray = [] 1407 | districtDictionary['districtName'] = 'Chandigarh' 1408 | 1409 | for index, row in enumerate(divs): 1410 | 1411 | if index > 2: 1412 | continue 1413 | 1414 | dataPoints = row.find("div", {"class": "card-body"}).get_text() 1415 | 1416 | if index == 0: 1417 | districtDictionary['confirmed'] = int(dataPoints) 1418 | if index == 1: 1419 | districtDictionary['recovered'] = int(dataPoints) 1420 | if index == 2: 1421 | districtDictionary['deceased'] = int(dataPoints) 1422 | 1423 | districtArray.append(districtDictionary) 1424 | deltaCalculator.getStateDataFromSite("Chandigarh", districtArray, option) 1425 | 1426 | 1427 | def KLGetData(): 1428 | if typeOfAutomation == "pdf": 1429 | KLGetDataByPDF() 1430 | return 1431 | response = requests.request("GET", 'https://dashboard.kerala.gov.in/index.php') 1432 | sessionId = (response.headers['Set-Cookie']).split(';')[0].split('=')[1] 1433 | 
1434 | cookies = { 1435 | '_ga': 'GA1.3.594771251.1592531338', 1436 | '_gid': 'GA1.3.674470591.1592531338', 1437 | 'PHPSESSID': sessionId, 1438 | '_gat_gtag_UA_162482846_1': '1', 1439 | } 1440 | 1441 | headers = { 1442 | 'Connection': 'keep-alive', 1443 | 'Accept': 'application/json, text/javascript, */*; q=0.01', 1444 | 'X-Requested-With': 'XMLHttpRequest', 1445 | 'Sec-Fetch-Site': 'same-origin', 1446 | 'Sec-Fetch-Mode': 'cors', 1447 | 'Sec-Fetch-Dest': 'empty', 1448 | 'Referer': 'https://dashboard.kerala.gov.in/index.php', 1449 | 'Accept-Language': 'en-US,en;q=0.9', 1450 | } 1451 | 1452 | stateDashboard = requests.get(metaDictionary['Kerala'].url, headers=headers, cookies=cookies).json() 1453 | districtArray = [] 1454 | for districtDetails in stateDashboard['features']: 1455 | districtDictionary = {} 1456 | districtDictionary['districtName'] = districtDetails['properties']['District'] 1457 | districtDictionary['confirmed'] = districtDetails['properties']['covid_stat'] 1458 | districtDictionary['recovered'] = districtDetails['properties']['covid_statcured'] 1459 | districtDictionary['deceased'] = districtDetails['properties']['covid_statdeath'] 1460 | districtArray.append(districtDictionary) 1461 | deltaCalculator.getStateDataFromSite("Kerala", districtArray, option) 1462 | 1463 | def KLGetDataByPDF(): 1464 | linesArray = [] 1465 | districtDictionary = {} 1466 | districtArray = [] 1467 | readFileFromURLV2(metaDictionary['Kerala'].url, "Kerala", "District", "Total") 1468 | try: 1469 | with open(".tmp/kl.csv", "r") as upFile: 1470 | for line in upFile: 1471 | linesArray = line.split(',') 1472 | if len(linesArray) != 3: 1473 | print("--> Issue with {}".format(linesArray)) 1474 | continue 1475 | print("{},Kerala,KL,{},Hospitalized".format(linesArray[0].strip().title(), linesArray[1].strip())) 1476 | print("{},Kerala,KL,{},Recovered".format(linesArray[0].strip().title(), linesArray[2].strip())) 1477 | 1478 | upFile.close() 1479 | except FileNotFoundError: 1480 | 
print("ap.csv missing. Generate through pdf or ocr and rerun.") 1481 | 1482 | 1483 | def KLDGetData(): 1484 | linesArray = [] 1485 | districtDictionary = {} 1486 | districtArray = [] 1487 | readFileFromURLV2(metaDictionary['KeralaDeaths'].url, "KeralaDeaths", "District", "") 1488 | try: 1489 | with open(".tmp/kld.csv", "r") as upFile: 1490 | for line in upFile: 1491 | linesArray = line.split(',') 1492 | if len(linesArray) != 3: 1493 | print("--> Issue with {}".format(linesArray)) 1494 | continue 1495 | gender = "M" if linesArray[2].strip() == "Male" else "F" 1496 | print("{},{},,{},Kerala,KL,1,Deceased".format(linesArray[1], gender, linesArray[0].strip().title())) 1497 | 1498 | 1499 | upFile.close() 1500 | except FileNotFoundError: 1501 | print("ap.csv missing. Generate through pdf or ocr and rerun.") 1502 | 1503 | 1504 | def MLGetData(): 1505 | if typeOfAutomation == "ocr": 1506 | MLGetDataByOCR() 1507 | return 1508 | 1509 | #stateDashboard = requests.get(metaDictionary['Meghalaya'].url).json() 1510 | 1511 | response = requests.request("GET", "https://mbdasankalp.in/auth/local/embed") 1512 | authKey = json.loads(response.text)['key'] 1513 | 1514 | url = "https://mbdasankalp.in/api/elasticsearch/aggregation/or/db/merge?access_token=" + authKey 1515 | 1516 | payload = 
"{\"aggregation\":{\"XAxisHeaders\":[{\"TagId\":\"5dd151b22fc63e490ca55ad6\",\"Header\":false,\"dbId\":\"5f395a260deffa1bd752be4e\"}],\"IsXaxisParallel\":false,\"YAxisHeaders\":[{\"Operator\":\"COUNT_DISTINCT\",\"isHousehold\":true,\"Header\":false,\"dbId\":\"5f395a260deffa1bd752be4e\"}],\"IsYaxisParallel\":true,\"YAxisFormulae\":[{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"9100b461-5d86-47f9-b11c-6d48f90f9cf9\",\"isFormulaAxis\":true,\"formulaId\":\"5f395d6f0deffa1bd752bee8\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]},{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"5b94c49f-7c8e-4bdf-9c8b-e7af4e53e14d\",\"isFormulaAxis\":true,\"formulaId\":\"5f395dba0deffa1bd752bef2\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]},{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"3a36866c-956d-48b2-a47c-1149a0334f29\",\"isFormulaAxis\":true,\"formulaId\":\"5f395dd80deffa1bd752bef5\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]},{\"isHousehold\":false,\"Instance\":\"\",\"axisId\":\"a714425e-e78f-4dd7-833a-636a3bb850ca\",\"isFormulaAxis\":true,\"formulaId\":\"5f395d9a0deffa1bd752beef\",\"dbIds\":[\"5f395a260deffa1bd752be4e\"]}]},\"dbId\":\"5f395a260deffa1bd752be4e\",\"tagFilters\":[],\"sorting\":{\"axis\":{\"id\":\"5f395d6f0deffa1bd752bee8\",\"axisId\":\"9100b461-5d86-47f9-b11c-6d48f90f9cf9\",\"operator\":\"rowcount\"},\"sort\":{\"orderBy\":\"count\",\"order\":\"desc\"},\"size\":9999,\"enabled\":true,\"histogram\":false,\"timeseries\":false},\"customBins\":[],\"tagStatus\":true,\"boxplot\":false,\"requestedDbs\":{\"5f395a260deffa1bd752be4e\":{}}}" 1517 | headers = { 1518 | 'Origin': 'https://mbdasankalp.in', 1519 | 'Referer': 'https://mbdasankalp.in/render/chart/5f4a8e961dbba63b625ff002?c=f7f7f7&bc=121212&key=' + authKey, 1520 | 'Host': 'mbdasankalp.in', 1521 | 'Content-Type': 'application/json', 1522 | 'Accept': 'application/json, text/plain, */*', 1523 | 'Content-Length': '1399' 1524 | } 1525 | 1526 | response = requests.request("POST", url, headers=headers, data = 
payload) 1527 | stateDashboard = json.loads(response.text.encode('utf8')) 1528 | 1529 | districtArray = [] 1530 | for data in stateDashboard[0]: 1531 | districtDictionary = {} 1532 | districtDictionary['districtName'] = data["name"] 1533 | for value in data["value"]: 1534 | try: 1535 | if value["formulaId"] == "5f395d6f0deffa1bd752bee8": 1536 | districtDictionary['confirmed'] = int(value["value"]) 1537 | if value["formulaId"] == "5f395dba0deffa1bd752bef2": 1538 | districtDictionary['recovered'] = int(value["value"]) 1539 | if value["formulaId"] == "5f395dd80deffa1bd752bef5": 1540 | districtDictionary['deceased'] = int(value["value"]) 1541 | except KeyError: 1542 | continue 1543 | districtArray.append(districtDictionary) 1544 | deltaCalculator.getStateDataFromSite("Meghalaya", districtArray, option) 1545 | return 1546 | 1547 | districtArray = [] 1548 | for districtDetails in stateDashboard['features']: 1549 | districtDictionary = {} 1550 | districtDictionary['districtName'] = districtDetails['attributes']['Name'] 1551 | districtDictionary['confirmed'] = districtDetails['attributes']['Positive'] 1552 | districtDictionary['recovered'] = districtDetails['attributes']['Recovered'] 1553 | districtDictionary['deceased'] = districtDetails['attributes']['Deceasesd'] 1554 | districtArray.append(districtDictionary) 1555 | deltaCalculator.getStateDataFromSite("Meghalaya", districtArray, option) 1556 | 1557 | def MLGetDataByOCR(): 1558 | districtArray = [] 1559 | with open(".tmp/ml.txt", "r") as mlFile: 1560 | for line in mlFile: 1561 | linesArray = line.split('|')[0].split(',') 1562 | if len(linesArray) != 8: 1563 | print("--> Issue with {}".format(linesArray)) 1564 | continue 1565 | 1566 | districtDictionary = {} 1567 | districtDictionary['districtName'] = linesArray[0].strip() 1568 | districtDictionary['confirmed'] = int(linesArray[5].strip()) 1569 | districtDictionary['recovered'] = int(linesArray[6].strip()) 1570 | districtDictionary['deceased'] = int(linesArray[7]) if 
len(re.sub('\n', '', linesArray[7])) != 0 else 0 1571 | districtArray.append(districtDictionary) 1572 | deltaCalculator.getStateDataFromSite("Meghalaya", districtArray, option) 1573 | 1574 | def MNGetData(): 1575 | districtArray = [] 1576 | with open(".tmp/mn.txt") as mnFile: 1577 | for line in mnFile: 1578 | linesArray = line.split('|')[0].split(',') 1579 | if len(linesArray) != 8: 1580 | print("--> Issue with {}".format(linesArray)) 1581 | continue 1582 | 1583 | if (linesArray[2].strip()) != "0": 1584 | print("{},Manipur,MN,{},Hospitalized".format(linesArray[0].strip().title(), linesArray[2].strip())) 1585 | if (linesArray[4].strip()) != "0": 1586 | print("{},Manipur,MN,{},Deceased".format(linesArray[0].strip().title(), linesArray[4].strip())) 1587 | 1588 | mnFile.close() 1589 | 1590 | def MZGetData(): 1591 | districtArray = [] 1592 | with open(".tmp/mz.txt") as mzFile: 1593 | for line in mzFile: 1594 | line = line.replace('Nil', '0') 1595 | linesArray = line.split('|')[0].split(',') 1596 | if len(linesArray) != 5: 1597 | print("--> Issue with {}".format(linesArray)) 1598 | continue 1599 | 1600 | districtDictionary = {} 1601 | districtDictionary['districtName'] = linesArray[0].strip() 1602 | districtDictionary['confirmed'] = int(linesArray[4]) #+ int(linesArray[2]) + int(linesArray[3]) 1603 | districtDictionary['recovered'] = int(linesArray[2]) 1604 | districtDictionary['deceased'] = int(linesArray[3]) #if len(re.sub('\n', '', linesArray[3])) != 0 else 0 1605 | districtArray.append(districtDictionary) 1606 | 1607 | mzFile.close() 1608 | deltaCalculator.getStateDataFromSite("Mizoram", districtArray, option) 1609 | 1610 | 1611 | def LAGetData(): 1612 | response = requests.request("GET", metaDictionary['Ladakh'].url) 1613 | soup = BeautifulSoup(response.content, 'html.parser') 1614 | table = soup.find("table", id = "tableCovidData2").find_all("tr") 1615 | 1616 | districtArray = [] 1617 | districtDictionary = {} 1618 | confirmed = table[9].find_all("td")[1] 1619 | 
discharged = table[11].find_all("td")[1] 1620 | confirmedArray = dischargedArray = [] 1621 | confirmedArray = re.sub('\\r', '', re.sub(':', '', re.sub(' +', ' ', re.sub("\n", " ", confirmed.get_text().strip())))).split(' ') 1622 | dischargedArray = re.sub('\\r', '', re.sub(':', '', re.sub(' +', ' ', re.sub("\n", " ", discharged.get_text().strip())))).split(' ') 1623 | 1624 | districtDictionary['districtName'] = confirmedArray[0] 1625 | districtDictionary['confirmed'] = int(confirmedArray[1]) 1626 | districtDictionary['recovered'] = int(dischargedArray[1]) 1627 | districtDictionary['deceased'] = -999 1628 | districtArray.append(districtDictionary) 1629 | 1630 | districtDictionary = {} 1631 | districtDictionary['districtName'] = confirmedArray[2] 1632 | districtDictionary['confirmed'] = int(confirmedArray[3]) 1633 | districtDictionary['recovered'] = int(dischargedArray[3]) 1634 | districtDictionary['deceased'] = -999 1635 | districtArray.append(districtDictionary) 1636 | 1637 | deltaCalculator.getStateDataFromSite("Ladakh", districtArray, option) 1638 | 1639 | def VCMFormatLine(row): 1640 | 1641 | state = " " 1642 | firstDose = 0 1643 | secondDose = 0 1644 | totalDose = 0 1645 | 1646 | if len(row) < 5: 1647 | row = re.sub("\s+", " ", " ".join(row)).split(" ") 1648 | state = row[1] 1649 | firstDose = re.sub(",", "", row[2]) 1650 | secondDose = re.sub(",", "", row[3]) 1651 | totalDose = re.sub(",", "", row[4]) 1652 | 1653 | return state + "," + firstDose + "," + secondDose + "," + totalDose + "\n" 1654 | #return row[1] + "," + re.sub(",", "", row[2]) + "," + re.sub(",", "", row[3]) + "," + re.sub(",", "", row[4]) + "\n" 1655 | 1656 | def PBFormatLine(row): 1657 | return row[1] + "," + row[2] + "," + row[3] + "," + row[4] + "," + row[5] + "\n" 1658 | 1659 | def KLFormatLine(row): 1660 | return row[0] + "," + row[1] + "," + row[2] + "\n" 1661 | 1662 | def KLDFormatLine(row): 1663 | return row[1] + "," + row[4] + "," + row[5] + "\n" 1664 | 1665 | 1666 | def 
KAFormatLine(row): 1667 | district = "" 1668 | modifiedRow = [] 1669 | for value in row: 1670 | if len(value) > 0: 1671 | modifiedRow.append(value) 1672 | 1673 | if is_number(modifiedRow[0]) == False: 1674 | district = " ".join(re.sub(' +', ' ', modifiedRow[0]).split(' ')[1:]) 1675 | modifiedRow.insert(0, 'a') 1676 | else: 1677 | district = re.sub('\*', '', modifiedRow[1]) 1678 | print(modifiedRow) 1679 | 1680 | return district + "," + modifiedRow[3] + "," + modifiedRow[5] + "," + modifiedRow[8] + "\n" 1681 | 1682 | """ 1683 | def HRFormatLine(line): 1684 | line = re.sub(' +', ',', re.sub('^ +', '', line)) 1685 | 1686 | linesArray = line.split(',') 1687 | 1688 | if len(linesArray) > 1 and linesArray[1] == "Charkhi": 1689 | linesArray.remove("Dadri") 1690 | linesArray[1] = "Charkhi Dadri" 1691 | 1692 | if len(linesArray) != 11: 1693 | print("--> Ignoring: {}".format(linesArray)) 1694 | return "\n" 1695 | 1696 | recovery = 0 1697 | if '[' in linesArray[4]: 1698 | recovery = linesArray[4].split('[')[0] 1699 | else: 1700 | recovery = linesArray[4] 1701 | 1702 | deaths = 0 1703 | if '[' in linesArray[7]: 1704 | deaths = linesArray[7].split('[')[0] 1705 | else: 1706 | deaths = linesArray[7] 1707 | 1708 | outputString = linesArray[1] + "," + linesArray[3] + "," + str(recovery) + "," + str(deaths) + "\n" 1709 | return outputString 1710 | """ 1711 | 1712 | def HRFormatLine(row): 1713 | row[1] = re.sub('\*', '', row[1]) 1714 | if '[' in row[3]: 1715 | row[3] = row[3].split('[')[0] 1716 | if '[' in row[4]: 1717 | row[4] = row[4].split('[')[0] 1718 | if '[' in row[7]: 1719 | row[7] = row[7].split('[')[0] 1720 | if '[' in row[6]: 1721 | row[6] = row[6].split('[')[0] 1722 | 1723 | line = row[1] + "," + row[3] + "," + row[4] + "," + str(int(row[6]) + int (row[7])) + "\n" 1724 | return line 1725 | 1726 | def APFormatLine(row): 1727 | line = row[1] + "," + row[3] + "," + row[5] + "," + row[6] + "\n" 1728 | return line 1729 | 1730 | 1731 | def WBFormatLine(row): 1732 | row[2] = 
re.sub(',', '', re.sub('\+.*', '', row[2])) 1733 | row[3] = re.sub(',', '', re.sub('\+.*', '', row[3])) 1734 | row[4] = re.sub('\#', '', re.sub(',', '', re.sub('\+.*', '', row[4]))) 1735 | row[5] = re.sub(',', '', re.sub('\+.*', '', row[5])) 1736 | line = row[1] + "," + row[2] + "," + row[3] + "," + row[4] + "\n" 1737 | return line 1738 | 1739 | ''' 1740 | This method uses camelot package to read a pdf and then parse it into a csv file. 1741 | In this method, we read the pdf either from the meta file or from the pdfUrl global variable. This variable can be set from the cmd line. 1742 | The method also takes user input for page number or allows for page number to be used from the pageId global variable. 1743 | The method, reads a specific page, then for that page, decides if a line has to be ignored using starting and ending keys. 1744 | Then the method calls a "FormatLine(row)" function that calls the corresponding function to allow for any row/line to be manipulated. 1745 | The outputs are written to a .csv file. This is read inside the corresponding GetData() functions which call deltaCalculator to calculate deltas. 
1746 | ''' 1747 | def readFileFromURLV2(url, stateName, startKey, endKey): 1748 | global pdfUrl 1749 | global pageId 1750 | stateFileName = metaDictionary[stateName].stateCode 1751 | 1752 | if len(pdfUrl) > 0: 1753 | url = pdfUrl 1754 | if len(url) > 0: 1755 | #print("--> Requesting download from {} ".format(url)) 1756 | r = requests.get(url, allow_redirects=True, verify=False) 1757 | open(".tmp/" + stateFileName + ".pdf", 'wb').write(r.content) 1758 | if len(pageId) > 0: 1759 | pid = "" 1760 | if ',' in pageId: 1761 | startPage = int(pageId.split(',')[0]) 1762 | endPage = int(pageId.split(',')[1]) 1763 | for pages in range(startPage, endPage + 1, 1): 1764 | print(pages) 1765 | pid = pid + "," + str(pages) if len(pid) > 0 else str(pages) 1766 | print(pid) 1767 | else: 1768 | pid = pageId 1769 | else: 1770 | pid = input("Enter district page:") 1771 | print("Running for {} pages".format(pid)) 1772 | tables = camelot.read_pdf(".tmp/" + stateFileName + ".pdf", strip_text = '\n', pages = pid, split_text = True) 1773 | for index, table in enumerate(tables): 1774 | tables[index].to_csv('.tmp/' + stateFileName + str(index) + '.pdf.txt') 1775 | 1776 | stateOutputFile = open('.tmp/' + stateFileName.lower() + '.csv', 'w') 1777 | csvWriter = csv.writer(stateOutputFile) 1778 | arrayToWrite = [] 1779 | 1780 | startedReadingDistricts = False 1781 | for index, table in enumerate(tables): 1782 | with open('.tmp/' + stateFileName + str(index) + '.pdf.txt', newline='') as stateCSVFile: 1783 | rowReader = csv.reader(stateCSVFile, delimiter=',', quotechar='"') 1784 | for row in rowReader: 1785 | line = "|".join(row) 1786 | line = re.sub("\|+", '|', line) 1787 | if startKey in line: 1788 | startedReadingDistricts = True 1789 | if len(endKey) > 0 and endKey in line: 1790 | startedReadingDistricts = False 1791 | continue 1792 | if startedReadingDistricts == False: 1793 | continue 1794 | 1795 | line = eval(stateFileName + "FormatLine")(line.split('|')) 1796 | if line == "\n": 1797 | 
continue 1798 | print(line, file = stateOutputFile, end = "") 1799 | 1800 | stateOutputFile.close() 1801 | 1802 | ''' This will be deprecated. ''' 1803 | def readFileFromURL(url, stateName, startKey, endKey): 1804 | global pdfUrl 1805 | global pageId 1806 | stateFileName = metaDictionary[stateName].stateCode 1807 | if len(pdfUrl) > 0: 1808 | url = pdfUrl 1809 | 1810 | if len(url) > 0: 1811 | r = requests.get(url, allow_redirects=True, verify=False) 1812 | open(".tmp/" + stateFileName + ".pdf", 'wb').write(r.content) 1813 | 1814 | with open(".tmp/" + stateFileName + ".pdf", "rb") as f: 1815 | pdf = pdftotext.PDF(f) 1816 | 1817 | fileToWrite = open(".tmp/" + stateFileName + ".pdf.txt", "w") 1818 | if len(pageId) > 0: 1819 | pid = pageId 1820 | else: 1821 | pid = input("Enter district page:") 1822 | print(pdf[int(pid)], file = fileToWrite) 1823 | fileToWrite.close() 1824 | 1825 | fileToWrite = open(".tmp/" + stateFileName + '.pdf.txt', 'r') 1826 | lines = fileToWrite.readlines() 1827 | stateOutputFileName = open(".tmp/" + stateFileName + '.txt', 'w') 1828 | 1829 | startedReadingDistricts = False 1830 | outputLines = [] 1831 | for line in lines: 1832 | if startKey in line: 1833 | startedReadingDistricts = True 1834 | if endKey in line: 1835 | startedReadingDistricts = False 1836 | continue 1837 | 1838 | if startedReadingDistricts == False: 1839 | continue 1840 | print(eval(stateFileName + "FormatLine")(line), file = stateOutputFileName, end = " ") 1841 | 1842 | stateOutputFileName.close() 1843 | fileToWrite.close() 1844 | 1845 | '''This will eventually be moved to TNFormatLine(row) function''' 1846 | def convertTnPDFToCSV(): 1847 | global pdfUrl 1848 | global typeOfAutomation 1849 | 1850 | if len(pdfUrl) > 0: 1851 | r = requests.get(pdfUrl, allow_redirects=True, verify=False) 1852 | open(".tmp/tn.pdf", 'wb').write(r.content) 1853 | 1854 | try: 1855 | with open(".tmp/" + "tn.pdf", "rb") as f: 1856 | pdf = pdftotext.PDF(f) 1857 | except FileNotFoundError: 1858 | 
print("Make sure tn.pdf is present in the current folder and rerun the script! Arigatou gozaimasu.") 1859 | return 1860 | 1861 | tables = camelot.read_pdf('.tmp/tn.pdf',strip_text='\n', pages="7", split_text = True) 1862 | tables[0].to_csv('.tmp/tn.pdf.txt') 1863 | 1864 | tnFile = open(".tmp/" + 'tn.pdf.txt', 'r') 1865 | lines = tnFile.readlines() 1866 | tnOutputFile = open(".tmp/" + 'tn.csv', 'w') 1867 | 1868 | startedReadingDistricts = False 1869 | airportRun = 1 1870 | airportConfirmedCount = 0 1871 | airportRecoveredCount = 0 1872 | airportDeceasedCount = 0 1873 | with open('.tmp/tn.pdf.txt', newline='') as csvfile: 1874 | rowReader = csv.reader(csvfile, delimiter=',', quotechar='"') 1875 | line = "" 1876 | for row in rowReader: 1877 | line = '|'.join(row) 1878 | 1879 | if 'Ariyalur' in line: 1880 | startedReadingDistricts = True 1881 | if 'Total' in line: 1882 | startedReadingDistricts = False 1883 | 1884 | if startedReadingDistricts == False: 1885 | continue 1886 | 1887 | line = line.replace('"', '').replace('*', '').replace('#', '').replace(',', '').replace('$', '') 1888 | linesArray = line.split('|') 1889 | 1890 | if len(linesArray) < 6: 1891 | print("--> Ignoring line: {} due to less columns".format(line)) 1892 | continue 1893 | 1894 | if 'Airport' in line: 1895 | airportConfirmedCount += int(linesArray[2]) 1896 | airportRecoveredCount += int(linesArray[3]) 1897 | airportDeceasedCount += int(linesArray[5]) 1898 | if airportRun == 1: 1899 | airportRun += 1 1900 | continue 1901 | else: 1902 | print("{}, {}, {}, {}\n".format('Airport Quarantine', airportConfirmedCount, airportRecoveredCount, airportDeceasedCount), file = tnOutputFile) 1903 | continue 1904 | if 'Railway' in line: 1905 | print("{}, {}, {}, {}".format('Railway Quarantine', linesArray[2], linesArray[3], linesArray[5]), file = tnOutputFile) 1906 | continue 1907 | 1908 | print("{}, {}, {}, {}".format(linesArray[1], linesArray[2], linesArray[3], linesArray[5]), file = tnOutputFile) 1909 | 1910 | 
tnOutputFile.close() 1911 | 1912 | def is_number(s): 1913 | try: 1914 | int(s) 1915 | return True 1916 | except ValueError: 1917 | return False 1918 | 1919 | def main(): 1920 | 1921 | loadMetaData() 1922 | stateName = "" 1923 | global option 1924 | global typeOfAutomation 1925 | global pdfUrl 1926 | global pageId 1927 | 1928 | if len(sys.argv) not in [1, 2, 3, 4]: 1929 | print('Usage: ./automation "[StateName]" "[detailed/full]" "[ocr/pdf=url]"') 1930 | return 1931 | 1932 | if len(sys.argv) == 2: 1933 | stateName = sys.argv[1] 1934 | 1935 | if len(sys.argv) == 3: 1936 | stateName = sys.argv[1] 1937 | option = sys.argv[2] 1938 | 1939 | if len(sys.argv) == 4: 1940 | stateName = sys.argv[1] 1941 | option = sys.argv[2] 1942 | if "=" in sys.argv[3]: 1943 | typeOfAutomation = sys.argv[3].split("=")[0] 1944 | pdfUrl = sys.argv[3].split("=")[1] 1945 | if len(sys.argv[3].split("=")) > 2: 1946 | pageId = sys.argv[3].split("=")[2] 1947 | else: 1948 | typeOfAutomation = sys.argv[3] 1949 | 1950 | print("Using pageId: {}".format(pageId)) 1951 | 1952 | if not stateName: 1953 | stateName = "All States" 1954 | fetchData(stateName) 1955 | 1956 | if __name__ == '__main__': 1957 | main() 1958 | 1959 | --------------------------------------------------------------------------------