├── .gitignore ├── .projectile ├── README.md ├── a ├── estimate_hosp_capacity.do ├── impute_additional_fields.do └── predict_age_cfr.do ├── assets ├── covid_build_public.nomnoml ├── covid_build_public.svg ├── download_button.png ├── hospital-beds.html ├── hospital-beds.js ├── hospital-beds.png ├── market-volumes-livestock.html ├── market-volumes-livestock.js ├── market-volumes.html ├── market-volumes.js ├── mortality-pred.html ├── mortality-pred.js └── mortality-pred.png ├── b ├── agmark_scraper.py ├── clean_agmark.do ├── clean_ap_mort.do ├── clean_assam_mort.do ├── clean_bbmp_mort.do ├── clean_bihar_mort.do ├── clean_chennai_mort.do ├── clean_ghmc_mort.do ├── clean_haryana_mort.do ├── clean_hp_mort.do ├── clean_kmdc_mort.do ├── clean_mh_mort.do ├── clean_migration.do ├── clean_mp_mort.do ├── clean_odisha_mort.do ├── clean_rajasthan_mort.do ├── clean_state_mort.do ├── clean_up_mort.do ├── clean_wb_mort.do ├── copy_keys.do ├── create_dlhs4_pc11_district_key.do ├── create_google.do ├── ddl_nfhs_poll_hmis.do ├── gen_age_distribution.do ├── gen_lgd_pc11_demographics.do ├── gen_nss_district_key.do ├── gen_urbanization_subdist.do ├── get_case_data.do ├── get_vaccination_data.do ├── make_mortality.do ├── old │ ├── aggregate_case_data.do │ ├── gen_demographics.do │ ├── gen_pc11_states.do │ ├── get_case_data.do │ ├── get_lgd_keys.do │ ├── mse_simple.m │ └── predict_cts_uk_age_or_v2.m ├── pc11_lgd_metadata.csv ├── prep_bihar.do ├── prep_dlhs4_district.do ├── prep_ec_hosp.do ├── prep_ec_hosp_microdata.do ├── prep_hosp_pca_vd.do ├── prep_nss75.do ├── prep_pc_hosp.do ├── prep_secc.do ├── push_data.sh ├── retrieve_case_data.py ├── str │ ├── cov19india_district_fixes.txt │ ├── cov19india_vaccine_district_fixes.txt │ ├── covid_district_fixes.txt │ └── lgd_district_fixes.txt ├── update_case_cronjob.sh ├── update_case_vaccination_data.do └── vaccination_plot.py ├── build.md ├── como ├── a │ ├── analyze_mort_counts.do │ ├── app_age_hr_interpolation.do │ ├── app_joint_condition.do │ ├── app_table_age_bin_prev.do │ ├── app_table_nhs_vs_os.do │ ├── calc_hr_sensitivity.do │ ├── calc_prev_sensitivity.do │ ├── calc_prrs.do │ ├── covid_como_agerisks_tpl.tex │ ├── covid_como_oscompare_tpl.tex │ ├── covid_como_sumhr_tpl.tex │ ├── covid_como_sumstats_tpl.tex │ ├── examine_risk_factors_poverty.do │ ├── make_coef_plot.py │ ├── make_paper_figures.do │ ├── make_paper_tables.do │ ├── make_summary_tables.do │ ├── old │ │ └── analyze_age_mort_risk.do │ ├── prep_eng_india_prev_compare.do │ └── sumstats.do ├── b │ ├── clean_gbd_india.do │ ├── collapse_biomarkers_to_state.do │ ├── fit_cts_uk_age_hr.m │ ├── flatten_hr_data.py │ ├── old │ │ ├── prep_india_sim_prevalence.do │ │ ├── prep_populations.do │ │ ├── prep_uk_age_risks.do │ │ └── prep_uk_bmi.do │ ├── prep_age_level_data.do │ ├── prep_england_prevalence.do │ ├── prep_gbd.do │ ├── prep_health_data.do │ ├── prep_hrs.do │ ├── prep_india_comorbidities.do │ ├── prep_ny_mortality.do │ ├── prep_pop_sex.do │ └── prep_standard_errors.do ├── como_programs.do ├── csv │ ├── copd_mclean_rates.csv │ ├── england_gender_age.csv │ ├── india_condition_prevalence.csv │ ├── ny_cummings.csv │ ├── ny_hr.csv │ ├── nystate_age_comorbid_05082020.csv │ ├── nystate_or.csv │ ├── uk_condition_prevalence.csv │ ├── uk_condition_sd.csv │ ├── uk_demography.csv │ ├── uk_nhs_hazard_ratios.csv │ ├── uk_nhs_incidence.csv │ └── weighted_hrs.txt ├── e │ ├── examine_risk_factors.do │ ├── explore_gbd_vs_dlhs.do │ ├── hr_vs_or.do │ ├── summarize_india_conditions.do │ ├── test_cts_ors.do │ └── test_map.do ├── 
make_como.do ├── r │ └── covid_como_sumstats.csv └── tex │ ├── app_bootstrap.tex │ ├── appendix.tex │ ├── como_exhibits.tex │ ├── como_tables_figures.tex │ ├── covid-como.bib │ ├── front_matter_como.tex │ └── vancouver.bst ├── covid_progs.do ├── e ├── agmark_plot.ipynb ├── analyze_mortality.do ├── comoweb_plots.ipynb ├── compare_hosp_counts.do ├── covid_district_map.ipynb ├── create_agmark_plots.do ├── describe_migration.do ├── diff_dlsh4_doctor_definitions.do ├── dlhs.do ├── explore_agmark_by_state.do ├── explore_agmark_perishables.do ├── explore_ec_microdata.do ├── explore_idi_survey_r1.do ├── explore_idi_survey_r2.do ├── explore_migration.ipynb ├── explore_mortality.do ├── explore_pc_dlhs_doctors.do ├── explore_vacc_story.do ├── expolore_idi_survey_r2.do ├── figure_hmis.do ├── gen_map.py ├── gen_survey_map.py ├── get_vac_data.do ├── graphs_idi_r3.do ├── hmis_spatial_maps.py ├── idi_labor_ag.do ├── idi_r1_oped_graphs.do ├── idi_survey_r1_for_ppt.do ├── map_vacc_eligible.py ├── pop_estimates_21.csv ├── prep_ahs_data.do ├── prep_covid_dist_analysis.do ├── prep_dlhs_data.do ├── validate_table1.do └── validate_vacc_api.do ├── forecasting ├── README.md ├── Snakefile ├── b │ ├── create_vector_tileset.sh │ ├── data_to_geojson.py │ ├── merge_ddl_pred_data.do │ ├── old │ │ └── push_predicted_metadata.py │ ├── process_ddl_data.do │ ├── process_predicted_data.do │ ├── pull_predicted_data.sh │ ├── pull_predicted_data_helper.py │ ├── push_public_data.sh │ ├── push_vector_tileset.py │ └── test_merged_data.py ├── config │ ├── config.yaml │ ├── forecasting.yaml │ └── forecasting_spatial.yaml └── update_forecasts_cronjob.sh ├── make_covid.do └── str └── manual_covid_case_district_match.csv /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | __pycache__ 3 | forecasting/predictions_credential.json 4 | forecasting/.snakemake/ 5 | forecasting/*.log 6 | -------------------------------------------------------------------------------- /.projectile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devdatalab/covid/a86c2d00d81eee6d26c343e05ae9ba1087fad47f/.projectile -------------------------------------------------------------------------------- /a/impute_additional_fields.do: -------------------------------------------------------------------------------- 1 | /* combine results into a single export file, and impute some additional fields */ 2 | 3 | /* open hospital bed capacity */ 4 | use $covidpub/estimates/hospitals_dist, clear 5 | 6 | /* merge with age distribution and infection fatality rate file */ 7 | merge 1:1 pc11_state_id pc11_district_id using $covidpub/estimates/district_age_dist_cfr, nogen 8 | 9 | /* identify the districts that are least well prepared */ 10 | gen bottom_100_district_dlhs = rank_dlhs <= 100 11 | gen bottom_100_district_pc = rank_pc <= 100 12 | gen district_at_risk = bottom_100_district_pc == 1 & bottom_100_district_dlhs == 1 13 | 14 | /* create scenarios where 1%, 5%, 10% of the district population gets infected */ 15 | gen predicted_mort_01 = pc11_pca_tot_t * 0.01 * district_estimated_cfr_t 16 | gen predicted_hosp_01 = pc11_pca_tot_t * 0.01 * district_estimated_cfr_t * 5 17 | gen predicted_mort_10 = pc11_pca_tot_t * 0.10 * district_estimated_cfr_t 18 | gen predicted_hosp_10 = pc11_pca_tot_t * 0.10 * district_estimated_cfr_t * 5 19 | 20 | /* calculate number of beds according to DLHS in each district */ 21 | gen dlhs_beds = dlhs_perk_pubpriv_beds / 
1000 * pc11_pca_tot_t 22 | 23 | /* calculate extent over capacity under 1% infection rate */ 24 | gen capacity_01 = predicted_hosp_01 / dlhs_beds 25 | gen capacity_10 = predicted_hosp_10 / dlhs_beds 26 | 27 | save $tmp/district_age_dist_cfr_hospitals, replace 28 | export delimited $tmp/district_age_dist_cfr_hospitals, replace 29 | -------------------------------------------------------------------------------- /assets/covid_build_public.nomnoml: -------------------------------------------------------------------------------- 1 | #.instructions: 2 | #.1: navigate to nomnoml.com 3 | #.2: copy the following UML code into the window 4 | #.3: use the toolbar in the top left to export the diagram 5 | #.3a: if you make changes, export the source and push the changes to this file in github 6 | #.3b: download the png and push the updated file in github 7 | 8 | #.prog: fill=#D5F1FF visual=sender italic center 9 | #.data: visual=roundrect align=center 10 | #.web: visual=database italic 11 | #.finaldata: visual=roundrect align=center fill=#FC1 12 | #.key: fill=#ffffff bold 13 | 14 | [Key | 15 | [Initial Dataset]--[ Final Dataset] 16 | [ Final Dataset]--[Program] 17 | ] 18 | 19 | [COVID Case Data | 20 | [ https://covindia.com/]->[get_case_data.do] 21 | ]o->[raw/covindia_raw.dta;covid_infected_deaths.dta] 22 | 23 | [Population Census | 24 | [pc11r_hosp.dta | pc11u_hosp.dta]->[prep_pc_hosp.do] 25 | [prep_pc_hosp.do] 26 | ]o->[PC Hospital Data | [ pc_hospitals_subdist.dta|pc_hospitals_dist.dta]] 27 | 28 | [Economic Census | 29 | [pc11_district_key.dta]->[prep_ec_hosp.do] 30 | [ec13_hosp_microdata.dta]->[prep_ec_hosp.do] 31 | [prep_ec_hosp.do]->[ec_hospitals_tv.dta] 32 | ]o->[EC Hospital Data | [ec_hospitals_dist.dta]] 33 | 34 | [Age Bins from SECC| [secc_age_bins_district_t|secc_age_bins_subdistrict_t]]->[predict_age_cfr.do] 35 | [National CFR by Age | [ cfr_age_bins.dta]]->[predict_age_cfr.do] 36 | [predict_age_cfr.do]o->[CFR by Age Bins| [district_age_dist_cfr.dta|subdistrict_age_dist_cfr.dta]] 37 | 38 | [DLHS Data | [dlhs4_hospitals_dist.dta]]->[estimate_hosp_capacity.do] 39 | [EC Hospital Data]->[estimate_hosp_capacity.do] 40 | [PC Hospital Data ]->[estimate_hosp_capacity.do] 41 | [estimate_hosp_capacity.do]->[hospitals_dist.dta] 42 | 43 | [hospitals_dist.dta]->[impute_additional_fields.do] 44 | [CFR by Age Bins]->[impute_additional_fields.do] 45 | [impute_additional_fields.do]->[district_age_dist_cfr_hospitals.dta] 46 | -------------------------------------------------------------------------------- /assets/download_button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devdatalab/covid/a86c2d00d81eee6d26c343e05ae9ba1087fad47f/assets/download_button.png -------------------------------------------------------------------------------- /assets/hospital-beds.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/hospital-beds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devdatalab/covid/a86c2d00d81eee6d26c343e05ae9ba1087fad47f/assets/hospital-beds.png -------------------------------------------------------------------------------- /assets/market-volumes-livestock.html: -------------------------------------------------------------------------------- 1 | 2 | 
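/* Illustrative note (added for this write-up; not a file in the repo): the
   hospital-capacity arithmetic in a/impute_additional_fields.do above can be
   checked by hand with hypothetical inputs -- a district of 2,000,000 people,
   an estimated CFR of 1%, and 0.5 DLHS beds per 1,000 people. */
display "deaths at 1% infection:        " 2000000 * 0.01 * 0.01                                /* 200 */
display "hospitalizations (5 x deaths): " 2000000 * 0.01 * 0.01 * 5                            /* 1,000 */
display "DLHS beds in district:         " 0.5 / 1000 * 2000000                                 /* 1,000 */
display "capacity_01 (demand / beds):   " (2000000 * 0.01 * 0.01 * 5) / (0.5 / 1000 * 2000000) /* 1, i.e. demand fills the beds exactly */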
-------------------------------------------------------------------------------- /assets/market-volumes.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/mortality-pred.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /assets/mortality-pred.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/devdatalab/covid/a86c2d00d81eee6d26c343e05ae9ba1087fad47f/assets/mortality-pred.png -------------------------------------------------------------------------------- /b/clean_ap_mort.do: -------------------------------------------------------------------------------- 1 | /***************************************/ 2 | /* Clean Andhra Pradesh mortality data */ 3 | /***************************************/ 4 | 5 | /* load covid programs */ 6 | qui do $ddl/covid/covid_progs.do 7 | 8 | /* set globals for year and month */ 9 | global year "2018 2019 2020 2021" 10 | global month "01 02 03 04 05 06 07 08 09 10 11 12" 11 | 12 | foreach j in $year { 13 | 14 | /* conditional global for 2021 since data is only available till June */ 15 | if `j' == 2021 { 16 | global month = "01 02 03 04 05 06" 17 | } 18 | 19 | foreach i in $month { 20 | 21 | /* import raw data */ 22 | import excel "$covidpub/private/mortality/raw/andhra_pradesh/MonitoringReport-AP-`j'`i'-`j'`i'.xlsx", sheet("Monitoring Report") cellrange(A4:P20) clear 23 | 24 | /* basic cleaning - keep relevant vars and rename */ 25 | keep A B E F G 26 | drop if B == "" 27 | 28 | ren A id 29 | ren B district 30 | ren E death_male 31 | ren F death_female 32 | ren G death_trans 33 | 34 | /* generate vars for state, month, year */ 35 | gen state = "Andhra Pradesh" 36 | gen month = `i' 37 | gen year = `j' 38 | 39 | save $tmp/ap_`i'_`j', replace 40 | 41 | } 42 | } 43 | 44 | clear 45 | 46 | /* reset global for month */ 47 | global month "01 02 03 04 05 06 07 08 09 10 11 12" 48 | 49 | /* append all month-year data */ 50 | foreach j in $year { 51 | 52 | if `j' == 2021 { 53 | global month = "01 02 03 04 05 06" 54 | } 55 | 56 | foreach i in $month { 57 | 58 | append using $tmp/ap_`i'_`j'.dta 59 | 60 | } 61 | } 62 | 63 | /* convert months from float to string for consistency */ 64 | str_month, float(month) string(str_month) 65 | 66 | /* sum total deaths */ 67 | egen deaths = rowtotal(death_*) 68 | 69 | /* drop gender-wise deaths and id var and order vars */ 70 | drop death_* id 71 | order state district deaths month year 72 | 73 | /* save clean data to scratch */ 74 | save $tmp/mort_ap.dta, replace 75 | -------------------------------------------------------------------------------- /b/clean_assam_mort.do: -------------------------------------------------------------------------------- 1 | /******************************/ 2 | /* Clean Assam mortality data */ 3 | /******************************/ 4 | 5 | /* set globals for year */ 6 | global year "2018 2019 2020" 7 | 8 | /* process raw data for each year */ 9 | foreach j in $year { 10 | 11 | /* import raw data */ 12 | import excel "$covidpub/private/mortality/raw/assam/`j'.xlsx", sheet("D-4") cellrange(A5:P364) firstrow clear 13 | drop if C == "" 14 | 15 | /* fill missing values */ 16 | foreach i in A B { 17 | replace `i' = `i'[_n-1] if mi(`i') 18 | } 19 | 20 | /* drop redundant vars */ 21 | drop if C == 
"T" 22 | drop if A == 27 23 | drop P 24 | 25 | /* rename vars */ 26 | ren A id 27 | ren B district 28 | ren C sex 29 | ren D death_january 30 | ren E death_february 31 | ren F death_march 32 | ren G death_april 33 | ren H death_may 34 | ren I death_june 35 | ren J death_july 36 | ren K death_august 37 | ren L death_september 38 | ren M death_october 39 | ren N death_november 40 | ren O death_december 41 | 42 | /* reshape from wide to long */ 43 | reshape long death_, i(district sex) j(month) string 44 | 45 | /* drop id var and generate vars for state and year */ 46 | drop id 47 | gen state = "Assam" 48 | gen year = "`j'" 49 | 50 | /* rename and order variables */ 51 | ren death_ deaths 52 | order state district month year death sex 53 | 54 | save "$tmp/assam_`j'" , replace 55 | 56 | } 57 | 58 | clear 59 | 60 | /* append all month-year data */ 61 | foreach j in $year { 62 | 63 | append using $tmp/assam_`j' 64 | 65 | } 66 | 67 | /* collapse on district-month-year */ 68 | collapse (sum) deaths, by(state district month year) 69 | 70 | /* label and destring numeric vars */ 71 | la var state "State" 72 | la var district "District" 73 | la var month "Month" 74 | la var year "Year" 75 | la var deaths "Total Death" 76 | 77 | destring year, replace 78 | 79 | /* save clean dataset unique on district-month-year */ 80 | save $tmp/mort_assam.dta, replace 81 | -------------------------------------------------------------------------------- /b/clean_bbmp_mort.do: -------------------------------------------------------------------------------- 1 | /*****************************************/ 2 | /* Clean BBMP (Bangalore) mortality data */ 3 | /*****************************************/ 4 | 5 | /* import raw data */ 6 | import excel "$covidpub/private/mortality/raw/Karnataka, BBMP deaths data.xlsx", sheet("Sheet1") cellrange(A24:D37) clear 7 | 8 | /* drop redundant obs and rename vars for reshape */ 9 | drop in 1 10 | drop in 13 11 | ren A month 12 | ren B death_2019 13 | ren C death_2020 14 | ren D death_2021 15 | 16 | /* reshape wide to long on monthly deaths */ 17 | reshape long death_, i(month) j(year) 18 | 19 | /* ren deaths var and drop empty obs */ 20 | ren death deaths 21 | drop if deaths == . 
22 | 23 | /* rename months for consistency */ 24 | replace month = "january" if month == "Jan" 25 | replace month = "february" if month == "Feb" 26 | replace month = "march" if month == "Mar" 27 | replace month = "april" if month == "Apr" 28 | replace month = "may" if month == "May" 29 | replace month = "june" if month == "Jun" 30 | replace month = "july" if month == "Jul" 31 | replace month = "august" if month == "Aug" 32 | replace month = "september" if month == "Sep" 33 | replace month = "october" if month == "Oct" 34 | replace month = "november" if month == "Nov" 35 | replace month = "december" if month == "Dec" 36 | 37 | /* gen vars for state, district */ 38 | gen state = "Karnataka" 39 | gen district = "Bangalore (Urban)" 40 | 41 | order state district deaths 42 | 43 | /* save clean data to scratch */ 44 | save $tmp/mort_bbmp.dta, replace 45 | -------------------------------------------------------------------------------- /b/clean_bihar_mort.do: -------------------------------------------------------------------------------- 1 | /******************************/ 2 | /* Clean Bihar mortality data */ 3 | /******************************/ 4 | 5 | /* load covid programs */ 6 | qui do $ddl/covid/covid_progs.do 7 | 8 | /* set globals for year and month */ 9 | global year "2018 2019 2020 2021" 10 | global month "01 02 03 04 05 06 07 08 09 10 11 12" 11 | 12 | foreach j in $year { 13 | 14 | /* conditional global for 2021 since data is only available till May */ 15 | if `j' == 2021 { 16 | global month = "01 02 03 04 05" 17 | } 18 | 19 | foreach i in $month { 20 | 21 | /* import raw data */ 22 | import excel "$covidpub/private/mortality/raw/bihar/`j'_`i'.xlsx", sheet("Monitoring Report") cellrange(A4:P45) clear 23 | 24 | /* basic cleaning - keep relevant vars and rename */ 25 | keep A B E F G 26 | drop if B == "" 27 | 28 | ren A id 29 | ren B district 30 | ren E death_male 31 | ren F death_female 32 | ren G death_trans 33 | 34 | /* generate vars for state, month, year */ 35 | gen state = "Bihar" 36 | gen month = `i' 37 | gen year = `j' 38 | 39 | save $tmp/bihar_`i'_`j', replace 40 | 41 | } 42 | } 43 | 44 | clear 45 | 46 | /* reset global for month */ 47 | global month "01 02 03 04 05 06 07 08 09 10 11 12" 48 | 49 | /* append all month-year data */ 50 | foreach j in $year { 51 | 52 | if `j' == 2021 { 53 | global month = "01 02 03 04 05" 54 | } 55 | 56 | foreach i in $month { 57 | 58 | append using $tmp/bihar_`i'_`j'.dta 59 | 60 | } 61 | } 62 | 63 | /* convert months from float to string for consistency */ 64 | str_month, float(month) string(str_month) 65 | 66 | /* sum total deaths */ 67 | egen deaths = rowtotal(death_*) 68 | 69 | /* drop gender-wise deaths and id var and order vars */ 70 | drop death_* id 71 | order state district deaths month year 72 | 73 | /* save clean file to scratch */ 74 | save $tmp/mort_bihar.dta, replace 75 | -------------------------------------------------------------------------------- /b/clean_chennai_mort.do: -------------------------------------------------------------------------------- 1 | /********************************/ 2 | /* Clean Chennai mortality data */ 3 | /********************************/ 4 | 5 | /* import raw data */ 6 | import excel "$covidpub/private/mortality/raw/chennai.xlsx", sheet("Sheet2") cellrange(A1:M13) clear firstrow 7 | 8 | /* rename vars for reshape */ 9 | ren Month month 10 | ren B deaths2010 11 | ren C deaths2011 12 | ren D deaths2012 13 | ren E deaths2013 14 | ren F deaths2014 15 | ren G deaths2015 16 | ren H deaths2016 17 | ren I 
deaths2017 18 | ren J deaths2018 19 | ren K deaths2019 20 | ren L deaths2020 21 | ren M deaths2021 22 | 23 | /* convert months to lowercase for consistency */ 24 | replace month = lower(month) 25 | 26 | /* reshape from wide to long */ 27 | reshape long deaths, i(month) j(year) 28 | 29 | /* drop missing obs */ 30 | drop if deaths == . 31 | 32 | /* generate vars for state, district */ 33 | gen state = "Tamil Nadu" 34 | gen district = "Chennai" 35 | 36 | order state district deaths 37 | 38 | /* save clean data to scratch */ 39 | save $tmp/mort_chennai, replace 40 | -------------------------------------------------------------------------------- /b/clean_ghmc_mort.do: -------------------------------------------------------------------------------- 1 | /*****************************************/ 2 | /* Clean GHMC (Hyderabad) mortality data */ 3 | /*****************************************/ 4 | 5 | /* import raw data */ 6 | import excel "$covidpub/private/mortality/raw/ghmc_certificates.xlsx", sheet("Sheet1") cellrange(A3:G14) clear 7 | 8 | /* rename vars for reshape */ 9 | ren A month 10 | ren B deaths2016 11 | ren C deaths2017 12 | ren D deaths2018 13 | ren E deaths2019 14 | ren F deaths2020 15 | ren G deaths2021 16 | 17 | /* convert months to lowercase for consistency */ 18 | replace month = lower(month) 19 | 20 | /* reshape from wide to long */ 21 | reshape long deaths, i(month) j(year) 22 | 23 | /* drop missing data */ 24 | drop if deaths == . 25 | 26 | /* gen vars for state, district */ 27 | gen state = "Telangana" 28 | gen district = "Hyderabad" 29 | 30 | order state district deaths 31 | 32 | /* save clean data to scratch */ 33 | save $tmp/mort_ghmc.dta, replace 34 | -------------------------------------------------------------------------------- /b/clean_haryana_mort.do: -------------------------------------------------------------------------------- 1 | /********************************/ 2 | /* Clean Haryana mortality data */ 3 | /********************************/ 4 | 5 | /* import raw data from statsofindia repo */ 6 | import delimited "https://raw.githubusercontent.com/statsofindia/india-mortality/master/district-level/Haryana-districts.csv", clear 7 | 8 | /* create variables for month and day of death */ 9 | gen year = substr(date, 1, 4) 10 | gen month = substr(date, 6, 2) 11 | gen day = substr(date, 9, 2) 12 | destring year month day, replace 13 | 14 | /* collapse on date of death, district and gender */ 15 | collapse (sum) deaths, by(district year month) 16 | 17 | /* convert months from float to string for consistency */ 18 | str_month, float(month) string(str_month) 19 | 20 | /* generate state var */ 21 | gen state = "Haryana" 22 | 23 | /* re-order variables */ 24 | order state district deaths year month 25 | 26 | save $tmp/mort_haryana.dta, replace 27 | -------------------------------------------------------------------------------- /b/clean_hp_mort.do: -------------------------------------------------------------------------------- 1 | /*****************************************/ 2 | /* Clean Himachal Pradesh mortality data */ 3 | /*****************************************/ 4 | 5 | /* import raw data from statsofindia repo */ 6 | import delimited "https://raw.githubusercontent.com/statsofindia/india-mortality/master/district-level/Himachal%20Pradesh-districts.csv", clear 7 | 8 | /* create variables for month and day of death */ 9 | gen year = substr(date, 1, 4) 10 | gen month = substr(date, 6, 2) 11 | gen day = substr(date, 9, 2) 12 | destring year month day, replace 13 | 14 | /* 
collapse on date of death, district and gender */ 15 | collapse (sum) deaths, by(district year month) 16 | 17 | /* convert months from float to string for consistency */ 18 | str_month, float(month) string(str_month) 19 | 20 | /* generate state var */ 21 | gen state = "Himachal Pradesh" 22 | 23 | /* re-order variables */ 24 | order state district deaths year month 25 | 26 | save $tmp/mort_hp.dta, replace 27 | -------------------------------------------------------------------------------- /b/clean_kmdc_mort.do: -------------------------------------------------------------------------------- 1 | /***************************************************************/ 2 | /* Clean Kolkata Municipal Corporation Death Registration data */ 3 | /***************************************************************/ 4 | 5 | /* read in raw csv data (source: https://github.com/thejeshgn/KMCDeathRecords) */ 6 | import delimited "$covidpub/private/mortality/raw/death_records_kolkata.csv", clear 7 | 8 | /* drop empty variables */ 9 | drop dateofregistration deathdate crematoriumcode regnno recordssourcerawdatafile yearofregistration 10 | 11 | /* rename and label variables */ 12 | ren deceasedname deceased_name 13 | ren deathregnno death_reg_no 14 | ren crematoriumname crematorium 15 | ren deceasedsex sex 16 | ren fathername father_name 17 | ren deathsite death_site 18 | ren recordssource record_source 19 | ren recordscity district 20 | ren recordsdateofdeath death_date 21 | 22 | la var deceased_name "Name of Deceased" 23 | la var death_reg_no "Death Registration Number" 24 | la var crematorium "Crematorium" 25 | la var sex "Sex of Deceased" 26 | la var father_name "Father Name" 27 | la var death_site "Site of Death" 28 | la var record_source "Source" 29 | la var district "District" 30 | la var death_date "Date of Death" 31 | 32 | /* gender for some obs is unidentified for various reasons and is missing - label them as unknown */ 33 | replace sex = "UNKNOWN" if sex == " " 34 | 35 | /* create placeholder variable for collapse */ 36 | gen deaths = 1 37 | 38 | /* create variables for month and day of death */ 39 | gen year = substr(death_date, 1, 4) 40 | gen month = substr(death_date, 6, 2) 41 | gen date = substr(death_date, 9, 2) 42 | destring year month date, replace 43 | 44 | /* collapse on date of death, district and gender */ 45 | collapse (sum) deaths, by(district year month) 46 | 47 | /* convert months from float to string for consistency */ 48 | str_month, float(month) string(str_month) 49 | 50 | /* generate state var */ 51 | gen state = "West Bengal" 52 | 53 | /* re-order variables */ 54 | order state district deaths year month 55 | 56 | save $tmp/mort_kolkata.dta, replace 57 | -------------------------------------------------------------------------------- /b/clean_mh_mort.do: -------------------------------------------------------------------------------- 1 | /* import raw data from statsofindia repo */ 2 | import delimited "https://raw.githubusercontent.com/statsofindia/india-mortality/master/district-level/Maharashtra-districts.csv" , clear 3 | 4 | /* create variables for month and day of death */ 5 | gen year = substr(date, 1, 4) 6 | gen month = substr(date, 6, 2) 7 | gen day = substr(date, 9, 2) 8 | destring year month day, replace 9 | 10 | /* collapse on date of death, district and gender */ 11 | collapse (sum) deaths, by(district year month) 12 | 13 | /* convert months from float to string for consistency */ 14 | str_month, float(month) string(str_month) 15 | 16 | /* generate state var */ 17 | gen 
state = "Maharashtra" 18 | 19 | /* re-order variables */ 20 | order state district deaths year month 21 | 22 | save $tmp/mort_maha.dta, replace 23 | -------------------------------------------------------------------------------- /b/clean_migration.do: -------------------------------------------------------------------------------- 1 | /* Clean district migration in/outflow data */ 2 | 3 | 4 | /* read in raw CSV data (source: Clement Imbert */ 5 | import delimited using $covidpub/migration/raw/district_migration_pc11.csv, clear varn(1) 6 | 7 | /* reformat census identifiers to string */ 8 | ren statecodecensus2011 pc11_state_id 9 | tostring pc11_state_id, format(%02.0f) replace 10 | ren districtcodecensus2011 pc11_district_id 11 | tostring pc11_district_id, format(%03.0f) replace 12 | 13 | /* save to pc11 */ 14 | order _all, alphabetic 15 | order pc11_state_id pc11_district_id, first 16 | compress 17 | save $covidpub/migration/pc11/district_migration_pc11, replace 18 | export delimited using $covidpub/migration/csv/district_migration_pc11.csv, replace 19 | 20 | /* create LGD version */ 21 | convert_ids, from_ids(pc11_state_id pc11_district_id) to_ids(lgd_state_id lgd_district_id) key($keys/lgd_pc11_district_key_weights.dta) weight_var(pc11_lgd_wt_pop) metadata_urls("https://docs.google.com/spreadsheets/d/e/2PACX-1vTu79uiVKSFv8c1oZvx7WARrWXSfbwfLakiukoezDaH0spMM_MQalkm5fr4bnkBQVNRs2aiU7x41oi3/pub?gid=0&single=true&output=csv") labels 22 | save $covidpub/migration/district_migration, replace 23 | export delimited using $covidpub/migration/csv/district_migration.csv, replace 24 | -------------------------------------------------------------------------------- /b/clean_mp_mort.do: -------------------------------------------------------------------------------- 1 | /***************************************/ 2 | /* Clean Madhya Pradesh mortality data */ 3 | /***************************************/ 4 | 5 | /* set globals for month and year */ 6 | global month "january february march april may june july august september october november december" 7 | 8 | global year "2018 2019 2020 2021" 9 | 10 | /* process raw data from January 2018 to May 2021 */ 11 | foreach j in $year { 12 | 13 | /* conditional global for 2021 since data is available upto May */ 14 | if `j' == 2021 { 15 | global month = "january february march april may" 16 | } 17 | 18 | foreach i in $month { 19 | 20 | /* import raw data */ 21 | import excel "$covidpub/private/mortality/raw/madhya_pradesh/`j'/`i'`j'.xlsx", sheet("Monitoring Report") firstrow clear 22 | 23 | /* rename vars and drop redundant obs */ 24 | ren SlNo id 25 | ren District district 26 | ren C deaths 27 | 28 | drop in 1/2 29 | drop if id == "" 30 | 31 | /* generate variables for month and year */ 32 | gen month = "`i'" 33 | gen year = "`j'" 34 | gen state = "Madhya Pradesh" 35 | 36 | destring * , replace 37 | order id state district 38 | 39 | /* save temp file for month-year */ 40 | save $tmp/`i'`j' , replace 41 | 42 | } 43 | } 44 | 45 | clear 46 | 47 | /* reset global */ 48 | global month "january february march april may june july august september october november december" 49 | 50 | /* append all month-year data */ 51 | foreach j in $year { 52 | 53 | if `j' == 2021 { 54 | global month = "january february march april may" 55 | } 56 | 57 | foreach i in $month { 58 | 59 | append using $tmp/`i'`j' 60 | 61 | } 62 | } 63 | 64 | drop id 65 | 66 | /* save clean dataset unique on district-month-year */ 67 | save $tmp/mort_mp.dta, replace 68 | 
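/* Optional sanity check (a sketch added for illustration; not part of the
   original script): the comment above says the saved file should be unique on
   district-month-year, so an isid assertion would catch a month file that was
   accidentally appended twice. */
use $tmp/mort_mp.dta, clear
isid district month year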
-------------------------------------------------------------------------------- /b/clean_odisha_mort.do: -------------------------------------------------------------------------------- 1 | /*******************************/ 2 | /* Clean Odisha Mortality Data */ 3 | /*******************************/ 4 | 5 | /* to be appended with district-year mortality dataset */ 6 | 7 | /* import raw data */ 8 | import excel "$covidpub/private/mortality/raw/Odisha Analysis.xlsx", sheet("CRS") cellrange(B5:N34) clear 9 | 10 | /* drop redundant variables and rename them for reshape */ 11 | drop F G H I K L M 12 | 13 | ren B district 14 | ren C deaths2017 15 | ren D deaths2018 16 | ren E deaths2019 17 | ren J deaths2020 18 | ren N deaths2021 19 | 20 | /* reshape from wide to long on deaths */ 21 | reshape long deaths, i(district) j(year) 22 | 23 | /* generate variable for state */ 24 | gen state = "Odisha" 25 | 26 | /* create lgd_state variable to merge */ 27 | gen lgd_state_name = lower(state) 28 | 29 | /* merge in lgd state id */ 30 | merge m:1 lgd_state_name using $keys/lgd_state_key, keepusing(lgd_state_id) keep(match master) nogen 31 | 32 | /* now create an lgd_district variable to merge */ 33 | gen lgd_district_name = lower(district) 34 | 35 | /* save temp file */ 36 | save $tmp/mort_odisha, replace 37 | 38 | /* run masala merge */ 39 | keep lgd_state_name lgd_district_name 40 | duplicates drop 41 | masala_merge lgd_state_name using $keys/lgd_district_key, s1(lgd_district_name) minbigram(0.2) minscore(0.6) outfile($tmp/mort_lgd_district) 42 | 43 | /* check that all districts were matched to LGD */ 44 | count if match_source == 6 45 | di "`r(N)' districts were unmatched" 46 | 47 | /* keep master matches */ 48 | keep if match_source < 7 49 | 50 | /* drop redundant variables */ 51 | keep lgd_state_name lgd_district_name_using lgd_district_name_master lgd_district_id 52 | 53 | /* merge data back in */ 54 | ren lgd_district_name_master lgd_district_name 55 | merge 1:m lgd_state_name lgd_district_name using $tmp/mort_odisha 56 | drop _merge 57 | 58 | /* now replace the district name with the lgd key name */ 59 | drop lgd_district_name 60 | ren lgd_district_name_using lgd_district_name 61 | 62 | /* merge with PC11 districts */ 63 | merge m:m lgd_state_id lgd_district_id using "$keys/lgd_pc11_district_key.dta" 64 | keep if _merge == 3 65 | drop _merge lgd_district_name_local lgd_district_version 66 | la var deaths "Total reported deaths - CRS" 67 | 68 | /* add some pointers about data */ 69 | notes deaths: Data for Odisha provided by Chinmay Tumbe (IIM Ahmedabad) 70 | notes death: For Odisha, 2020 deaths are projected totals computed based on average growth factor of 2018 and 2019 71 | 72 | order lgd_state_id lgd_district_id lgd_state_name lgd_district_name state district deaths year pc11_* 73 | 74 | /* save clean district-year data to scratch */ 75 | save $tmp/mort_odisha_dist, replace 76 | 77 | /* collapse on state-year */ 78 | collapse (sum) deaths, by(lgd_state_id lgd_state_name state year pc11_state_id) 79 | 80 | /* save clean state-year data to scratch */ 81 | save $tmp/mort_odisha_state, replace 82 | -------------------------------------------------------------------------------- /b/clean_rajasthan_mort.do: -------------------------------------------------------------------------------- 1 | /**********************************/ 2 | /* Clean Rajasthan mortality data */ 3 | /**********************************/ 4 | 5 | /* import raw data from statsofindia repo */ 6 | import delimited 
"https://raw.githubusercontent.com/statsofindia/rajasthan-mortality/master/rajasthan-pehchan-districts-mortality-2018-2021.csv", clear 7 | 8 | /* create variables for month and day of death */ 9 | gen year = substr(date, 1, 4) 10 | gen month = substr(date, 6, 2) 11 | gen day = substr(date, 9, 2) 12 | destring year month day, replace 13 | 14 | /* collapse on date of death, district and gender */ 15 | collapse (sum) deaths, by(district year month) 16 | 17 | /* convert months from float to string for consistency */ 18 | str_month, float(month) string(str_month) 19 | 20 | /* generate state var */ 21 | gen state = "Rajasthan" 22 | 23 | /* re-order variables */ 24 | order state district deaths year month 25 | 26 | save $tmp/mort_rajasthan.dta, replace 27 | -------------------------------------------------------------------------------- /b/clean_up_mort.do: -------------------------------------------------------------------------------- 1 | /***************************/ 2 | /* Clean UP Mortality data */ 3 | /***************************/ 4 | 5 | /* import raw data */ 6 | import excel "$covidpub/private/mortality/raw/UP RTI- Death Certificates Issued.xlsx", sheet("Year-wise original") cellrange(A9:P308) clear 7 | 8 | /* preliminary cleaning */ 9 | replace B = B[_n-1] if mi(B) 10 | drop A 11 | drop if C == . 12 | drop P 13 | 14 | /* rename variabless */ 15 | ren B district 16 | ren C year 17 | ren D death_january 18 | ren E death_february 19 | ren F death_march 20 | ren G death_april 21 | ren H death_may 22 | ren I death_june 23 | ren J death_july 24 | ren K death_august 25 | ren L death_september 26 | ren M death_october 27 | ren N death_november 28 | ren O death_december 29 | 30 | /* reshape from wide to long */ 31 | destring death_november, replace 32 | reshape long death_, i(district year) j(month) string 33 | 34 | /* generate state variable and clean further */ 35 | gen state = "Uttar Pradesh" 36 | drop if death_ == . 
37 | ren death_ deaths 38 | 39 | /* label and destring numeric vars */ 40 | la var state "State" 41 | la var district "District" 42 | la var month "Month" 43 | la var year "Year" 44 | la var deaths "Total Reported Deaths - CRS" 45 | 46 | /* save clean data to scratch */ 47 | save $tmp/mort_up.dta, replace 48 | -------------------------------------------------------------------------------- /b/clean_wb_mort.do: -------------------------------------------------------------------------------- 1 | /************************************/ 2 | /* Clean West Bengal Mortality Data */ 3 | /************************************/ 4 | 5 | /* import raw data from statsofindia repo */ 6 | import delimited "https://raw.githubusercontent.com/statsofindia/india-mortality/master/district-level/West%20Bengal-districts.csv", clear 7 | 8 | /* create variables for month and day of death */ 9 | gen year = substr(date, 1, 4) 10 | gen month = substr(date, 6, 2) 11 | gen day = substr(date, 9, 2) 12 | destring year month day, replace 13 | 14 | /* collapse on date of death, district and gender */ 15 | collapse (sum) deaths, by(district year month) 16 | 17 | /* convert months from float to string for consistency */ 18 | str_month, float(month) string(str_month) 19 | 20 | /* generate state var */ 21 | gen state = "West Bengal" 22 | 23 | /* drop Kolkata from the dataset since we already have data for the district */ 24 | drop if district == "Kolkata" 25 | 26 | /* re-order variables */ 27 | order state district deaths year month 28 | 29 | save $tmp/mort_wb.dta, replace 30 | -------------------------------------------------------------------------------- /b/copy_keys.do: -------------------------------------------------------------------------------- 1 | /* copy keys for release in the covid data repository, with any 2 | necessary processing. 
*/ 3 | 4 | /* copy EC:PC keys */ 5 | shell cp $keys/pc11_district_key.dta $covidpub/keys/ 6 | shell cp $keys/pc11_ec13_district_key.dta $covidpub/keys/ 7 | shell cp $keys/pc11r_ec13r_key.dta $covidpub/keys/ 8 | shell cp $keys/pc11u_ec13u_key.dta $covidpub/keys/ 9 | 10 | /* copy LGD keys */ 11 | shell cp $keys/lgd_district_key.dta $covidpub/keys/ 12 | shell cp $keys/lgd_pc11_town_key.dta $covidpub/keys/ 13 | shell cp $keys/lgd_town_key.dta $covidpub/keys/ 14 | shell cp $keys/lgd_village_key.dta $covidpub/keys/ 15 | 16 | /* excise unnecessary fields from LGD PC11 village key */ 17 | use $keys/lgd_pc11_village_key.dta, clear 18 | keep pc11_state_id pc11_district_id pc11_subdistrict_id pc11_village_id lgd_state_id lgd_district_id lgd_subdistrict_id lgd_village_id lgd_pc11_match 19 | encode lgd_pc11_match, gen(tmp) 20 | drop lgd_pc11_match 21 | ren tmp lgd_pc11_match 22 | compress 23 | save $covidpub/keys/lgd_pc11_village_key.dta, replace 24 | -------------------------------------------------------------------------------- /b/create_google.do: -------------------------------------------------------------------------------- 1 | /* create dataset with google serach data for covid symptoms */ 2 | 3 | 4 | /* import csv file */ 5 | import delimited "$iec/covid/google/google_search_may2.csv", clear 6 | 7 | /* merge state keys */ 8 | merge m:1 pc11_state_name using $pc11/pc11_pca_state_clean.dta 9 | 10 | /* rename day variable in csv */ 11 | rename day date 12 | 13 | /* modify date variable to create stata data */ 14 | gen year = 20 15 | tostring year, generate(year2) 16 | gen date2 = date + year2 17 | drop date year year2 18 | rename date2 date 19 | 20 | /* gen stata date */ 21 | gen date2 = date(date, "DMY") 22 | format date2 %d 23 | 24 | /* rename date2 */ 25 | drop date 26 | rename date2 date 27 | 28 | /* keep relevant vars */ 29 | keep cough fever date pc11_state_id pc11_state_name pc11_pca_state_name 30 | 31 | /* rename symptom variables */ 32 | rename cough cough_score 33 | rename fever fever_score 34 | 35 | /* drop missing values if any */ 36 | drop if mi(fever_score) 37 | 38 | /* sort by state date */ 39 | sort pc11_state_id date 40 | 41 | /* save as stata dataset */ 42 | save $covidpub/google/google_top10_may.dta, replace 43 | -------------------------------------------------------------------------------- /b/gen_nss_district_key.do: -------------------------------------------------------------------------------- 1 | /* generates clean district key out of pdf-to-csv conversion of Appendix-I */ 2 | 3 | /* define lgd matching program */ 4 | qui do $ddl/covid/covid_progs.do 5 | 6 | /* load data */ 7 | insheet using $nss/nss-75-health/Appendix-I.csv, clear 8 | 9 | /* drop unnecessary vars */ 10 | drop v1 v3 v6 v11 v10 11 | 12 | /* gen state and district name vars */ 13 | gen nss_state_name = v9 14 | gen nss_district_name = v7 15 | gen nss_district_id = v8 16 | 17 | /* drop bad obs */ 18 | drop if real(nss_district_id) == . 
| real(nss_district_id) < 0 19 | 20 | /* format nss id variables */ 21 | destring nss_district_id, replace 22 | 23 | /* state and district name cleaning */ 24 | lgd_state_clean nss_state_name 25 | lgd_dist_clean nss_district_name 26 | 27 | /* state match to lgd key */ 28 | lgd_state_match nss_state_name 29 | /* arunachal pradesh and mizoram missing in nss key */ 30 | 31 | /* generate nss state id from the lgd state id variable*/ 32 | gen nss_state_id = real(lgd_state_id) 33 | 34 | 35 | /* district match to lgd key */ 36 | lgd_dist_match nss_district_name 37 | 38 | /* re-order vars */ 39 | order nss_state_id nss_district_id, first 40 | order nss_district_name, before(lgd_district_name) 41 | 42 | /* the final key has 649 obs */ 43 | /* original key had 648 obs */ 44 | /* jaintia hills was expanded into 2 obs bc lgd data has e & w. jaintia hills */ 45 | 46 | /* pull population weights to handle jaintia hills split - these can 47 | be drawn from pc11:LGD key, which has the same split */ 48 | preserve 49 | use $keys/lgd_pc11_district_key_weights.dta, clear 50 | 51 | /* make sure the key hasn't changed */ 52 | count if regexm(lower(pc11_district_name), "jaintia") 53 | assert `r(N)' == 2 54 | 55 | /* pull weights into locals */ 56 | sum pc11_lgd_wt_pop if regexm(lower(lgd_district_name), "east jaintia hills") 57 | local east = `r(mean)' 58 | sum pc11_lgd_wt_pop if regexm(lower(lgd_district_name), "west jaintia hills") 59 | local west = `r(mean)' 60 | 61 | /* back to the NSS key. weight is just 1 for all others */ 62 | restore 63 | gen nss_lgd_wt_pop = 1 64 | 65 | /* replace for split */ 66 | replace nss_lgd_wt_pop = `west' if regexm(lower(lgd_district_name), "west jaintia hills") 67 | replace nss_lgd_wt_pop = `east' if regexm(lower(lgd_district_name), "east jaintia hills") 68 | 69 | /* save */ 70 | save $nss/nss-75-health/nss75_lgd_district_key, replace 71 | save $covidpub/nss/nss75_lgd_district_key, replace 72 | -------------------------------------------------------------------------------- /b/gen_urbanization_subdist.do: -------------------------------------------------------------------------------- 1 | /* generate pc11 subdistrict-level urbanization dataset */ 2 | 3 | /* merge total/urban/rural data together */ 4 | use $pc11/pc11r_pca_subdistrict_clean.dta, clear 5 | ren pc11_pca* pc11r_pca* 6 | merge 1:1 pc11_state_id pc11_district_id pc11_subdistrict_id using $pc11/pc11u_pca_subdistrict_clean, gen(_m_pc11u) 7 | ren pc11_pca* pc11u_pca* 8 | merge 1:1 pc11_state_id pc11_district_id pc11_subdistrict_id using $pc11/pc11_pca_subdistrict_clean, gen(_m_pc11r) 9 | drop _m* 10 | 11 | /* generate urbanization variable */ 12 | gen pc11_urb_share = pc11u_pca_tot_p / pc11_pca_tot_p 13 | label var pc11_urb_share "Urbanization share of subdistrict" 14 | 15 | /* save */ 16 | save $tmp/pc11_pca_subd, replace 17 | 18 | /* zip */ 19 | cd $tmp 20 | !zip pc11_pca_subd.zip pc11_pca_subd.dta 21 | -------------------------------------------------------------------------------- /b/old/aggregate_case_data.do: -------------------------------------------------------------------------------- 1 | /* Agregate covid case data to district level */ 2 | 3 | /**********/ 4 | /* Deaths */ 5 | /**********/ 6 | use $covidpub/covid/covid_deaths_recoveries, clear 7 | 8 | /* keep only the deaths */ 9 | keep if patientstatus == "Deceased" 10 | 11 | /* create counter to get total number of deaths */ 12 | gen new_deaths = 1 13 | 14 | /* collapse to district-day */ 15 | collapse (sum) new_deaths, by(pc11_state_id pc11_district_id 
date) 16 | 17 | /* save as a tempfile */ 18 | save $tmp/deaths, replace 19 | 20 | /*********/ 21 | /* Cases */ 22 | /*********/ 23 | use $covidpub/covid/covid_cases_raw, clear 24 | 25 | /* rename date announced to simply date */ 26 | ren dateannounced date 27 | 28 | /* create counter to get total number of cases */ 29 | gen new_cases = 1 30 | 31 | /* collapse to district-day */ 32 | collapse (sum) new_cases, by(pc11_state_id pc11_district_id date) 33 | 34 | 35 | /*******************/ 36 | /* Merge and Clean */ 37 | /*******************/ 38 | merge 1:1 pc11_state_id pc11_district_id date using $tmp/deaths 39 | 40 | /* fill in missing new_cases and new_deaths with 0 */ 41 | replace new_cases = 0 if mi(new_cases) 42 | replace new_deaths = 0 if mi(new_deaths) 43 | 44 | /* create a numeric datetime */ 45 | gen datenum = clock(date, "DMY") 46 | 47 | /* sort by state, district, and date */ 48 | sort pc11_state_id pc11_district_id datenum 49 | 50 | /* count the running total of cases*/ 51 | bys pc11_state_id pc11_district_id: gen total_cases = sum(new_cases) 52 | 53 | /* count the running total of deaths */ 54 | bys pc11_state_id pc11_district_id: gen total_deaths = sum(new_deaths) 55 | 56 | drop _merge datenum 57 | 58 | /***************************************************************************/ 59 | /* Transform into a square dataset with district positive cases and deaths */ 60 | /***************************************************************************/ 61 | 62 | /* drop if we have no date-- hard to know what to do with these */ 63 | drop if mi(date) 64 | 65 | /* set a missing value for missing districts so they get counted */ 66 | replace pc11_district_id = "-99" if mi(pc11_district_id) 67 | 68 | /* create a single variable for state-district */ 69 | egen sdgroup = group(pc11_state_id pc11_district_id) 70 | 71 | /* create a Stata date field */ 72 | ren date datestr 73 | gen date = date(datestr, "DMY") 74 | format date %d 75 | 76 | /* fill in non-reporting dates */ 77 | assert !mi(pc11_state_id) & !mi(pc11_district_id) 78 | sort sdgroup date 79 | fillin date sdgroup 80 | 81 | /* fill in missing state and district ids created by the fillin */ 82 | xfill pc11_state_id, i(sdgroup) 83 | xfill pc11_district_id, i(sdgroup) 84 | xfill datestr, i(date) 85 | 86 | /* create a sequential row counter so we can use L for the last seen 87 | date even if not yesterday (fillin solves some of this but some dates 88 | had no reporting at all. 
*/ 89 | sort sdgroup date 90 | by sdgroup: egen row = seq() 91 | 92 | /* set as time series on the row */ 93 | sort sdgroup row 94 | xtset sdgroup row 95 | 96 | /* fill in zeroes with the new missing data */ 97 | replace new_cases = 0 if mi(new_cases) 98 | replace new_deaths = 0 if mi(new_deaths) 99 | 100 | /* fill in the cumulative count for days when nothing happened */ 101 | replace total_cases = 0 if datestr == "30/01/2020" & mi(total_cases) 102 | replace total_deaths = 0 if datestr == "30/01/2020" & mi(total_deaths) 103 | replace total_cases = L.total_cases if mi(total_cases) 104 | replace total_deaths = L.total_deaths if mi(total_deaths) 105 | 106 | /* drop unused fields */ 107 | drop _fillin datestr sdgroup row 108 | 109 | /* save total case and death data */ 110 | save $covidpub/covid/covid_cases_deaths_district, replace 111 | cap mkdir $covidpub/covid/csv 112 | export delimited $covidpub/covid/csv/covid_cases_deaths_district.csv, replace 113 | 114 | /* review number of confirmed/deaths in unknown districts */ 115 | sum total_* if date == 22029 116 | sum total_* if date == 22029 & pc11_district_id == "-99" 117 | -------------------------------------------------------------------------------- /b/old/get_lgd_keys.do: -------------------------------------------------------------------------------- 1 | /* use downloaded Local Government Data codes to formalize PC-LGD keys 2 | 3 | data downloaded 14 April 2020 from: https://lgdirectory.gov.in/ 4 | from the "data download" page, selecting the CSV option 5 | */ 6 | 7 | /**********/ 8 | /* States */ 9 | /**********/ 10 | import delimited $iec/lgd/raw/allStateofIndia2020_04_14_23_16_43_253.csv, charset("utf-8") delimit(";") clear 11 | ren census2001code pc01_state_id 12 | ren census2011code pc11_state_id 13 | 14 | /* convert id to 2-digit string */ 15 | tostring pc01_state_id, format("%02.0f") replace 16 | tostring pc11_state_id, format("%02.0f") replace 17 | 18 | /* save */ 19 | save $iec/keys/lgd/lgd_pc_state_key, replace 20 | 21 | /*************/ 22 | /* Districts */ 23 | /*************/ 24 | import delimited $iec/lgd/raw/allDistrictofIndia2020_04_14_23_23_07_748.csv, charset("utf-8") delimit(";") clear 25 | ren census2001code pc01_district_id 26 | ren census2011code pc11_district_id 27 | 28 | /* convert id to 2- or 3-digit string */ 29 | tostring pc01_district_id, format("%02.0f") replace 30 | tostring pc11_district_id, format("%03.0f") replace 31 | 32 | /* merge in the state id's */ 33 | merge m:1 statecode using $iec/keys/lgd/lgd_pc_state_key, keepusing(pc01_state_id pc11_state_id) 34 | drop _merge 35 | 36 | /* save */ 37 | save $iec/keys/lgd/lgd_pc_district_key, replace 38 | 39 | /****************/ 40 | /* Subdistricts */ 41 | /****************/ 42 | import delimited $iec/lgd/raw/allSubDistrictofIndia2020_04_14_23_23_17_755.csv, charset("utf-8") delimit(";") clear 43 | ren census2001code pc01_subdistrict_id 44 | ren census2011code pc11_subdistrict_id 45 | 46 | /* convert id to 4- or 5-digit string */ 47 | tostring pc01_subdistrict_id, format("%04.0f") replace 48 | tostring pc11_subdistrict_id, format("%05.0f") replace 49 | 50 | /* merge in the state and district id's */ 51 | merge m:1 statecode districtcode using $iec/keys/lgd/lgd_pc_district_key, keepusing(pc01_state_id pc11_state_id pc01_district_id pc11_district_id) keep(match master) 52 | drop _merge 53 | 54 | /* save */ 55 | save $iec/keys/lgd/lgd_pc_subdistrict_key, replace 56 | 57 | /************/ 58 | /* Villages */ 59 | /************/ 60 | import delimited 
$iec/lgd/raw/allVillagesofIndia2020_04_14_23_23_29_843.csv, charset("utf-8") delimit(";") clear 61 | ren census2001code pc01_village_id 62 | ren census2011code pc11_village_id 63 | 64 | /* merge in the state, district, and subdistrict id's */ 65 | merge m:1 statecode districtcode subdistrictcode using $iec/keys/lgd/lgd_pc_subdistrict_key, keepusing(pc01_state_id pc11_state_id pc01_district_id pc11_district_id pc01_subdistrict_id pc11_subdistrict_id) keep(match master) 66 | drop _merge 67 | 68 | /* convert id to 8- or 6-digit string */ 69 | tostring pc01_village_id, format("%08.0f") replace 70 | tostring pc11_village_id, format("%06.0f") replace 71 | 72 | /* save */ 73 | save $iec/keys/lgd/lgd_pc_village_key, replace 74 | -------------------------------------------------------------------------------- /b/old/mse_simple.m: -------------------------------------------------------------------------------- 1 | function mse_simple = mse_simple(x); 2 | 3 | %% the odds we're trying to match 4 | or_simple = [.05 .27 1 2.61 7.61 26.27]'; 5 | ln_or_simple = log(or_simple); 6 | 7 | %% create the x axis for age from 18-90 8 | age = [18:.1:90]'; 9 | 10 | %% predict odds at each age using cubic function x 11 | y = x(1) .* age.^3 + x(2) .* age.^2 + x(3) .* age + x(4); 12 | 13 | %% calculate difference between bin means and target odds ratios 14 | m1 = abs(mean(y(age >= 18 & age < 40)) - ln_or_simple(1)); 15 | m2 = abs(mean(y(age >= 40 & age < 50)) - ln_or_simple(2)); 16 | m3 = abs(mean(y(age >= 50 & age < 60)) - ln_or_simple(3)); 17 | m4 = abs(mean(y(age >= 60 & age < 70)) - ln_or_simple(4)); 18 | m5 = abs(mean(y(age >= 70 & age < 80)) - ln_or_simple(5)); 19 | m6 = abs(mean(y(age >= 80 & age < 90)) - ln_or_simple(6)); 20 | 21 | %% calculate MSE between means and target log odds ratios, with uniform weighting 22 | %% first bin gets scaled 2.2 since it is 18-40, while other bins are all width 10 23 | mse_simple = m1 * 2.2 + m2 + m3 + m4 + m5 + m6; 24 | 25 | %% penalize max changes in slope 26 | abs((y(3:721) - y(2:720)) - (y(2:720) - y(1:719))) 27 | f2 = max(abs((y(3:721) - y(2:720)) - (y(2:720) - y(1:719)))) 28 | mse_simple = mse_simple; 29 | 30 | %% fprintf("%5.2f,%5.2f,%5.2f,%5.2f\n", x(1), x(2), x(3), x(4)) 31 | 32 | -------------------------------------------------------------------------------- /b/old/predict_cts_uk_age_or_v2.m: -------------------------------------------------------------------------------- 1 | 2 | %% set odds ratios in bins and switch to logs since that is better for fitting 3 | or_simple = [.05 .27 1 2.61 7.61 26.27]'; 4 | or_full = [.07 .31 1 2.09 4.77 12.64]'; 5 | ln_or_simple = log(or_simple); 6 | ln_or_full = log(or_full); 7 | 8 | %% set standard solver parameters 9 | options = optimoptions(@fmincon,'MaxFunEvals',10000000,'Display','none','TolCon',0.0001,'TolFun',0.0001,'TolX',0.0001); 10 | 11 | %% start with a linear function 12 | x_start = [1 2 3 4]; 13 | [x, f_min, exit_flag, output] = fmincon(@mse_simple, x_start, [], [], [], [], [], [], [], options); 14 | 15 | age = [18:100]'; 16 | y = x(1) .* age.^3 + x(2) .* age.^2 + x(3) .* age + x(4); 17 | 18 | %% graph the fit 19 | clf; 20 | hold on 21 | scatter(med_age, ln_or_simple); 22 | plot(age,y) 23 | xlabel("log odds ratio") 24 | ylabel("age") 25 | write_png('/scratch/pn/fit_simple') 26 | 27 | %% %% generate predicted values 28 | %% predicted_or_simple = fit_simple(age); 29 | %% predicted_or_full = fit_full(age); 30 | %% 31 | %% %% write these to a file 32 | %% writematrix([age predicted_or_simple 
predicted_or_full],'/scratch/pn/uk_age_fits.csv') 33 | %% 34 | %% %% prepend a header to the file 35 | %% system('echo "age,ln_or_simple,ln_or_full" >~/iec/covid/covid/csv/uk_age_predicted_or.csv'); 36 | %% system('cat /scratch/pn/uk_age_fits.csv >>~/iec/covid/covid/csv/uk_age_predicted_or.csv'); 37 | %% fprintf("Writing uk_age_predicted_or.csv\n"); 38 | -------------------------------------------------------------------------------- /b/pc11_lgd_metadata.csv: -------------------------------------------------------------------------------- 1 | variablename,aggregationmethod,label 2 | tot_old,sum 3 | -------------------------------------------------------------------------------- /b/prep_bihar.do: -------------------------------------------------------------------------------- 1 | /**************************************/ 2 | /* prepare bihar hospitalization data */ 3 | /**************************************/ 4 | 5 | /* import and lcase raw data */ 6 | import excel $health/bihar/raw/bihar_ventilators_beds_v2.xlsx, clear sheet("Data - Public Hospitals - Bihar") firstrow 7 | ren *, lower 8 | 9 | /* clean district name */ 10 | replace district = lower(district) 11 | ren district lgd_district_name 12 | gen lgd_state_name = "bihar" 13 | drop if lgd_district_name == "bihar" 14 | 15 | /* run standard district name fixes */ 16 | synonym_fix lgd_district_name, synfile(~/ddl/covid/b/str/lgd_district_fixes.txt) replace group(lgd_state_name) 17 | 18 | /* merge to the district key to get standardized ids */ 19 | merge 1:1 lgd_state_name lgd_district_name using $keys/lgd_district_key, assert(using match) keepusing(lgd_state_id lgd_district_id) 20 | keep if lgd_state_name == "bihar" 21 | assert _merge == 3 22 | drop _merge 23 | 24 | /* save clean bihar hospital data */ 25 | drop srno 26 | order lgd_state_id lgd_district_id lgd_state_name lgd_district_name 27 | save $health/bihar/bihar_moh_hospitals, replace 28 | 29 | 30 | /***************************/ 31 | /* prepare bihar case data */ 32 | /***************************/ 33 | /* open and lowercase raw data */ 34 | import excel $health/bihar/raw/bihar_case_data_may11.xlsx, clear firstrow 35 | ren *, lower 36 | drop sno 37 | 38 | /* rename vars */ 39 | ren causeofsample contacttrace1 40 | ren h contacttrace2 41 | 42 | /* clean district name */ 43 | replace district = lower(district) 44 | ren district lgd_district_name 45 | gen lgd_state_name = "bihar" 46 | 47 | /* run standard district name fixes */ 48 | synonym_fix lgd_district_name, synfile(~/ddl/covid/b/str/lgd_district_fixes.txt) replace group(lgd_state_name) 49 | 50 | /* merge to the district key to get standardized ids */ 51 | /* note we keep using-only districts --- they have no cases yet */ 52 | merge m:1 lgd_state_name lgd_district_name using $keys/lgd_district_key, keepusing(lgd_state_id lgd_district_id) 53 | keep if lgd_state_name == "bihar" 54 | review_merge lgd_district_name 55 | assert _merge != 1 56 | drop _merge 57 | 58 | /* save clean bihar case data */ 59 | order lgd_state_id lgd_district_id lgd_state_name lgd_district_name 60 | save $health/bihar/bihar_moh_cases, replace 61 | 62 | -------------------------------------------------------------------------------- /b/prep_ec_hosp.do: -------------------------------------------------------------------------------- 1 | use $covidpub/hospitals/ec_hosp_microdata, clear 2 | 3 | /* require employment of at least 5 to be counted */ 4 | /* NOTE: National Health Profile used 20, we get better correlation with DLHS/PC 5 | on gov hospitals with smaller 
thresholds. */ 6 | keep if emp_all >= 5 7 | 8 | /* create a firm-level counter to get a firm count */ 9 | gen count_all = 1 10 | 11 | /* collapse count and employment in each type of facility, by ec13 code */ 12 | /* note village id and town id are the same thing */ 13 | destring sector, replace 14 | collapse (firstnm) sector (sum) count_all emp_all, by(gov nic ec13_state_id ec13_district_id ec13_subdistrict_id ec13_village_id ec13_town_id) 15 | 16 | /* convert data into wide format so we can collapse to village/town level */ 17 | gen class = string(nic) + "_" + string(gov) 18 | ren *all *all_ 19 | drop nic gov 20 | reshape wide count_all_ emp_all_, i(ec13_state_id ec13_district_id ec13_subdistrict_id ec13_village_id ec13_town_id sector) j(class) string 21 | 22 | ren *_1 *_gov 23 | ren *_0 *_priv 24 | 25 | /* get PC village codes */ 26 | merge m:1 ec13_state_id ec13_district_id ec13_subdistrict_id ec13_village_id using $covidpub/keys/pc11r_ec13r_key, keepusing(pc11_state_id pc11_district_id pc11_subdistrict_id pc11_village_id) 27 | drop if _merge == 2 28 | foreach v in state district subdistrict village { 29 | ren pc11_`v'_id tmp_pc11_`v'_id 30 | } 31 | ren _merge _merge_v 32 | 33 | /* get PC town codes */ 34 | merge m:1 ec13_state_id ec13_district_id ec13_subdistrict_id ec13_town_id using $covidpub/keys/pc11u_ec13u_key, keepusing(pc11_state_id pc11_district_id pc11_subdistrict_id pc11_town_id) 35 | drop if _merge == 2 36 | ren _merge _merge_t 37 | 38 | /* restore the variables from the first merge */ 39 | replace pc11_state_id = tmp_pc11_state_id if mi(pc11_state_id) 40 | replace pc11_district_id = tmp_pc11_district_id if mi(pc11_district_id) 41 | replace pc11_subdistrict_id = tmp_pc11_subdistrict_id if mi(pc11_subdistrict_id) 42 | ren tmp_pc11_village_id pc11_village_id 43 | drop tmp* 44 | 45 | /* systematically rename all variables */ 46 | ren emp_all_* emp_* 47 | ren count_all_* num_* 48 | 49 | /* drop outpatient practices, psych hospitals, etc. */ 50 | drop *862* *871* *872* *879* *869* *873* 51 | 52 | ren *861* *hosp* 53 | drop _merge* 54 | 55 | /* label urban/rural sector */ 56 | recode sector 1=1 2=0 57 | rename sector rural 58 | 59 | /* label variables */ 60 | // label_from_gdoc, docid("1h6G4vYL3lvy4Bi8DTY3pMT2-5aVWOBoxAm3plx4M7qQ") 61 | save $covidpub/hospitals/ec_hospitals_tv, replace 62 | cap mkdir $covidpub/hospitals/csv 63 | export delimited $covidpub/hospitals/csv/ec_hospitals_tv.csv, replace 64 | 65 | /* COLLAPSE TO DISTRICT LEVEL */ 66 | use $covidpub/hospitals/ec_hospitals_tv, clear 67 | 68 | /* get district ids (can't use village/town match since we had some missing locations) */ 69 | ren pc11_state_id tmp_pc11_state_id 70 | ren pc11_district_id tmp_pc11_district_id 71 | 72 | /* get pc11 district ids */ 73 | merge m:1 ec13_state_id ec13_district_id using $covidpub/keys/pc11_ec13_district_key, keepusing(pc11_state_id pc11_district_id) 74 | drop if _merge == 2 75 | assert _merge == 3 76 | drop _merge 77 | 78 | /* see if they match (they better!)
*/ 79 | count if pc11_state_id != tmp_pc11_state_id & !mi(tmp_pc11_state_id) 80 | count if pc11_district_id != tmp_pc11_district_id & !mi(tmp_pc11_district_id) 81 | drop tmp* 82 | list *hosp* if mi(pc11_district_id) 83 | 84 | /* replace all delhi districts with missing so it all gets collapsed into 1 */ 85 | //replace pc11_district_id = "" if ec13_state_id == "07" 86 | 87 | /* sum the numbers to pc11 districts */ 88 | collapse (sum) *hosp*, by(pc11_state_id pc11_district_id) 89 | 90 | /* prefix all vars with EC prefix */ 91 | ren *hosp* ec_*hosp* 92 | 93 | /* drop instances without a state id */ 94 | drop if mi(pc11_state_id) 95 | 96 | /* label from the google sheet dictionary */ 97 | // label_from_gdoc, docid("1h6G4vYL3lvy4Bi8DTY3pMT2-5aVWOBoxAm3plx4M7qQ") 98 | save $covidpub/hospitals/pc11/ec_hospitals_dist_pc11, replace 99 | export delimited $covidpub/hospitals/csv/ec_hospitals_dist_pc11.csv, replace 100 | 101 | /* create LGD version */ 102 | convert_ids, from_ids(pc11_state_id pc11_district_id) to_ids(lgd_state_id lgd_district_id) labels key($keys/lgd_pc11_district_key_weights.dta) weight_var(pc11_lgd_wt_pop) metadata_urls(https://docs.google.com/spreadsheets/d/e/2PACX-1vSq7qkpXS2QFatP_35deNi0ZeHNVgSMr4JHKaxx3pZgefp4cw4iqRMo0GRPMe0-h3n6BEoHPuzQEgmc/pub?gid=1900447643&single=true&output=csv) 103 | save $covidpub/hospitals/ec_hospitals_dist, replace 104 | export delimited $covidpub/hospitals/csv/ec_hospitals_dist.csv, replace 105 | -------------------------------------------------------------------------------- /b/prep_ec_hosp_microdata.do: -------------------------------------------------------------------------------- 1 | /* open partially cleaned EC13 */ 2 | /* this temp file is generated by core/ecpc/ecmerge/collapse_ecs.do */ 3 | use $tmp/ec13_precollapse_tmp.dta, clear 4 | 5 | /* keep healthcare-related activities */ 6 | keep if inlist(nic, 861, 862, 869, 871, 872, 873, 879) 7 | 8 | drop *shric* 9 | 10 | /* only keep the sector, location, employment, and activity fields */ 11 | keep sector emp_all gov nic ec13_state_id ec13_district_id ec13_subdistrict_id ec13_village_id ec13_town_id 12 | 13 | save $covidpub/hospitals/ec_hosp_microdata, replace 14 | cap mkdir $covidpub/hospitals/csv 15 | export delimited $covidpub/hospitals/csv/ec_hosp_microdata.csv, replace 16 | -------------------------------------------------------------------------------- /b/prep_hosp_pca_vd.do: -------------------------------------------------------------------------------- 1 | /************/ 2 | /* Villages */ 3 | /************/ 4 | use $pc11/pc11_vd_clean.dta, clear 5 | 6 | /* keep the demographic and health infrastructure variables */ 7 | 8 | /* check missing data percent for various health center variables */ 9 | keep *id pc11_vd_nc* pc11_vd_asha pc11_vd_med* pc11_vd_fwc_cntr pc11_vd_mh_cln pc11_vd_disp pc11_vd_altmed_hosp pc11_vd_all_hosp pc11_vd_tb_cln pc11_vd_mcw_cntr pc11_vd_phs_cntr pc11_vd_ph_cntr pc11_vd_ch_cntr *_doc_* *_pmed_* pc11_vd_ch_cntr pc11_vd_ph_cntr pc11_vd_phs_cntr pc11_vd_tb_cln pc11_vd_all_hosp pc11_vd_disp pc11_vd_mh_cln pc11_vd_med_in_out_pat pc11_vd_med_c_hosp_home 10 | 11 | /* merge with pca clean data at village level */ 12 | merge 1:1 pc11_state_id pc11_district_id pc11_subdistrict_id pc11_village_id using $pc11/pc11r_pca_clean.dta, keepusing(pc11_pca_tot_p) 13 | keep if _merge == 3 14 | drop _merge 15 | 16 | /* save rural PCA and VD subset in data repo */ 17 | compress 18 | save $covidpub/hospitals/pc11r_hosp, replace 19 | cap mkdir $covidpub/hospitals/csv 20 | export delimited 
$covidpub/hospitals/csv/pc11r_hosp.csv, replace 21 | 22 | /*********/ 23 | /* Towns */ 24 | /*********/ 25 | use $pc11/pc11_td_clean.dta, clear 26 | 27 | /* keep the town directory hospital and clinic fields */ 28 | keep *id pc11_td_med* pc11_td_disp pc11_td_all_hosp pc11_td_alt_hospital *_doc_* *_pmed_* *_beds *clinic pc11_td_all_hospital pc11_td_disp pc11_td_tb_clinic pc11_td_nur_homes pc11_td_mh_clinic pc11_td_in_out_pat pc11_td_c_hosp_home 29 | 30 | /* rename badly named allh to all for consistency with rural */ 31 | ren *_allh_* *_all_hosp_* 32 | ren pc11_td_all_hospital pc11_td_all_hosp 33 | 34 | /* make a few other fields consistent */ 35 | ren pc11_td_tb_clinic pc11_td_tbc 36 | ren pc11_td_nur_homes pc11_td_nh 37 | ren pc11_td_mh_clinic pc11_td_mh 38 | 39 | /* merge with pca clean data at town level */ 40 | merge 1:1 pc11_state_id pc11_district_id pc11_subdistrict_id pc11_town_id using $pc11/pc11u_pca_clean.dta, keepusing(pc11_pca_tot_p) 41 | keep if _merge == 3 42 | drop _merge 43 | 44 | /* save urban PCA and TD subset in data repo */ 45 | compress 46 | save $covidpub/hospitals/pc11u_hosp, replace 47 | cap mkdir $covidpub/hospitals/csv 48 | export delimited $covidpub/hospitals/csv/pc11u_hosp.csv, replace 49 | -------------------------------------------------------------------------------- /b/prep_secc.do: -------------------------------------------------------------------------------- 1 | use ~/iec2/secc/final/collapse/village_consumption_imputed_pc11.dta, clear 2 | 3 | keep pc11_state_id pc11_village_id secc_cons_per_cap 4 | 5 | save $tmp/secc_cons_pc11, replace 6 | 7 | /* get district identifiers */ 8 | merge 1:1 pc11_state_id pc11_village_id using $pc11/pc11r_pca_clean.dta, keepusing(pc11_district_id) 9 | 10 | /* collapse to district level */ 11 | keep if _merge == 3 12 | drop _merge 13 | 14 | collapse (mean) secc_cons_per_cap, by(pc11_state_id pc11_district_id) 15 | 16 | save $tmp/secc_cons_pc11_district, replace 17 | 18 | 19 | use $hosp/hospitals_dist, clear 20 | 21 | merge 1:1 pc11_state_id pc11_district_id using $tmp/secc_cons_pc11_district 22 | keep if _merge == 3 23 | drop _merge 24 | 25 | 26 | sum pc_perk_beds_tot dlhs4_perk_total_beds 27 | corr pc_perk_beds_tot ec_perk_emp_hosp_gov 28 | 29 | corr pc_perk_beds_tot ec_perk_emp_hosp_tot 30 | 31 | corr pc_perk_beds_tot secc_cons_per_cap 32 | corr dlhs4_perk_total_beds secc_cons_per_cap 33 | -------------------------------------------------------------------------------- /b/push_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | 5 | ############# 6 | # ship data # 7 | ############# 8 | 9 | # set list of folders to be pushed from $covidpub (not all folders will be shared) 10 | dirs="covid demography estimates hospitals keys migration agmark nfhs hmis mortality" 11 | 12 | # send public data from these folders to Dropbox via rclone (rclone must be configured) 13 | for dir in $dirs; do 14 | rclone copy --progress ~/iec/covid/$dir my_remote:SamPaul/covid_data/$dir/ 15 | done 16 | 17 | -------------------------------------------------------------------------------- /b/str/cov19india_district_fixes.txt: -------------------------------------------------------------------------------- 1 | master,district,state 2 | y.s.r.,y.s.r kadapa,andhra pradesh 3 | y.s.r.,ysr kadapa,andhra pradesh 4 | y.s.r.,y.s.r. kadapa,andhra pradesh 5 | y.s.r.,y s r,andhra pradesh 6 | spsr nellore,s.p.s. 
nellore,andhra pradesh 7 | spsr nellore,sri potti sriramulu nellore,andhra pradesh 8 | dibang valley,upper dibang valley,arunachal pradesh 9 | papum pare,capital complex,arunachal pradesh 10 | south salmara mancachar,south salmara mankachar,assam 11 | aurangabad,aurangabad bihar,bihar 12 | purbi champaran,purba champaran,bihar 13 | purbi champaran,west champaran,bihar 14 | pashchim champaran,east champaran,bihar 15 | kaimur (bhabua),kaimur,bihar 16 | kaimur (bhabua),kaimur bhabua,bihar 17 | madhepura,madhopura,bihar 18 | bemetara,bametara,chhattisgarh 19 | bilaspur,bilaspur cg,chhattisgarh 20 | dantewada,dakshin bastar dantewada,chhattisgarh 21 | gariyaband,gariaband,chhattisgarh 22 | janjgir-champa,janjgir champa,chhattisgarh 23 | kabirdham,kabeerdham,chhattisgarh 24 | uttar bastar kanker,uttar bastar kanker,chhattisgarh 25 | bilaspur,gaurela pendra marwahi,chhattisgarh 26 | ahmadabad,ahmedabad,gujarat 27 | sabar kantha,sabarkantha,gujarat 28 | panch mahals,panchmahal,gujarat 29 | mehsana,mahesana,gujarat 30 | kachchh,kutch,gujarat 31 | chota udaipur,chhota udaipur,gujarat 32 | banas kantha,banaskantha,gujarat 33 | charkhi dadri,charki dadri,haryana 34 | shopiyan,shupiyan,jammu and kashmir 35 | baramulla,baramula,jammu and kashmir 36 | bandipora,bandipore,jammu and kashmir 37 | budgam,badgam,jammu and kasmir 38 | koderma,kodarma,jharkhand 39 | saraikela kharsawan,saraikela,jharkhand 40 | bengaluru urban,bengaluru,karnataka 41 | ahmednagar,ahmadnagar,maharashtra 42 | beed,bid,maharashtra 43 | raigarh,raigad,maharashtra 44 | gondiya,gondia,maharashtra 45 | buldhana,buldana,maharashtra 46 | deogarh,debagarh,odisha 47 | baleshwar,balasore,odisha 48 | jajapur,jajpur,odisha 49 | ferozepur,firozpur,punjab 50 | dholpur,dhaulpur,rajasthan 51 | chittaurgarh,chittorgarh,rajasthan 52 | west district,west sikkim,sikkim 53 | east district,east sikkim,sikkim 54 | north district,north sikkim,sikkim 55 | south district,south sikkim,sikkim 56 | the nilgiris,nilgiris,tamil nadu 57 | jayashankar bhupalapally,jayashankar,tamil nadu 58 | kumuram bheem asifabad,komaram bheem,tamil nadu 59 | kanniyakumari,kanyakumari,tamil nadu 60 | jangoan,jangaon,telangana 61 | jagitial,jagtial,telangana 62 | mahrajganj,maharajganj,uttar pradesh 63 | kheri,lakhimpur kheri,uttar pradesh 64 | barabanki,bara banki,uttar pradesh 65 | medinipur east,purba medinipur,west bengal 66 | 67 | -------------------------------------------------------------------------------- /b/str/cov19india_vaccine_district_fixes.txt: -------------------------------------------------------------------------------- 1 | master,lgd_district_name,lgd_state_name 2 | dohad,dahod,gujarat 3 | east nimar,khandwa,madhya pradesh 4 | y.s.r.,y.s.r. kadapa,andhra pradesh 5 | spsr nellore,s.p.s. 
nellore,andhra pradesh 6 | spsr nellore,sri potti sriramulu nellore,andhra pradesh 7 | dibang valley,upper dibang valley,arunachal pradesh 8 | papum pare,capital complex,arunachal pradesh 9 | south salmara mancachar,south salmara mankachar,assam 10 | aurangabad,aurangabad bihar,bihar 11 | purbi champaran,purba champaran,bihar 12 | purbi champaran,west champaran,bihar 13 | pashchim champaran,east champaran,bihar 14 | kaimur (bhabua),kaimur,bihar 15 | kaimur (bhabua),kaimur bhabua,bihar 16 | madhepura,madhopura,bihar 17 | bemetara,bametara,chhattisgarh 18 | bilaspur,bilaspur cg,chhattisgarh 19 | dantewada,dakshin bastar dantewada,chhattisgarh 20 | gariyaband,gariaband,chhattisgarh 21 | janjgir-champa,janjgir champa,chhattisgarh 22 | kabirdham,kabeerdham,chhattisgarh 23 | uttar bastar kanker,uttar bastar kanker,chhattisgarh 24 | bilaspur,gaurela pendra marwahi,chhattisgarh 25 | ahmadabad,ahmedabad,gujarat 26 | sabar kantha,sabarkantha,gujarat 27 | panch mahals,panchmahal,gujarat 28 | mehsana,mahesana,gujarat 29 | kachchh,kutch,gujarat 30 | chota udaipur,chhota udaipur,gujarat 31 | banas kantha,banaskantha,gujarat 32 | charkhi dadri,charki dadri,haryana 33 | shopiyan,shupiyan,jammu and kashmir 34 | baramulla,baramula,jammu and kashmir 35 | bandipora,bandipore,jammu and kashmir 36 | budgam,badgam,jammu and kasmir 37 | koderma,kodarma,jharkhand 38 | saraikela kharsawan,saraikela,jharkhand 39 | bengaluru urban,bengaluru,karnataka 40 | ahmednagar,ahmadnagar,maharashtra 41 | beed,bid,maharashtra 42 | raigarh,raigad,maharashtra 43 | gondiya,gondia,maharashtra 44 | buldhana,buldana,maharashtra 45 | deogarh,debagarh,odisha 46 | baleshwar,balasore,odisha 47 | jajapur,jajpur,odisha 48 | ferozepur,firozpur,punjab 49 | dholpur,dhaulpur,rajasthan 50 | chittaurgarh,chittorgarh,rajasthan 51 | west district,west sikkim,sikkim 52 | east district,east sikkim,sikkim 53 | north district,north sikkim,sikkim 54 | south district,south sikkim,sikkim 55 | the nilgiris,nilgiris,tamil nadu 56 | jayashankar bhupalapally,jayashankar,tamil nadu 57 | kumuram bheem asifabad,komaram bheem,tamil nadu 58 | kanniyakumari,kanyakumari,tamil nadu 59 | jangoan,jangaon,telangana 60 | jagitial,jagtial,telangana 61 | mahrajganj,maharajganj,uttar pradesh 62 | kheri,lakhimpur kheri,uttar pradesh 63 | barabanki,bara banki,uttar pradesh 64 | medinipur east,purba medinipur,west bengal 65 | tuticorin,thoothukkudi,tamil nadu 66 | -------------------------------------------------------------------------------- /b/str/covid_district_fixes.txt: -------------------------------------------------------------------------------- 1 | master,district,state 2 | vizianagaram,vizianagram,andhra pradesh 3 | kaimur bhabua,kaimur,bihar 4 | darbhanga,dharbanga,bihar 5 | kolar,kolara,karnataka 6 | alappuzha,alapuzha,kerala 7 | ernakulam,kochi,kerala 8 | adilabad,asifabad,telangana 9 | pauri garhwal,garhwal,uttarakhand 10 | nainital,nanital,uttarakhand 11 | amroha,phule,uttar pradesh 12 | amroha,jyotiba nagar,uttar pradesh 13 | budaun,budaon,uttar pradesh 14 | deoria,devariya,uttar pradesh 15 | mahamaya nagar,hathras,uttar pradesh 16 | 17 | -------------------------------------------------------------------------------- /b/str/lgd_district_fixes.txt: -------------------------------------------------------------------------------- 1 | master,lgd_district_name,lgd_state_name 2 | kaimur (bhabua),kaimur bhabua,bihar 3 | pashchim champaran,west champaran,bihar 4 | purba champaran,purbi champaran, bihar 5 | purbi champaran,east champaran,bihar 6 | purnia,purnea,bihar 
7 | kaimur (bhabua),kaimur,bihar 8 | muzaffarpur,muzzafarpur,bihar 9 | sheikhpura,sheikpura,bihar 10 | ayodhya,faizabad,uttar pradesh 11 | prayagraj,allahabad,uttar pradesh 12 | kamrup metro,kamrup m,assam 13 | kamrup rural,kamrup r,assam 14 | y s r,cuddapah,andhra pradesh 15 | y s r,y.s.r. (cuddapah),andhra pradesh 16 | nuh,mewat,haryana 17 | kalaburagi,gulbarga,karnataka 18 | amethi,c s m nagar,uttar pradesh 19 | amroha,jyotiba phule nagar,uttar pradesh 20 | amroha,jyotiba nagar,uttar pradesh 21 | leh ladakh,leh,ladakh 22 | leh ladakh,ladakh,ladakh 23 | sant kabeer nagar,skn,uttar pradesh 24 | bhadohi,srnb,uttar pradesh 25 | bhadohi,sant ravidas nagar,uttar pradesh 26 | hathras,mahamaya nagar,uttar pradesh 27 | janjgir-champa,janjgir,chhattisgarh 28 | s.a.s nagar,sahibzada ajit singh,punjab 29 | shahid bhagat singh nagar,sbs nagar,punjab 30 | faridabad,gbn faridabad,haryana 31 | barddhaman,bardhaman,west bengal 32 | gurugram,gurgaon,haryana 33 | khargone,west nimar,madhya pradesh 34 | east nimar,khandwa,madhya pradesh 35 | baleshwar,balasore,odisha 36 | kachchh,kutch,gujarat 37 | tuticorin,thoothukkudi,tamil nadu 38 | purba bardhaman,east bardhaman,west bengal 39 | paschim bardhaman,west bardhaman,west bengal 40 | east medinipur,medinipur east,west bengal 41 | west medinipur,medinipur west,west bengal 42 | -------------------------------------------------------------------------------- /b/update_case_cronjob.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set this up with the following cron command (executes just after midnight daily): 4 | # $ crontab -l 5 | # $ 5 5 * * * $HOME/ddl/covid/b/update_case_cronjob.sh 6 | 7 | # depends on slack messaging hook in env variable SLACKKEY 8 | if [[ -z "$SLACKKEY" ]]; then 9 | printf "\nENV variable $SLACKKEY must be defined for cronjob to execute. Add to your .bashrc\n" 10 | fi 11 | 12 | # send init message via slack 13 | curl -X POST -H 'Content-type: application/json' --data '{"text":":building_construction: Beginning auto-update of COVID case and vaccination data"}' https://hooks.slack.com/services/$SLACKKEY 14 | 15 | # change dir to scratch for logging 16 | cd /scratch/`whoami` 17 | 18 | # run update script 19 | printf "\nbegin update script: ~/ddl/covid/b/update_case_vaccination_data.do\n" 20 | stata -b do ~/ddl/covid/b/update_case_vaccination_data.do 21 | 22 | # check log for errors 23 | printf "\nchecking Stata log for errors...\n" 24 | if egrep --before-context=1 --max-count=1 "^r\([0-9]+\);$" "update_case_vaccination_data.log" 25 | then 26 | # send error message 27 | printf "\nFAIL - you have a data dumpster fire on your hands!" 28 | curl -X POST -H 'Content-type: application/json' --data '{"text":":rotating_light: FAILURE: auto-update of COVID data had non-zero exit status"}' https://hooks.slack.com/services/$SLACKKEY 29 | exit 1 30 | else 31 | # send success message 32 | curl -X POST -H 'Content-type: application/json' --data '{"text":":not-a-dumpster-fire: Successful update of COVID data!"}' https://hooks.slack.com/services/$SLACKKEY 33 | printf "\nSuccess!" 34 | exit 0 35 | fi 36 | 37 | # move back to starting dir 38 | cd - 39 | -------------------------------------------------------------------------------- /b/update_case_vaccination_data.do: -------------------------------------------------------------------------------- 1 | /* this file updates just the case count and vaccination data, the most frequently updated files on our platform. 
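It pulls fresh case and vaccination extracts, verifies that district-date observations are unique and that the panel is square (the same number of days for every district), and then pushes the refreshed CSV and DTA files to Dropbox via rclone.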
*/ 2 | 3 | /* get new case data */ 4 | setc covid 5 | do $ccode/b/get_case_data.do 6 | import delimited using $covidpub/covid/csv/covid_infected_deaths.csv, clear 7 | 8 | /* check last date */ 9 | quietly { 10 | gen date_fmt = date(date, "DMY") 11 | egen latest_date = max(date_fmt) 12 | lab var latest_date "Last Day in the data:" 13 | format latest_date %td 14 | noi tab latest_date 15 | } 16 | 17 | /* run checks */ 18 | is_unique lgd_state_id lgd_state_name lgd_district_name date 19 | 20 | /* check that data is square */ 21 | gen n = 1 22 | bys lgd_state_id lgd_state_name lgd_district_name: egen num_days = total(n) 23 | qui distinct num_days 24 | local square_check = `r(ndistinct)' 25 | if `square_check' != 1 { 26 | disp_nice "Data is not square, it should have the same number of observations (days) for each district." 27 | exit 9 28 | } 29 | 30 | /* get new vaccination data */ 31 | do $ccode/b/get_vaccination_data.do 32 | 33 | /* import the csv data */ 34 | import delimited using $covidpub/covid/csv/covid_vaccination.csv, clear 35 | 36 | /* check last date */ 37 | quietly { 38 | gen date_fmt = date(date, "DMY") 39 | egen latest_date = max(date_fmt) 40 | lab var latest_date "Last Day in the data:" 41 | format latest_date %td 42 | noi tab latest_date 43 | } 44 | 45 | /* run checks */ 46 | is_unique lgd_state_id lgd_state_name lgd_district_name date 47 | 48 | /* check that data is square */ 49 | gen n = 1 50 | bys lgd_state_id lgd_state_name lgd_district_name: egen num_days = total(n) 51 | qui distinct num_days 52 | local square_check = `r(ndistinct)' 53 | if `square_check' != 1 { 54 | disp_nice "Data is not square, it should have the same number of observations (days) for each district." 55 | exit 9 56 | } 57 | 58 | /* check how many days are in the data - should be more than 97 as of 23 april 2021 */ 59 | qui sum num_days 60 | local num_days = `r(mean)' 61 | if `num_days' < 97 { 62 | disp_nice "Data is missing. There should be more than 97 days of data." 63 | exit 9 64 | } 65 | 66 | /* rclone needed data files to dropbox. 
CSV first */ 67 | shell rclone copyto --progress ~/iec/covid/covid/csv/covid_infected_deaths.csv my_remote:SamPaul/covid_data/covid/csv/covid_infected_deaths.csv 68 | shell rclone copyto --progress ~/iec/covid/covid/csv/covid_infected_deaths_pc11.csv my_remote:SamPaul/covid_data/covid/csv/covid_infected_deaths_pc11.csv 69 | shell rclone copyto --progress ~/iec/covid/covid/csv/covid_vaccination.csv my_remote:SamPaul/covid_data/covid/csv/covid_vaccination.csv 70 | 71 | /* now dta */ 72 | shell rclone copyto --progress ~/iec/covid/covid/covid_infected_deaths.dta my_remote:SamPaul/covid_data/covid/covid_infected_deaths.dta 73 | shell rclone copyto --progress ~/iec/covid/covid/covid_infected_deaths_pc11.dta my_remote:SamPaul/covid_data/covid/covid_infected_deaths_pc11.dta 74 | shell rclone copyto --progress ~/iec/covid/covid/covid_vaccination.dta my_remote:SamPaul/covid_data/covid/covid_vaccination.dta 75 | -------------------------------------------------------------------------------- /b/vaccination_plot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import getpass 5 | import datetime 6 | import matplotlib.pyplot as plt 7 | 8 | # vaccination dataframe 9 | vdf = pd.read_stata(os.path.expanduser("~/iec/covid/covid/covid_vaccination.dta")) 10 | 11 | # case data 12 | cdf = pd.read_stata(os.path.expanduser("~/iec/covid/covid/covid_infected_deaths.dta")) 13 | 14 | # population data 15 | pdf = pd.read_stata(f"/scratch/{getpass.getuser()}/lgd_pca_district_pop.dta") 16 | 17 | # get dates as string 18 | cdf['string_date'] = cdf['date'].apply(lambda x: x.strftime("%d%m%Y")) 19 | vdf = vdf.rename(columns={"date": "string_date"}) 20 | 21 | # merge case data and vaccination data 22 | df = cdf.merge(vdf, on=["lgd_state_name", "lgd_state_id", "lgd_district_name", "string_date"], how="outer") 23 | 24 | # merge in population data 25 | df = df.merge(pdf, on=["lgd_state_name", "lgd_state_id", "lgd_district_name", "lgd_district_id"], how="left") 26 | 27 | # get date as datetime object 28 | df['date'] = df['string_date'].apply(lambda x: datetime.datetime.strptime(x, "%d%m%Y")) 29 | df = df.sort_values(["lgd_state_name", "lgd_district_name", "date"]) 30 | 31 | # keep only dates with vaccination data, after Jan 16 2021, before April 13 32 | df = df.loc[(df['date'] >= datetime.datetime.strptime("16012021", "%d%m%Y")) & 33 | (df['date'] < datetime.datetime.strptime("13042021", "%d%m%Y"))].copy() 34 | df['total_vaccinated'] = df['total_covaxin'] + df['total_covishied'] 35 | 36 | # calculate per capita vaccination rates 37 | df['vac_rate'] = df['total_vaccinated'] / df['lgd_pca_tot_p'] 38 | 39 | 40 | # ---- # 41 | # plot # 42 | # ---- # 43 | f, ax = plt.subplots(figsize=[12, 8]) 44 | 45 | # get state data 46 | state_data = df.groupby(['date', 'lgd_state_name']).sum()[['total_vaccinated', 'lgd_pca_tot_p']].reset_index() 47 | 48 | # get state total across all time 49 | state_total = state_data.groupby(['lgd_state_name']).sum()[['total_vaccinated', 'lgd_pca_tot_p']].reset_index() 50 | 51 | # calculate vaccination rate 52 | state_data['vac_rate'] = state_data['total_vaccinated'] / state_data['lgd_pca_tot_p'] 53 | state_total['vac_rate'] = state_total['total_vaccinated'] / state_total['lgd_pca_tot_p'] 54 | 55 | #sns.lineplot(data=state_data, x="date", y="vac_rate", hue="lgd_state_name") 56 | state_total = state_total.sort_values(by='vac_rate', ascending=False) 57 | state_total = state_total.set_index("lgd_state_name") 58 | 
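# note: a couple of states come out of the rate calculation with vac_rate of
# inf or 0, presumably because the population denominator is zero or missing
# after the merge for those states; they are dropped before plotting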
59 | # drop infinite values (sikkim) 60 | state_total = state_total.drop(state_total.loc[state_total["vac_rate"] == np.inf].index) 61 | 62 | # drop 0 values (lakshadweep) 63 | state_total = state_total.drop(state_total.loc[state_total["vac_rate"] == 0].index) 64 | 65 | state_total.plot.bar(y='vac_rate', ax=ax) 66 | ax.set_ylabel("Vaccination Rate", fontsize=12) 67 | ax.set_xlabel("State", fontsize=12) 68 | 69 | plt.savefig(os.path.expanduser("~/public_html/png/state_vac_rate.png"), bbox_inches="tight") 70 | -------------------------------------------------------------------------------- /build.md: -------------------------------------------------------------------------------- 1 | # The Build 2 | 3 | ![COVID-19 Repository Build](assets/covid_build_public.svg?raw=true "COVID Build") 4 | -------------------------------------------------------------------------------- /como/a/app_age_hr_interpolation.do: -------------------------------------------------------------------------------- 1 | /* open hazard ratio interpolations */ 2 | import delimited using $covidpub/covid/csv/uk_age_predicted_hr.csv, clear 3 | ren ln_hr_age_sex ln_hr_simple 4 | 5 | /* merge to actual data used in OpenSAFELY */ 6 | merge 1:1 age using $tmp/hr_full_dis, nogen keepusing(hr_age) 7 | ren hr_age hr_full_age 8 | 9 | merge 1:1 age using $tmp/hr_simp_dis, nogen keepusing(hr_age) 10 | ren hr_age hr_simple_age 11 | 12 | /* expand opensafely data to decimal ages */ 13 | expand 10 14 | bys age: egen s = seq() 15 | replace age = age - (s - 1) / 10 16 | replace ln_hr_simple = . if age != round(age) 17 | replace ln_hr_full = . if age != round(age) 18 | 19 | foreach v in simple full { 20 | gen ln_hr_`v'_age_dis = ln(hr_`v'_age) 21 | } 22 | 23 | keep if inrange(age, 18, 89) 24 | 25 | sort age 26 | twoway /// 27 | (line ln_hr_full age, lwidth(medthick) lpattern(solid) lcolor(black)) /// 28 | (line ln_hr_full_age_dis age, lwidth(medthick) lpattern(-) lcolor(blue)) /// 29 | , xscale(range(15 90)) xlabel(20(10)90) xtitle(Age) ytitle("Log Hazard Ratio") /// 30 | legend(region(lcolor(black)) rows(2) ring(0) pos(5) lab(1 "Interpolated Age Hazard Ratio") lab(2 "Discrete Age Hazard Ratio") size(small) symxsize(5) bm(tiny)) 31 | 32 | graphout age_interpolation_full, pdf 33 | 34 | // twoway /// 35 | // (line ln_hr_simple age, lwidth(medthick) lpattern(-) lcolor(gs8)) /// 36 | // (line ln_hr_simple_age_dis age, lwidth(medthick) lpattern(solid) lcolor(black)) /// 37 | // , xscale(range(15 90)) xlabel(20(10)90) xtitle(Age) ytitle("Log Hazard Ratio") /// 38 | // legend(region(lcolor(black)) rows(2) ring(0) pos(5) lab(1 "Discrete Age Hazard Ratio") lab(2 "Interpolated Hazard Ratio") size(small) symxsize(5) bm(tiny)) 39 | // 40 | // graphout age_interpolation_simple, pdf 41 | 42 | 43 | -------------------------------------------------------------------------------- /como/a/app_joint_condition.do: -------------------------------------------------------------------------------- 1 | /*************************************************************************************/ 2 | /* TEST: how much does interaction of comorbidities affect population relative risk? */ 3 | 4 | /* Note: 5 | 6 | We can only do this for the biomarker conditions that we have in the 7 | Indian data since we don't have microdata on the GBD variables.
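The comparison below computes each person's combined risk as the product of the hazard ratios for the conditions they actually have, averages it within age, and contrasts that with the same product built from age-level average prevalences, which ignores the correlation between conditions.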
*/ 8 | 9 | /*************************************************************************************/ 10 | 11 | global conditionlist male $hr_biomarker_vars 12 | 13 | /* open Indian comorbidity microdata */ 14 | use $health/dlhs/data/dlhs_ahs_covid_comorbidities, clear 15 | keep wt age $conditionlist 16 | 17 | /* merge primary hazard ratios */ 18 | merge m:1 age using $tmp/hr_full_cts 19 | 20 | /* calculate the risk factor for each individual, multiplying the hazard ratio by 21 | an indicator for condition existence. */ 22 | gen prr_health = 1 23 | foreach v in $conditionlist { 24 | 25 | qui gen prr_`v' = `v' * hr_`v' + (1 - `v') 26 | qui replace prr_health = prr_health * prr_`v' 27 | qui sum prr_health [aw=wt] 28 | di %20s "`v': " %5.2f `r(mean)' 29 | } 30 | 31 | /* collapse combined health PRR to age-level using survey weights */ 32 | collapse (mean) $conditionlist prr_health [aw=wt], by(age) 33 | ren prr_health prr_health_micro 34 | 35 | /* now repeat the exercise using the aggregate data (which ignores interactions) */ 36 | merge 1:1 age using $tmp/hr_full_cts, nogen 37 | gen prr_health_agg = 1 38 | foreach v in $conditionlist { 39 | qui gen prr_`v' = `v' * hr_`v' + (1 - `v') 40 | qui replace prr_health_agg = prr_health_agg * prr_`v' 41 | qui sum prr_health_agg 42 | di "`v': " %5.2f `r(mean)' 43 | } 44 | 45 | gen gap = prr_health_micro / prr_health_agg 46 | tsset age 47 | replace gap = (L3.gap + L2.gap + L.gap + gap + F.gap + F2.gap + F3.gap) / 7 if !mi(L3.gap) & !mi(F3.gap) 48 | keep if age <= 95 49 | 50 | /* plot the two age-specific PRR distributions */ 51 | sort age 52 | twoway /// 53 | (line prr_health_micro age, lwidth(medthick) lcolor(black)) /// 54 | (line prr_health_agg age, lwidth(medthick) lcolor(lavender)), /// 55 | ytitle("Aggregate Population Relative Risk from Health Conditions") xtitle("Age") /// 56 | legend(lab(1 "Microdata") lab(2 "Aggregate Data") ring(0) pos(5) cols(1) size(small) symxsize(5) bm(tiny) region(lcolor(black))) /// 57 | ylabel(1(.5)2.5) 58 | graphout prr_health_joint, pdf 59 | 60 | line gap age if age < 98, lwidth(medthick) ylabel(1 1.05 1.1 1.15) /// 61 | xtitle("Age") ytitle("Increased population relative risk" "from comorbidity correlation") 62 | graphout prr_ratio_micro, pdf 63 | -------------------------------------------------------------------------------- /como/a/app_table_age_bin_prev.do: -------------------------------------------------------------------------------- 1 | /* set variable labels */ 2 | cap prog drop label_vars 3 | prog def label_vars 4 | cap label var bmi_obeseI "Obese (class I)" 5 | cap label var bmi_obeseII "Obese (class II)" 6 | cap label var bmi_obeseIII "Obese (class III)" 7 | label var obese_1_2 "Obese (class 1 & 2)" 8 | label var obese_3 "Obese (class 3)" 9 | label var bp_high "Hypertension" 10 | label var diabetes_uncontr "Diabetes" 11 | label var asthma_ocs "Asthma" 12 | label var autoimmune_dz "Psoriasis, Rheumatoid" 13 | label var haem_malig_1 "Haematological Cancer" 14 | label var cancer_non_haem_1 "Non-haematological Cancer" 15 | label var chronic_heart_dz "Chronic Heart Disease" 16 | label var chronic_resp_dz "Chronic Respiratory Disease" 17 | label var immuno_other_dz "Other Immunosuppressive Conditions" 18 | label var kidney_dz "Kidney Disease" 19 | label var liver_dz "Chronic Liver Disease" 20 | label var neuro_other "Other Neurological Condition" 21 | label var stroke_dementia "Stroke / Dementia" 22 | end 23 | 24 | /* write table header */ 25 | cap file close fh 26 | file open fh using 
$out/app_table_age_bin_prev.tex, write replace 27 | 28 | file write fh "\begin{tabular}{lrrrrrr}" _n 29 | file write fh " & \multicolumn{6}{c}{\textbf{Age}} \\ " _n 30 | file write fh " & 18--39 & 40--49 & 50--59 & 60--69 & 70--79 & 80--99 \\ " _n 31 | 32 | /* india header */ 33 | file write fh " \textbf{India} & & & & & \\ " _n 34 | 35 | /* INDIA PREVALENCE TABLE */ 36 | use $tmp/prev_india, clear 37 | ren prev_* * 38 | 39 | /* get india age-specific population to weight GBD year vars */ 40 | merge 1:1 age using $tmp/india_pop, keep(match) nogen 41 | 42 | /* loop over condition list */ 43 | label_vars 44 | foreach condition in $hr_biomarker_vars $hr_gbd_vars { 45 | 46 | /* get variable label for condition */ 47 | local lab: variable label `condition' 48 | 49 | file write fh "\hspace{3mm} " 50 | file write fh "`lab' & " 51 | 52 | qui sum `condition' [aw=india_pop] if inrange(age, 18, 39) 53 | file write fh %5.1f (`r(mean)' * 100) " & " 54 | qui sum `condition' [aw=india_pop] if inrange(age, 40, 49) 55 | file write fh %5.1f (`r(mean)' * 100) " & " 56 | qui sum `condition' [aw=india_pop] if inrange(age, 50, 59) 57 | file write fh %5.1f (`r(mean)' * 100) " & " 58 | qui sum `condition' [aw=india_pop] if inrange(age, 60, 69) 59 | file write fh %5.1f (`r(mean)' * 100) " & " 60 | qui sum `condition' [aw=india_pop] if inrange(age, 70, 79) 61 | file write fh %5.1f (`r(mean)' * 100) " & " 62 | qui sum `condition' [aw=india_pop] if inrange(age, 80, 99) 63 | file write fh %5.1f (`r(mean)' * 100) " \\ " _n 64 | } 65 | 66 | /* UK HEADER */ 67 | file write fh " & & & & & \\ " _n 68 | file write fh " \textbf{England} & & & & & \\ " _n 69 | 70 | /* ENGLAND PREVALENCE TABLE */ 71 | 72 | /* combine UK prevalence data */ 73 | use $tmp/prev_uk_nhs_matched, clear 74 | ren prev_* * 75 | 76 | /* get UK age-specific population to weight GBD year vars */ 77 | merge 1:1 age using $tmp/uk_pop, keep(match) nogen 78 | 79 | /* loop over condition list */ 80 | label_vars 81 | foreach condition in $hr_biomarker_vars $hr_gbd_vars { 82 | 83 | /* get variable label for condition */ 84 | local lab: variable label `condition' 85 | 86 | file write fh "\hspace{3mm} " 87 | file write fh "`lab' & " 88 | 89 | qui sum `condition' [aw=uk_pop] if inrange(age, 18, 39) 90 | file write fh %5.1f (`r(mean)' * 100) " & " 91 | qui sum `condition' [aw=uk_pop] if inrange(age, 40, 49) 92 | file write fh %5.1f (`r(mean)' * 100) " & " 93 | qui sum `condition' [aw=uk_pop] if inrange(age, 50, 59) 94 | file write fh %5.1f (`r(mean)' * 100) " & " 95 | qui sum `condition' [aw=uk_pop] if inrange(age, 60, 69) 96 | file write fh %5.1f (`r(mean)' * 100) " & " 97 | qui sum `condition' [aw=uk_pop] if inrange(age, 70, 79) 98 | file write fh %5.1f (`r(mean)' * 100) " & " 99 | qui sum `condition' [aw=uk_pop] if inrange(age, 80, 99) 100 | file write fh %5.1f (`r(mean)' * 100) " \\ " _n 101 | 102 | } 103 | 104 | file write fh "\end{tabular}" _n 105 | 106 | file close fh 107 | 108 | -------------------------------------------------------------------------------- /como/a/app_table_nhs_vs_os.do: -------------------------------------------------------------------------------- 1 | /*************************************************************************/ 2 | /* create a table comparing opensafely prevalences to UK NHS prevalences */ 3 | /*************************************************************************/ 4 | 5 | /* set variable labels */ 6 | cap prog drop label_vars 7 | prog def label_vars 8 | label var bmi_obeseI "Obese (class I)" 9 | label var bmi_obeseII 
"Obese (class II)" 10 | label var bmi_obeseIII "Obese (class III)" 11 | label var bp_high "Hypertension" 12 | label var diabetes_uncontr "Diabetes" 13 | label var asthma_ocs "Asthma" 14 | label var autoimmune_dz "Psoriasis, Rheumatoid" 15 | label var haem_malig_1 "Haematological Cancer" 16 | label var cancer_non_haem_1 "Non-haematological Cancer" 17 | label var chronic_heart_dz "Chronic Heart Disease" 18 | label var chronic_resp_dz "Chronic Respiratory Disease" 19 | label var immuno_other_dz "Other Immunosuppressive Conditions" 20 | label var kidney_dz "Kidney Disease" 21 | label var liver_dz "Chronic Liver Disease" 22 | label var neuro_other "Other Neurological Condition" 23 | label var stroke_dementia "Stroke / Dementia" 24 | end 25 | 26 | /* ENGLAND PREVALENCE TABLE */ 27 | 28 | /* combine UK prevalence data */ 29 | use $tmp/prev_uk_nhs_matched, clear 30 | 31 | /* get age-specific population for weighted collapse */ 32 | merge 1:1 age using $tmp/uk_pop 33 | 34 | /* collapse to population prevalence */ 35 | drop male 36 | ren prev_* * 37 | 38 | /* merge in OpenSafely prevalences */ 39 | merge 1:1 age using $tmp/prev_uk_os, nogen 40 | foreach v in $hr_biomarker_vars $hr_gbd_vars { 41 | bys age: egen t = mean(prev_`v') 42 | replace prev_`v' = t if mi(prev_`v') 43 | drop t 44 | } 45 | 46 | /* limit to ages 18-99 */ 47 | keep if inrange(age, 18, 99) 48 | 49 | /* BEGIN TABLE OUTPUT */ 50 | cap file close fh 51 | file open fh using $out/app_table_os_vs_nhs.tex, write replace 52 | 53 | file write fh "\begin{tabular}{lcc}" _n 54 | file write fh " & Population & OpenSAFELY \\ " _n 55 | file write fh " & Prevalence & Prevalence \\ " _n 56 | 57 | /* loop over conditions sourced in NHS */ 58 | label_vars 59 | file write fh "\textbf{Source: NHS Health Survey for England} & & \\ " _n 60 | foreach condition in $hr_biomarker_vars { 61 | 62 | /* get variable label for condition */ 63 | local lab: variable label `condition' 64 | 65 | /* put in variable */ 66 | file write fh "\hspace{3mm} " 67 | file write fh "`lab' & " 68 | 69 | /* put in our prevalence */ 70 | qui sum `condition' [aw=uk_pop] 71 | file write fh %5.1f (`r(mean)' * 100) " & " 72 | 73 | /* put in OpenSAFELY prevalence */ 74 | qui sum prev_`condition' 75 | file write fh %5.1f (`r(mean)' * 100) " \\ " _n 76 | } 77 | 78 | /* put in COPD */ 79 | 80 | file write fh "\vspace{5mm} & & \\ " _n 81 | file write fh "\textbf{Source: Clinical Practice Research Datalink} & & \\ " _n 82 | file write fh "\hspace{3mm} " 83 | file write fh %5.1f "Chronic Respiratory Illness & " 84 | qui sum chronic_resp_dz [aw=uk_pop] 85 | file write fh %5.1f (`r(mean)' * 100) " & " 86 | 87 | /* put in OpenSAFELY prevalence */ 88 | qui sum prev_chronic_resp_dz 89 | file write fh %5.1f (`r(mean)' * 100) " \\ " _n 90 | 91 | /* GBD conditions */ 92 | file write fh "\vspace{5mm} & & \\ " _n 93 | file write fh "\textbf{Source: Global Burden of Disease} & & \\ " _n 94 | foreach condition in $hr_gbd_vars { 95 | 96 | /* get variable label for condition */ 97 | local lab: variable label `condition' 98 | 99 | /* put in variable */ 100 | file write fh "\hspace{3mm} " 101 | file write fh "`lab' & " 102 | 103 | /* put in our prevalence */ 104 | qui sum `condition' [aw=uk_pop] 105 | file write fh %5.1f (`r(mean)' * 100) " & " 106 | 107 | /* put in OpenSAFELY prevalence */ 108 | qui sum prev_`condition' 109 | file write fh %5.1f (`r(mean)' * 100) " \\ " _n 110 | } 111 | 112 | file write fh "\end{tabular}" _n 113 | 114 | file close fh 115 | 116 | 
-------------------------------------------------------------------------------- /como/a/covid_como_oscompare_tpl.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{p{6cm}cc} 2 | & \multicolumn{2}{c}{\textbf{England Prevalence (\%) }} \\[0.5ex] & 3 | \emph{OpenSafely Sample} & \emph{This Study} \\[2ex] 4 | Age 18-39 & \num{34.4} & \num{$$uk_age_18_40$$} \\[0.25ex] 5 | Age 40-49 & \num{16.5} & \num{$$uk_age_40_50$$} \\[0.25ex] 6 | Age 50-59 & \num{17.6} & \num{$$uk_age_50_60$$}\\[0.25ex] 7 | Age 60-69 & \num{13.8} & \num{$$uk_age_60_70$$}\\[0.25ex] 8 | Age 70-79 & \num{11.2} & \num{$$uk_age_70_80$$}\\[0.25ex] 9 | Age 80-99 & \num{6.5} & \num{$$uk_age_80$$} \\[0.25ex] 10 | Male & \num{49.9} & \num{$$male$$} \\[0.25ex] 11 | Diabetes (Controlled) & \num{6.0} & \num{$$uk_prev_diabetes_contr$$} \\[0.25ex] 12 | Diabetes (Uncontrolled) & \num{2.8} & \num{$$uk_prev_diabetes_uncontr$$} \\[0.25ex] 13 | Hypertension & \num{34.2} & \num{$$uk_prev_hypertension_both$$} \\[0.25ex] 14 | Obese (class I \& II) & \num{19.1} & \num{$$uk_prev_obese_1_2$$} \\[0.25ex] 15 | Obese (class III) & \num{2.7} & \num{$$uk_prev_obese_3$$} \\[0.25ex] 16 | Chronic Heart Disease & \num{6.7} & \num{$$uk_gbd_chronic_heart_dz_mu$$} \\[0.25ex] 17 | Chronic Respiratory Disease & \num{4.1} & \num{$$uk_prev_chronic_resp_dz$$} 18 | \\[0.25ex] 19 | Asthma & \num{1.7} & \num{$$uk_gbd_asthma_ocs_mu$$} \\[0.25ex] 20 | Kidney Disease & \num{6.3} & \num{$$uk_gbd_kidney_dz_mu$$} \\[0.25ex] 21 | Chronic Liver Disease & \num{0.7} & \num{$$uk_gbd_liver_dz_mu$$} \\[0.25ex] 22 | Haematological Cancer & \num{0.1} & \num{$$uk_gbd_haem_malig_1_mu$$}\\[0.25ex] 23 | Non-haematological Cancer & \num{0.5} & \num{$$uk_gbd_cancer_non_haem_1_mu$$} \\[0.25ex] 24 | Stroke, Dementia & \num{2.1} & \num{$$uk_gbd_stroke_dementia_mu$$} \\[0.25ex] 25 | Other Neurological Condition & \num{1.0} & \num{$$uk_gbd_neuro_other_mu$$} \\[0.25ex] 26 | Psoriasis, Rheumatoid & \num{5.1} & \num{$$uk_gbd_autoimmune_dz_mu$$} \\[0.25ex] 27 | Other Immunosuppressive Conditions & \num{1.6} & \num{$$uk_gbd_immuno_other_dz_mu$$} \\[0.25ex] 28 | \end{tabular} 29 | -------------------------------------------------------------------------------- /como/a/covid_como_sumhr_tpl.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{p{7cm}cp{1.25cm}p{1.5cm}} 2 | & \textbf{{\footnotesize Individual}} & 3 | \multicolumn{2}{c}{{\textbf{\footnotesize{Population}}}} \\ 4 | & \textbf{{\footnotesize Relative Risk}} & 5 | \multicolumn{2}{c}{{\textbf{\footnotesize{Relative Risk}}}} \\[0.75ex] 6 | & & \emph{India} & \emph{England} \\[2ex] 7 | %Male & 1.99 & \num{$$india_male_risk$$} & \num{$$uk_male_risk$$} \\[0.25ex] 8 | Diabetes (Controlled) & \num{1.31} & \num{$$india_diabetes_contr_risk$$} & \num{$$uk_diabetes_contr_risk$$} \\[0.25ex] 9 | Diabetes (Uncontrolled) & \num{1.94} & \num{$$india_diabetes_uncontr_risk$$} & \num{$$uk_diabetes_uncontr_risk$$} \\[0.25ex] 10 | Hypertension & \num{0.89} & \num{$$india_bp_high_risk$$} & \num{$$uk_bp_high_risk$$} \\[0.25ex] 11 | Obese (class I \& II) & \num{1.15} & \num{$$india_obese_1_2_risk$$} & \num{$$uk_obese_1_2_risk$$} \\[0.25ex] 12 | Obese (class III) & \num{1.91} & \num{$$india_obese_3_risk$$} & \num{$$uk_obese_3_risk$$} \\[0.25ex] 13 | \\ 14 | Chronic Heart Disease & \num{1.17} & \num{$$india_chronic_heart_dz_risk$$} & \num{$$uk_chronic_heart_dz_risk$$} \\[0.25ex] 15 | Chronic Respiratory Disease & \num{1.62} & \num{$$india_chronic_resp_dz_risk$$} &
\num{$$uk_chronic_resp_dz_risk$$} \\[0.25ex] 16 | Asthma & \num{1.13} & \num{$$india_asthma_ocs_risk$$} & \num{$$uk_asthma_ocs_risk$$} \\[0.25ex] 17 | Kidney Disease & \num{1.42} & \num{$$india_kidney_dz_risk$$} & \num{$$uk_kidney_dz_risk$$} \\[0.25ex] 18 | Chronic Liver Disease & \num{1.73} & \num{$$india_liver_dz_risk$$} & \num{$$uk_liver_dz_risk$$} \\[0.25ex] 19 | \\ 20 | Haematological Cancer & \num{2.79} & \num{$$india_haem_malig_1_risk$$} & \num{$$uk_haem_malig_1_risk$$} \\[0.25ex] 21 | Non-haematological Cancer & \num{1.71} & \num{$$india_cancer_non_haem_1_risk$$} & \num{$$uk_cancer_non_haem_1_risk$$} \\[0.25ex] 22 | Stroke, Dementia & \num{2.15} & \num{$$india_stroke_dementia_risk$$} & \num{$$uk_stroke_dementia_risk$$} \\[0.25ex] 23 | Other Neurological Condition & \num{2.56} & \num{$$india_neuro_other_risk$$} & \num{$$uk_neuro_other_risk$$} \\[0.25ex] 24 | Psoriasis, Rheumatoid & \num{1.19} & \num{$$india_autoimmune_dz_risk$$} & \num{$$uk_autoimmune_dz_risk$$} \\[0.25ex] 25 | Other Immunosuppressive Conditions & \num{1.69} & \num{$$india_immuno_other_dz_risk$$} & \num{$$uk_immuno_other_dz_risk$$} \\[0.25ex] 26 | \end{tabular} 27 | 28 | -------------------------------------------------------------------------------- /como/a/covid_como_sumstats_tpl.tex: -------------------------------------------------------------------------------- 1 | \begin{tabular}{p{7cm}p{1.1cm}p{1cm}} 2 | & \multicolumn{2}{c}{\textbf{{\footnotesize Prevalence (\%) }}} \\[0.5ex] & \emph{India} & \emph{England} \\[2ex] 3 | Age 18-39 & \num{$$india_age18_40_mu$$} & \num{$$uk_age_18_40$$} \\[0.25ex] 4 | Age 40-49 & \num{$$india_age40_50_mu$$} & \num{$$uk_age_40_50$$} \\[0.25ex] 5 | Age 50-59 & \num{$$india_age50_60_mu$$} & \num{$$uk_age_50_60$$}\\[0.25ex] 6 | Age 60-69 & \num{$$india_age60_70_mu$$} & \num{$$uk_age_60_70$$}\\[0.25ex] 7 | Age 70-79 & \num{$$india_age70_80_mu$$} & \num{$$uk_age_70_80$$}\\[0.25ex] 8 | Age 80-99 & \num{$$india_age80__mu$$} & \num{$$uk_age_80$$} \\[0.25ex] 9 | Male & \num{$$india_male_mu$$} & \num{$$male$$} \\[0.25ex] 10 | \\ 11 | Diabetes (Controlled) & \num{$$india_diabetes_contr_mu$$} & \num{$$uk_prev_diabetes_contr$$} \\[0.25ex] 12 | Diabetes (Uncontrolled) & \num{$$india_diabetes_uncontr_mu$$} & \num{$$uk_prev_diabetes_uncontr$$} \\[0.25ex] 13 | Hypertension & \num{$$india_hypertension_both_mu$$} & \num{$$uk_prev_hypertension_both$$} \\[0.25ex] 14 | Obese (class I \& II) & \num{$$india_obese_1_2_mu$$} & \num{$$uk_prev_obese_1_2$$} \\[0.25ex] 15 | Obese (class III) & \num{$$india_obese_3_mu$$} & \num{$$uk_prev_obese_3$$} \\[0.25ex] 16 | \\ 17 | Chronic Heart Disease & \num{$$india_gbd_chronic_heart_dz_mu$$} & \num{$$uk_gbd_chronic_heart_dz_mu$$} \\[0.25ex] 18 | Chronic Respiratory Disease & \num{$$india_gbd_chronic_resp_dz_mu$$} & \num{$$uk_prev_chronic_resp_dz$$} \\[0.25ex] 19 | Asthma & \num{$$india_gbd_asthma_ocs_mu$$} & \num{$$uk_gbd_asthma_ocs_mu$$} \\[0.25ex] 20 | Kidney Disease & \num{$$india_gbd_kidney_dz_mu$$} & \num{$$uk_gbd_kidney_dz_mu$$} \\[0.25ex] 21 | Chronic Liver Disease & \num{$$india_gbd_liver_dz_mu$$} & \num{$$uk_gbd_liver_dz_mu$$} \\[0.25ex] 22 | \\ 23 | Haematological Cancer & \num{$$india_gbd_haem_malig_1_mu$$} & \num{$$uk_gbd_haem_malig_1_mu$$}\\[0.25ex] 24 | Non-haematological Cancer & \num{$$india_gbd_cancer_non_haem_1_mu$$} & \num{$$uk_gbd_cancer_non_haem_1_mu$$} \\[0.25ex] 25 | Stroke, Dementia & \num{$$india_gbd_stroke_dementia_mu$$} & \num{$$uk_gbd_stroke_dementia_mu$$} \\[0.25ex] 26 | Other Neurological Condition & \num{$$india_gbd_neuro_other_mu$$} & 
\num{$$uk_gbd_neuro_other_mu$$} \\[0.25ex] 27 | Psoriasis, Rheumatoid & \num{$$india_gbd_autoimmune_dz_mu$$} & \num{$$uk_gbd_autoimmune_dz_mu$$} \\[0.25ex] 28 | Other Immunosuppressive Conditions & \num{$$india_gbd_immuno_other_dz_mu$$} & \num{$$uk_gbd_immuno_other_dz_mu$$} \\[0.25ex] 29 | \end{tabular} 30 | -------------------------------------------------------------------------------- /como/a/examine_risk_factors_poverty.do: -------------------------------------------------------------------------------- 1 | /* get a poverty measure from... */ 2 | use $secc/final/collapse/village_consumption_imputed_pc11, clear 3 | 4 | /* get district identifiers */ 5 | merge 1:1 pc11_state_id pc11_village_id using $keys/pc11_village_key, keepusing(pc11_district_id) 6 | keep if _merge == 3 7 | drop _merge 8 | 9 | /* collapse to district */ 10 | collapse (mean) secc_cons_per_cap [aw=pc11_pop], by(pc11_state_id pc11_district_id) 11 | save $tmp/pc11_cons, replace 12 | 13 | /* open risk factor file from examine_risk_factors.do */ 14 | use $tmp/rfs, clear 15 | 16 | /* collapse risk factors to district level */ 17 | /* [note this is trusting district-level age distributions to be correct -- 18 | which it won't be given only 1000 obs per district. alternately we could 19 | do some kind of imputation here to bring down the noise level-- reweight 20 | the age distribution based on the true age distribution from the SECC, 21 | but keep the conditions as they are.] */ 22 | collapse (mean) hr_full_bp_high bp_high *resp* ln_rf_full_nond_conditions *rf_full_abd_c *rf_full_diab ln_rf_full_c rf_full_c rf_simple_agesex_c ln_rf_simple_agesex_c [aw=wt], by(pc11_state_id pc11_district_id) 23 | 24 | /* merge in our best measure of poverty -- currently access to */ 25 | merge 1:1 pc11_state_id pc11_district_id using $covidpub/demography/pc11/water_district_pc11, keepusing(pc11r_hl_dw_loc_inprem_sh pc11u_hl_dw_loc_inprem_sh) gen(_mw) 26 | drop if _mw == 2 27 | drop _mw 28 | 29 | merge 1:1 pc11_state_id pc11_district_id using $covidpub/demography/pc11/dem_district_pc11, keepusing(pc11r_pca_tot_p pc11u_pca_tot_p) gen(_md) 30 | drop if _md == 2 31 | drop _md 32 | 33 | /* get consumption data */ 34 | merge 1:1 pc11_state_id pc11_district_id using $tmp/pc11_cons 35 | drop if _merge == 2 36 | drop _merge 37 | ren secc_cons_per_cap cons 38 | 39 | /* generate share with water */ 40 | gen water_share = ((pc11r_hl_dw_loc_inprem_sh * pc11r_pca_tot_p) + (pc11u_hl_dw_loc_inprem_sh * pc11u_pca_tot_p)) / (pc11r_pca_tot_p + pc11u_pca_tot_p) 41 | 42 | /* examine risk curves as a function of the water share */ 43 | binscatter rf_full_c rf_simple_agesex_c water_share, 44 | graphout rf_water 45 | 46 | binscatter ln_rf_full_c ln_rf_simple_agesex_c water_share, 47 | graphout ln_rf_water 48 | 49 | /* examine correlation of risk factors with water share */ 50 | reg rf_simple_agesex_c water_share 51 | reg rf_full_c water_share 52 | 53 | reg rf_full_diab water_share 54 | reg ln_rf_full_diab water_share 55 | 56 | reg ln_rf_full_abd_c water_share 57 | reg ln_rf_simple_agesex_c water_share 58 | 59 | /* repeat analysis with secc consumption */ 60 | binscatter rf_full_c rf_simple_agesex_c cons, 61 | graphout rf_cons 62 | 63 | binscatter ln_rf_full_c ln_rf_simple_agesex_c cons, 64 | graphout ln_rf_cons 65 | 66 | reg rf_simple_agesex_c cons 67 | reg rf_full_c cons 68 | 69 | reg rf_full_diab cons 70 | reg ln_rf_full_diab cons 71 | 72 | reg ln_rf_full_abd_c cons 73 | reg ln_rf_simple_agesex_c cons 74 | 75 | /* various conditions vs income */ 76 | binscatter 
rf_full_diab cons 77 | graphout diabetes_cons 78 | 79 | /* compare resp conditions with income */ 80 | foreach v of varlist *resp* { 81 | binscatter `v' cons 82 | graphout `v'_cons 83 | } 84 | 85 | /* hypertension vs. income */ 86 | binscatter bp_high cons 87 | graphout hyp_cons 88 | 89 | -------------------------------------------------------------------------------- /como/a/prep_eng_india_prev_compare.do: -------------------------------------------------------------------------------- 1 | /* merge India and UK age-specific health prevalences with country prefixes */ 2 | use $tmp/prev_india, clear 3 | ren prev_* i_* 4 | 5 | merge 1:1 age using $tmp/prev_uk_nhs_matched, nogen 6 | ren prev_* u_* 7 | 8 | 9 | /* combine all three classes of obesity */ 10 | gen u_obese = u_obese_1_2 + u_obese_3 11 | gen i_obese = i_obese_1_2 + i_obese_3 12 | 13 | /* label the variables we will graph */ 14 | label var u_diabetes_contr "Diabetes (Controlled, UK)" 15 | label var u_diabetes_uncontr "Diabetes (Uncontrolled, UK)" 16 | label var u_hypertension_contr "Hypertension (Controlled, UK)" 17 | label var u_hypertension_uncontr "Hypertension (Uncontrolled, UK)" 18 | label var u_obese "Obese (BMI >= 30, UK)" 19 | label var i_diabetes_contr "Diabetes (Controlled, India)" 20 | label var i_diabetes_uncontr "Diabetes (Uncontrolled, India)" 21 | label var i_hypertension_contr "Hypertension (Controlled, India)" 22 | label var i_hypertension_uncontr "Hypertension (Uncontrolled, India)" 23 | label var i_obese "Obese (BMI >= 30, India)" 24 | 25 | /* apply a smoother to the India microdata conditions */ 26 | sort age 27 | tsset age 28 | foreach v in i_diabetes_contr i_diabetes_uncontr i_hypertension_uncontr i_hypertension_contr i_obese { 29 | replace `v' = (L2.`v' + L1.`v' + `v' + F1.`v' + F2.`v') / 5 if !mi(L2.`v') & !mi(F2.`v') 30 | replace `v' = (L1.`v' + `v' + F1.`v') / 3 if (mi(L2.`v') | mi(F2.`v')) & !mi(L1.`v') & !mi(F1.`v') 31 | } 32 | sort age 33 | keep if age < 90 34 | 35 | drop *diabetes_no_measure *hypertension_both 36 | 37 | /* save file for figure generation */ 38 | save $tmp/prev_compare, replace 39 | -------------------------------------------------------------------------------- /como/a/sumstats.do: -------------------------------------------------------------------------------- 1 | /* generate stats used in paper */ 2 | use $tmp/combined, clear 3 | 4 | /* sample size */ 5 | count 6 | 7 | /* age median and IQR */ 8 | sum age, d 9 | 10 | /* risk factor severity */ 11 | foreach v in $age_vars male $hr_biomarker_vars { 12 | disp_nice "`v'" 13 | tab `v' [aw=wt], mi 14 | } 15 | 16 | /* open GBD data */ 17 | foreach country in india uk { 18 | use $health/gbd/gbd_nhs_conditions_`country'.dta, clear 19 | disp_nice "`country'" 20 | foreach v in $hr_gbd_vars { 21 | qui sum gbd_`v' if age == -90 22 | di %25s "`v': " %6.1f (`r(mean)' * 100) "%" 23 | } 24 | } 25 | 26 | /* self-report measures of liver and kidney disease for reference */ 27 | use $tmp/combined, clear 28 | tab kidney_dz [aw=wt] 29 | tab liver_dz [aw=wt] 30 | -------------------------------------------------------------------------------- /como/b/fit_cts_uk_age_hr.m: -------------------------------------------------------------------------------- 1 | 2 | med_age = [29 45 55 65 75 85]'; 3 | hr_age_sex = [.05 .27 1 2.61 7.61 26.27]'; 4 | hr_full = [.07 .31 1 2.09 4.77 12.64]'; 5 | 6 | ln_hr_age_sex = log(hr_age_sex); 7 | ln_hr_full = log(hr_full); 8 | 9 | age = [18:100]'; 10 | 11 | %% use a polynomial interpolation since spline fails on the endpoints 12 | 
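%% (the six binned log hazard ratios are fit at their bin median ages with a
%% cubic polynomial ('poly3'); predictions above age 90 are top-coded below)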
fit_age_sex = fit(med_age, ln_hr_age_sex, 'poly3') 13 | fit_full = fit(med_age, ln_hr_full, 'poly3') 14 | 15 | %% graph the fits 16 | clf; 17 | hold on 18 | scatter(med_age, ln_hr_full); 19 | plot(fit_full, age, zeros(83, 1)) 20 | xlabel("log odds ratio") 21 | ylabel("age") 22 | b = gca; legend(b,'off'); 23 | write_png('/scratch/pn/fit_full') 24 | 25 | clf; 26 | hold on 27 | scatter(med_age, ln_hr_age_sex); 28 | plot(fit_age_sex, age, zeros(83, 1)) 29 | xlabel("log odds ratio") 30 | ylabel("age") 31 | write_png('/scratch/pn/fit_age_sex') 32 | 33 | %% generate predicted values 34 | predicted_hr_age_sex = fit_age_sex(age); 35 | predicted_hr_full = fit_full(age); 36 | 37 | %% topcode predicted values at age 90 value since we don't have certainty over the 38 | %% age distribution here or whether HRs keep rising 39 | predicted_hr_age_sex(age > 90) = predicted_hr_age_sex(age == 90); 40 | predicted_hr_full(age > 90) = predicted_hr_full(age == 90); 41 | 42 | %% write these to a file 43 | writematrix([age predicted_hr_age_sex predicted_hr_full],'/scratch/pn/uk_age_fits.csv') 44 | 45 | %% prepend a header to the file 46 | system('echo "age,ln_hr_age_sex,ln_hr_full" >~/iec/covid/covid/csv/uk_age_predicted_hr.csv'); 47 | system('cat /scratch/pn/uk_age_fits.csv >>~/iec/covid/covid/csv/uk_age_predicted_hr.csv'); 48 | fprintf("Writing uk_age_predicted_hr.csv\n"); 49 | -------------------------------------------------------------------------------- /como/b/flatten_hr_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | def flatten_hr_data(hr_var, fn_in, fn_out): 5 | """ 6 | flatten the hazard ratio data from the NHS study to 7 | be a 1D array with the names of the variables and the 8 | selected hazard ratio variable. 9 | """ 10 | # read in the HR data 11 | df = pd.read_stata(fn_in) 12 | 13 | # select just the variables we need 14 | df = df[["variable", hr_var]].T 15 | 16 | # set new column names to combine hr and variable names 17 | new_cols = [f"{x}_{hr_var}" for x in df.loc["variable"]] 18 | df.columns = new_cols 19 | 20 | # drop the variable column 21 | df = df.drop(["variable"]) 22 | 23 | # set the index value to 0 24 | df.index = [0] 25 | 26 | # write out the file 27 | df.to_csv(fn_out) 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /como/b/old/prep_india_sim_prevalence.do: -------------------------------------------------------------------------------- 1 | global conditionlist copd asthma 2 | 3 | /* import india prevalence csv */ 4 | import delimited using $comocsv/india_condition_prevalence.csv, varnames(1) clear 5 | drop source 6 | ren *, lower 7 | 8 | /* reshape wide on conditions */ 9 | replace condition = condition[_n-1] if mi(condition) 10 | drop if condition == "Hypertension (2)" 11 | replace condition = "hypertension" if condition == "Hypertension (1)" 12 | replace condition = lower(condition) 13 | 14 | /* create a new age-granular dataset and fill it according to the age bins */ 15 | set obs 100 16 | gen age = _n 17 | 18 | foreach condition in $conditionlist { 19 | gen prev_`condition' = . 
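/* fill each single year of age with the prevalence of the source age bin that contains it */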
20 | forval age = 1/100 { 21 | sum prevalence if condition == "`condition'" & inrange(`age', startage, endage) 22 | if `r(N)' > 0 { 23 | replace prev_`condition' = `r(mean)' if age == `age' 24 | } 25 | } 26 | replace prev_`condition' = prev_`condition' / 100 27 | } 28 | 29 | /* drop the original fields and limit to ages close to our sample */ 30 | keep if inrange(age, 16, 90) 31 | keep age prev* 32 | drop prevalence 33 | 34 | ren prev* india_prev* 35 | 36 | /* save uk prevalences */ 37 | save $tmp/india_prevalences, replace 38 | -------------------------------------------------------------------------------- /como/b/old/prep_populations.do: -------------------------------------------------------------------------------- 1 | // /********************************************/ 2 | // /* convert the UK age distribution to Stata */ 3 | // /********************************************/ 4 | // OBSOLETE-- WE FOUND A BETTER FILE 5 | // 6 | // /* open the raw csv file */ 7 | // import delimited using $comocsv/uk_demography.csv, clear 8 | // 9 | // /* split the age into a start and end */ 10 | // gen agestart = real(substr(age, 1, strpos(age, "-") - 1)) 11 | // gen ageend = real(substr(age, strpos(age, "-") + 1, .)) 12 | // 13 | // /* create 5 rows for each age to have granular age */ 14 | // expand 5 15 | // ren age age_str 16 | // bys age_str: egen age = seq() 17 | // replace age = age + agestart - 1 18 | // 19 | // /* cut population by 5 since we multiplied each bin by 5 */ 20 | // replace uk_pop = uk_pop / 5 21 | // 22 | // /* keep the ages and vars that we want */ 23 | // drop agestart ageend age_str 24 | // keep if inrange(age, 16, 90) 25 | // 26 | // /* smooth the population across bins */ 27 | // lpoly uk_pop age, bw(2) gen(uk_pop_smooth) at(age) 28 | // 29 | // order age 30 | // save $tmp/uk_pop, replace 31 | 32 | // /****************************************/ 33 | // /* create age-granular india population */ 34 | // /****************************************/ 35 | // OBSOLETE-- WE NOW GET THESE FROM THE POPULATION CENSUS 36 | // 37 | // /* open district data with 5-year age bins */ 38 | // use $covidpub/demography/pc11/age_bins_district_t_pc11.dta, clear 39 | // 40 | // /* collapse to national level */ 41 | // gen x = 1 42 | // collapse (sum) age_*_t, by(x) 43 | // 44 | // /* reshape to long on ages */ 45 | // ren *_t * 46 | // ren age_* india_pop* 47 | // reshape long india_pop, j(age) i(x) 48 | // format india_pop %10.0f 49 | // drop x 50 | // 51 | // /* expand to have one row per age */ 52 | // expand 5 53 | // ren age agebin 54 | // bys agebin: egen age = seq() 55 | // replace age = age + agebin - 1 56 | // replace india_pop = india_pop / 5 57 | // 58 | // drop agebin 59 | // keep if inrange(age, 16, 90) 60 | // order age 61 | // 62 | // /* smooth across age bins */ 63 | // lpoly india_pop age, bw(3) gen(india_pop_smooth) at(age) 64 | // 65 | // save $tmp/india_pop, replace 66 | 67 | /*************************/ 68 | /* repeat at state level */ 69 | /*************************/ 70 | 71 | /* open district data with 5-year age bins */ 72 | use $covidpub/demography/pc11/age_bins_district_t_pc11.dta, clear 73 | 74 | /* collapse to state level */ 75 | collapse (sum) age_*_t, by(pc11_state_id) 76 | 77 | /* reshape to long on ages */ 78 | ren *_t * 79 | ren age_* state_pop* 80 | reshape long state_pop, j(age) i(pc11_state_id) 81 | format state_pop %10.0f 82 | 83 | /* expand to have one row per age */ 84 | expand 5 85 | ren age agebin 86 | bys pc11_state_id agebin: egen age = seq() 87 | replace age 
= age + agebin - 1 88 | replace state_pop = state_pop / 5 89 | 90 | drop agebin 91 | keep if inrange(age, 16, 90) 92 | order pc11_state_id age 93 | 94 | /* smooth across age bins */ 95 | gen state_pop_smooth = . 96 | levelsof pc11_state_id, local(states) 97 | foreach state in `states' { 98 | lpoly state_pop age if pc11_state_id == "`state'", bw(3) gen(tmp) at(age) 99 | replace state_pop_smooth = tmp if pc11_state_id == "`state'" 100 | drop tmp 101 | } 102 | 103 | ren state_pop state_pop_binned 104 | ren state_pop_smooth state_pop 105 | save $tmp/state_pop, replace 106 | -------------------------------------------------------------------------------- /como/b/old/prep_uk_bmi.do: -------------------------------------------------------------------------------- 1 | import delimited using IHME_GBD_2015_OBESITY_PREVALENCE_1980_2015_Y2017M06D12.CSV , clear 2 | 3 | keep if location_name == "United Kingdom" & sex == "Both" & year == 2015 & metric == "Percent" 4 | capdrop location* sex* year measure metric 5 | list 6 | 7 | 8 | import delimited using IHME_GBD_2015_OVERWEIGHT_PREVALENCE_1980_2015_Y2017M06D12.CSV , clear 9 | 10 | keep if location_name == "United Kingdom" & sex == "Both" & year == 2015 & metric == "Percent" 11 | capdrop location* sex* year measure metric 12 | list 13 | 14 | -------------------------------------------------------------------------------- /como/b/prep_england_prevalence.do: -------------------------------------------------------------------------------- 1 | /* first import and calculate COPD rate, this is the only variable coming from its own source */ 2 | import delimited using $comocsv/copd_mclean_rates.csv, clear 3 | 4 | /* calculate the total population */ 5 | gen pop_total = pop_female + pop_male 6 | 7 | /* take the weighted average of male and female rates */ 8 | gen prev_copd = rate100k_male_mean*(pop_male / pop_total) + rate100k_female_mean*(pop_female / pop_total) 9 | 10 | /* convert the per 100k rate to a prevalence */ 11 | replace prev_copd = prev_copd / 100000 12 | 13 | /* keep only 18 - 100 year olds */ 14 | keep if inrange(age, 18, 100) 15 | keep age prev_copd 16 | save $tmp/copd_uk_prev, replace 17 | 18 | /* create full condition list */ 19 | global conditionlist diabetes_contr diabetes_uncontr hypertension_contr hypertension_uncontr hypertension_both asthma obese_1_2 obese_3 20 | 21 | /* import england data */ 22 | import delimited using $comocsv/uk_condition_prevalence.csv, varnames(1) clear 23 | drop source v* 24 | 25 | /* reshape wide on conditions */ 26 | replace condition = condition[_n-1] if mi(condition) 27 | 28 | /* replace names */ 29 | // replace condition = "diabetes_diagnosed" if condition == "Diabetes" */ 30 | replace condition = "diabetes_contr" if condition == "Diabetes (2)" 31 | replace condition = "diabetes_uncontr" if condition == "Diabetes (2a)" 32 | replace condition = "hypertension_contr" if condition == "Hypertension (3)" 33 | replace condition = "hypertension_both" if condition == "Hypertension (3a)" 34 | replace condition = "hypertension_uncontr" if condition == "Hypertension (3b)" 35 | // replace condition = "hypertension_diagnosis" if condition == "Hypertension (1)" */ 36 | // replace condition = "hypertension_both2" if condition == "Hypertension (2)" */ 37 | // replace condition = "hypertension_biomarker2" if condition == "Hypertension (2a)" */ 38 | replace condition = "obese_1_2" if condition == "Obesity class 1-2" 39 | replace condition = "obese_3" if condition == "Obesity class 3" 40 | replace condition = lower(condition) 41 | 
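/* The loop below expands the banded prevalences (startage/endage rows in the
   CSV) to single years of age: each age takes the prevalence of the band that
   contains it. A minimal illustration of the band lookup, using the Diabetes (2)
   bands from the CSV (0.007 for ages 16-44, 0.088 for ages 45-64); the two
   display lines are illustration only and do not touch the data in memory: */
display cond(inrange(30, 16, 44), 0.007, 0.088)   // age 30 falls in the 16-44 band -> .007
display cond(inrange(50, 16, 44), 0.007, 0.088)   // age 50 does not -> .088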
42 | 43 | /* create a new dataset and fill it with a manual reshape */ 44 | set obs 100 45 | gen age = _n 46 | 47 | foreach condition in $conditionlist { 48 | gen prev_`condition' = . 49 | forval age = 1/100 { 50 | qui sum prevalence if condition == "`condition'" & inrange(`age', startage, endage) 51 | if `r(N)' > 0 { 52 | replace prev_`condition' = `r(mean)' if age == `age' 53 | } 54 | } 55 | } 56 | drop prevalence 57 | keep age prev_* 58 | 59 | /* Update 06/22: correct data input of COPD with calculations at the top of this file */ 60 | merge 1:1 age using $tmp/copd_uk_prev, nogen 61 | replace prev_copd = 0 if mi(prev_copd) 62 | 63 | /* old code: 64 | merge 1:1 age using $comocsv/uk_copd_prevalence, keepusing(prevalence) nogen 65 | drop prev_copd 66 | ren prevalence prev_copd */ 67 | 68 | /* drop the original fields and limit to ages in study */ 69 | keep if inrange(age, 18, 100) 70 | 71 | /* rename these to the variables that match OpenSAFELY */ 72 | gen prev_bp_high = prev_hypertension_both 73 | ren prev_asthma prev_asthma_no_ocs 74 | ren prev_copd prev_chronic_resp_dz 75 | 76 | ren prev* uk_prev* 77 | 78 | /* save england prevalences */ 79 | save $tmp/uk_prevalences, replace 80 | -------------------------------------------------------------------------------- /como/b/prep_health_data.do: -------------------------------------------------------------------------------- 1 | /* Combine AHS and DLHS data */ 2 | 3 | /***************************/ 4 | /* Merge DLHS and AHS Data */ 5 | /***************************/ 6 | /* open the dlhs data */ 7 | use $health/dlhs/dlhs_cab, clear 8 | 9 | /* rename variables to align with ahs */ 10 | ren hv05 sex 11 | ren hv06 usual_residance 12 | ren age_test age 13 | ren hv82 weight_in_kg 14 | ren hv85 length_height_cm 15 | ren hv93a bp_systolic_1_reading 16 | ren hv93b bp_systolic_2_reading 17 | ren hv94a bp_diastolic_1_reading 18 | ren hv94b bp_diastolic_2_reading 19 | ren hv19 illness_type 20 | ren hv21 symptoms_pertaining_illness 21 | ren hv23 diagnosed_for 22 | ren hv02 sl_no 23 | ren hv91a fasting_blood_glucose_mg_dl 24 | ren hv91 fasting 25 | ren hv25 regular_treatment 26 | 27 | /* match variables to format in AHS */ 28 | tostring sl_no, format("%05.0f") replace 29 | 30 | /* mark as dlhs */ 31 | gen survey = 1 32 | 33 | /* append the ahs data */ 34 | append using $health/ahs/ahs_cab 35 | 36 | /* mark as ahs */ 37 | replace survey = 2 if mi(survey) 38 | 39 | /************/ 40 | /* Cleaning */ 41 | /************/ 42 | 43 | /* AGE */ 44 | gen age_new = . 
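/* the replaces below fill age_new with the first non-missing of age_calc, age,
   and age_comb, in that order of preference. A minimal sketch of the same
   first-non-missing logic on made-up values (illustration only, no data used): */
display cond(!mi(.), ., cond(!mi(42), 42, 40))   // calculated age missing, reported age 42 -> shows 42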
45 | 46 | /* use AHS calculated age if it exists */ 47 | replace age_new = age_calc if !mi(age_calc) 48 | 49 | /* use AHS/DLHS reported age if the calculated age does not exist */ 50 | replace age_new = age if mi(age_new) & !mi(age) 51 | 52 | /* use age from the hosehold survey for AHS observations with no matched CAB observation */ 53 | replace age_new = age_comb if mi(age_new) & !mi(age_comb) 54 | 55 | /* replace age with age_new */ 56 | drop age age_calc age_comb 57 | ren age_new age 58 | 59 | /* SEX */ 60 | /* use the AHS household sex for observations with no matched CAB observation */ 61 | replace sex = sex_comb if mi(sex) & !mi(sex_comb) 62 | 63 | /* drop if missing age or sex */ 64 | drop if mi(age) | mi(sex) | sex == 3 65 | 66 | /* drop those under 18 */ 67 | drop if age < 18 68 | 69 | /* SAMPLE */ 70 | /* define a variable to clarify the sample for each variable */ 71 | gen sample = ahs_merge 72 | replace sample = 4 if dlhs_merge == 2 & !mi(dlhs_merge) 73 | replace sample = 5 if dlhs_merge == 3 & !mi(dlhs_merge) 74 | cap label define sample 1 "1 AHS cab" 2 "2 AHS comb" 3 "3 AHS cab & comb" 4 "4 DLHS comb" 5 "5 DLHS cab & comb" 75 | label values sample sample 76 | label var sample "DLHS or AHS modules for each observation" 77 | 78 | /* create new numeric unique identifer */ 79 | gen long uid = _n 80 | tostring uid, format("%08.0f") replace 81 | 82 | /* drop some large, unneeded variables */ 83 | drop index prim_key qs* qe* qh* hv* 84 | 85 | /* save */ 86 | compress 87 | save $health/dlhs/data/dlhs_ahs_merged, replace 88 | -------------------------------------------------------------------------------- /como/b/prep_hrs.do: -------------------------------------------------------------------------------- 1 | /**********************************/ 2 | /* Create wide hazard ratio files */ 3 | /**********************************/ 4 | 5 | /* Convert HR CSV to Stata */ 6 | import delimited $comocsv/uk_nhs_hazard_ratios.csv, clear 7 | 8 | /* label variables */ 9 | lab var hr_age_sex "hazard ratio age-sex adjusted" 10 | lab var hr_age_sex_low "hazard ratio age-sex adjusted lower CI" 11 | lab var hr_age_sex_up "hazard ratio age-sex adjusted upper CI" 12 | lab var hr_full "hazard ratio fully adjusted" 13 | lab var hr_full_low "hazard ratio fully adjusted lower CI" 14 | lab var hr_full_up "hazard ratio fully adjusted upper CI" 15 | lab var hr_full_ec "hazard ratio fully adjusted early censoring" 16 | lab var hr_full_low_ec "hazard ratio fully adjusted early censoring lower CI" 17 | lab var hr_full_up_ec "hazard ratio fully adjusted early censoring upper CI" 18 | 19 | /* shorten age-sex HRs */ 20 | ren *age_sex* *simp* 21 | 22 | /* save as dta file */ 23 | save $tmp/uk_nhs_hazard_ratios, replace 24 | 25 | /* raw data has risk factors in long format-- reshape them to wide */ 26 | /* loop over two types of hazard ratios */ 27 | foreach hr in hr_full hr_simp { 28 | 29 | /* open the long format HRs */ 30 | use $tmp/uk_nhs_hazard_ratios, clear 31 | 32 | /* keep the risk factors, the desired hazard ratio, and the confidence interval */ 33 | keep variable `hr' `hr'_low `hr'_up 34 | 35 | /* transform hazard ratio into a relative risk, assuming base mortality rate of 1% */ 36 | replace `hr' = (1 - exp(`hr' * ln(1 - 0.01))) / 0.01 37 | 38 | /* replace the confidence interval with a standard error. 39 | These are odds ratios. CIs for log odds are symmetric. 
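The log-scale SE is therefore recovered from either tail as
   (ln(point) - ln(lower)) / 1.96 or (ln(upper) - ln(point)) / 1.96; e.g. for a
   ratio of 2.0 with CI (1.8, 2.2), (ln(2.0) - ln(1.8)) / 1.96 is roughly 0.054.
   The two gen lines below compute one estimate from each tail.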
*/ 40 | gen `hr'_lnse = (ln(`hr') - ln(`hr'_low)) / 1.96 41 | gen `hr'_lnse2 = (ln(`hr'_up) - ln(`hr')) / 1.96 42 | 43 | /* reshape them to wide format */ 44 | gen v1 = 0 45 | keep `hr' `hr'_lnse v1 variable 46 | reshape wide `hr' `hr'_lnse, j(variable) i(v1) string 47 | ren `hr'_lnse* *_hr_lnse 48 | ren `hr'* *_`hr' 49 | 50 | /* save the wide hazard ratios with standard errors */ 51 | save $tmp/uk_nhs_hazard_ratios_flat_`hr', replace 52 | } 53 | 54 | /* convert continuous age HRs to stata */ 55 | import delimited $covidpub/covid/csv/uk_age_predicted_hr.csv, clear 56 | ren ln_hr_age_sex ln_hr_simp 57 | gen hr_simp_age_cts = exp(ln_hr_simp) 58 | gen hr_full_age_cts = exp(ln_hr_full) 59 | drop ln_* 60 | save $tmp/uk_age_predicted_hr, replace 61 | -------------------------------------------------------------------------------- /como/b/prep_ny_mortality.do: -------------------------------------------------------------------------------- 1 | /********************************/ 2 | /* prepare O/R from NY epi data */ 3 | /********************************/ 4 | import delimited using $comocsv/nystate_or.csv, varnames(1) clear 5 | 6 | /* bottom-code everything at 1. it's not plausible that these conditions are protective */ 7 | foreach v of varlist * { 8 | if "`v'" == "age" continue 9 | 10 | winsorize `v' 1 100, replace 11 | } 12 | 13 | /* make the data granular on age */ 14 | gen start_age = real(substr(age, 1, 2)) 15 | drop age 16 | 17 | expand 10 18 | bys start_age: egen increment = seq() 19 | gen age = start_age + increment - 1 20 | 21 | /* clean up unused vars */ 22 | drop increment start_age 23 | 24 | /* expand to cover 18-19 */ 25 | expand 3 if age == 20 26 | bys age: egen s = seq() 27 | replace age = age + 1 - s 28 | drop s 29 | 30 | /* expand to cover 80-99 */ 31 | expand 21 if age == 79 32 | bys age: egen s = seq() 33 | replace age = age - 1 + s 34 | drop s 35 | 36 | /* prefix with ny to avoid name collision */ 37 | rename * hr_* 38 | ren hr_age age 39 | 40 | /* assume this new york measure is mostly controlled, though we don't know */ 41 | ren hr_diabetes_uncontr hr_diabetes_contr 42 | 43 | /* clean and save */ 44 | order age 45 | save $tmp/nystate_hr, replace 46 | 47 | 48 | /**************************************/ 49 | /* prepare O/Rs from NY Cummings data */ 50 | /**************************************/ 51 | import delimited using $comocsv/ny_cummings.csv, varnames(1) clear 52 | ren * hr_* 53 | 54 | expand 82 55 | gen age = _n + 17 56 | 57 | save $tmp/nycu_hr, replace 58 | -------------------------------------------------------------------------------- /como/b/prep_pop_sex.do: -------------------------------------------------------------------------------- 1 | /*************************/ 2 | /* prep india male share */ 3 | /*************************/ 4 | import excel $covidpub/demography/pc11/pc11_agesex.xls, firstrow clear 5 | 6 | keep if place_name == "India" 7 | 8 | keep age tot_p tot_m tot_f 9 | assert tot_m + tot_f == tot_p 10 | 11 | destring age, replace force 12 | drop if mi(age) 13 | 14 | /* take 5-year MA of population data series to pull out bumps */ 15 | gen x = 1 16 | xtset x age 17 | gen p_smooth = (L2.tot_p + L1.tot_p + tot_p + F.tot_p + F2.tot_p) / 5 if !mi(L2.tot_p) & !mi(F2.tot_p) 18 | replace p_smooth = (L1.tot_p + tot_p + F.tot_p + F2.tot_p) / 4 if mi(L2.tot_p) & !mi(F2.tot_p) & mi(p_smooth) 19 | replace p_smooth = (L2.tot_p + L1.tot_p + tot_p + F.tot_p) / 4 if mi(F2.tot_p) & !mi(L2.tot_p) & mi(p_smooth) 20 | replace p_smooth = tot_p if mi(p_smooth) 21 | 22 | /* repeat 
for male population to get smoothed sex ratio */ 23 | gen m_smooth = (L2.tot_m + L1.tot_m + tot_m + F.tot_m + F2.tot_m) / 5 if !mi(L2.tot_m) & !mi(F2.tot_m) 24 | replace m_smooth = (L1.tot_m + tot_m + F.tot_m + F2.tot_m) / 4 if mi(L2.tot_m) & !mi(F2.tot_m) & mi(m_smooth) 25 | replace m_smooth = (L2.tot_m + L1.tot_m + tot_m + F.tot_m) / 4 if mi(F2.tot_m) & !mi(L2.tot_m) & mi(m_smooth) 26 | replace m_smooth = tot_m if mi(m_smooth) 27 | 28 | /* calculate male share */ 29 | gen male = m_smooth / p_smooth 30 | 31 | ren p_smooth india_pop 32 | 33 | keep if inrange(age, 18, 100) 34 | keep age male india_pop 35 | save $tmp/india_pop, replace 36 | 37 | /********************************/ 38 | /* UK population and male share */ 39 | /********************************/ 40 | import delimited using $comocsv/england_gender_age.csv, clear 41 | gen male_share = male / total 42 | drop male female 43 | ren male_share male 44 | ren total uk_pop 45 | 46 | /* distribute age 90 weight across remaining years, since age 90 is actually 90+ */ 47 | /* this is basically inconsequential since it is very few people and COPD is the 48 | only variable that is non-constant from 90-99. */ 49 | expand 10 if age == 90 50 | replace uk_pop = uk_pop/10 if inrange(age, 90, 99) 51 | replace age = _n - 1 if age == 90 52 | 53 | save $tmp/uk_pop, replace 54 | 55 | -------------------------------------------------------------------------------- /como/como_programs.do: -------------------------------------------------------------------------------- 1 | /****************************************/ 2 | /* set globals used throughout analysis */ 3 | /****************************************/ 4 | 5 | /* MAIN COMORBID CONDITION SETS USED IN THE PAPER */ 6 | 7 | /* define age bin indicator variables */ 8 | global age_vars age18_40 age40_50 age50_60 age60_70 age70_80 age80_ 9 | 10 | /* define biomarker variables from DLHS/AHS that match NHS hazard ratio vars */ 11 | global hr_biomarker_vars obese_1_2 obese_3 bp_high diabetes_uncontr diabetes_contr 12 | 13 | /* define non-biomarker GBD variables that match NHS hazard ratio vars */ 14 | global hr_gbd_vars asthma_ocs autoimmune_dz haem_malig_1 cancer_non_haem_1 /// 15 | chronic_heart_dz chronic_resp_dz immuno_other_dz kidney_dz liver_dz neuro_other /// 16 | stroke_dementia 17 | 18 | /* define varlist found only in opensafely */ 19 | global hr_os_only_vars asthma_no_ocs cancer_non_haem_1_5 cancer_non_haem_5 diabetes_no_measure haem_malig_1_5 haem_malig_5 organ_transplant spleen_dz 20 | 21 | 22 | /* SOME ADDITIONAL VARIABLE GROUPS USED IN EXPLORATION AND DEBUGGING */ 23 | 24 | /* define self-report vars found in DLHS/AHS (but not used in risk analysis) */ 25 | global hr_selfreport_vars chronic_heart_dz stroke_dementia liver_dz kidney_dz autoimmune_dz /// 26 | cancer_non_haem_1 haem_malig_1 chronic_resp_dz 27 | 28 | 29 | /*******************************/ 30 | /* define some helper programs */ 31 | /*******************************/ 32 | /*********************************************************/ 33 | /* sc: a function to scatter multiple variables over age */ 34 | /*********************************************************/ 35 | cap prog drop sc 36 | prog def sc 37 | 38 | syntax varlist, [name(string) yscale(passthru) ylabel(passthru) legend(passthru)] 39 | tokenize `varlist' 40 | 41 | /* set a default yscale */ 42 | if mi("`yscale'") local yscale yscale(log) 43 | if mi("`ylabel'") local ylabel ylabel(.125 .25 1 4 16 64) 44 | 45 | /* set a default name */ 46 | if mi("`name'") local name euripides 47 | 
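/* note: the while loop below assembles the full -twoway- call one variable at a
   time, appending a "(line <var> age, ...)" term to the local macro `command'
   and then calling -mac shift- to move to the next variable. For two hypothetical
   variables hr_a and hr_b, "sc hr_a hr_b" therefore ends up running
   twoway (line hr_a age, ...) (line hr_b age, ...), legend(...) */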
48 | /* loop over the outcome vars */ 49 | while (!mi("`1'")) { 50 | 51 | /* store the variable label */ 52 | local label : variable label `1' 53 | 54 | /* add the line plot for this variable to the twoway command string */ 55 | local command `command' (line `1' age, `yscale' `ylabel' xtitle("`label'") ytitle("Mortality Hazard Ratio") lwidth(medthick) ) 56 | 57 | /* get the next variable in the list */ 58 | mac shift 59 | } 60 | 61 | /* draw the graph */ 62 | twoway `command', `legend' 63 | graphout `name' 64 | end 65 | /****************** end sc *********************** */ 66 | 67 | /************************************************************/ 68 | /* scp: a function to compare multiple prevalences over age */ 69 | /************************************************************/ 70 | cap prog drop scp 71 | prog def scp 72 | 73 | syntax varlist, [name(string) yscale(passthru) yline(passthru) ytitle(passthru) legend(passthru)] 74 | tokenize `varlist' 75 | 76 | /* set defaults */ 77 | if mi("`yscale'") local yscale 78 | if mi("`name'") local name euripides 79 | if mi("`ytitle'") local ytitle ytitle("Prevalence") 80 | 81 | /* loop over the outcome vars */ 82 | while (!mi("`1'")) { 83 | 84 | /* store the variable label */ 85 | local label : variable label `1' 86 | 87 | /* add the line plot for this variable to the twoway command string */ 88 | local command `command' (line `1' age, `yscale' xtitle("`label'") `ytitle' lwidth(medthick) ) 89 | 90 | /* get the next variable in the list */ 91 | mac shift 92 | } 93 | 94 | /* draw the graph */ 95 | twoway `command', `yline' name(`name', replace) `legend' 96 | graphout `name' 97 | end 98 | /****************** end scp *********************** */ 99 | 100 | 101 | 102 | // CONDITION LIST 103 | 104 | // AGE/SEX 105 | // age18_40 106 | // age40_50 107 | // age50_60 108 | // age60_70 109 | // age70_80 110 | // age80_ 111 | // male 112 | 113 | // BIOMARKERS (plus diabetes_contr from DLHS/AHS) 114 | // obese_1_2 115 | // obese_3 116 | // bp_high 117 | // diabetes_uncontr 118 | // diabetes_contr 119 | 120 | // GLOBAL BURDEN OF DISEASE 121 | // asthma_ocs 122 | // autoimmune_dz 123 | // haem_malig_1 124 | // cancer_non_haem_1 125 | // chronic_heart_dz 126 | // chronic_resp_dz 127 | // immuno_other_dz 128 | // kidney_dz 129 | // liver_dz 130 | // neuro_other 131 | // stroke_dementia 132 | 133 | // NOT USED 134 | // asthma_no_ocs 135 | // cancer_non_haem_1_5 136 | // cancer_non_haem_5 137 | // diabetes_no_measure 138 | // haem_malig_1_5 139 | // haem_malig_5 140 | // organ_transplant 141 | // spleen_dz 142 | -------------------------------------------------------------------------------- /como/csv/copd_mclean_rates.csv: -------------------------------------------------------------------------------- 1 | age,rate100k_male_mean,rate100k_male_lower,rate100k_male_upper,rate100k_female_mean,rate100k_female_lower,rate100k_female_upper,pop_male,pop_female 2 | 35,92.0,47.6,160.7,152.7,93.3,235.8,334046,334931 3 | 36,124.7,72.7,199.7,66.9,30.6,126.9,340309,341747 4 | 37,139.4,85.2,215.3,163.9,103.9,245.9,345224,348338 5 | 38,246.3,173.4,339.4,183.2,120.8,266.6,360518,363211 6 | 39,244.2,173.7,333.8,160.7,104.0,237.2,360518,363211 7 | 40,190.9,130.6,269.5,236.4,168.1,323.2,385058,393469 8 | 41,239.9,171.4,326.7,361.6,275.3,466.4,376049,383443 9 | 42,312.6,234.2,408.9,345.9,262.0,448.1,384557,393354 10 | 43,457.9,362.5,570.6,298.2,221.3,393.1,384595,392455 11 | 44,396.3,308.4,501.6,488.3,389.5,604.5,391659,396844 12 | 45,412.1,322.4,518.9,428.7,337.2,537.4,389810,399415 13 | 
46,484.1,387.7,597.1,599.8,490.6,726.1,392097,401209 14 | 47,624.2,514.4,750.4,618.2,507.6,745.8,388933,397082 15 | 48,597.9,489.1,723.9,731.5,609.4,870.9,381317,388739 16 | 49,849.7,717.9,998.7,969.1,825.6,1130.3,373943,379531 17 | 50,787.4,658.8,933.8,1061.6,909.3,1232.1,361409,366327 18 | 51,985.8,839.8,1149.8,1075.2,920.9,1247.9,346924,351937 19 | 52,1206.2,1041.2,1389.9,1408.9,1227.7,1609.4,339352,344086 20 | 53,1502.0,1316.1,1706.8,1393.4,1212.6,1593.7,332154,336435 21 | 54,1470.1,1283.9,1675.8,1669.6,1468.2,1890.8,319149,324806 22 | 55,1689.8,1483.2,1917.2,1751.2,1541.7,1981.3,308806,313516 23 | 56,2019.8,1791.3,2269.4,2366.7,2116.4,2638.5,297135,303305 24 | 57,2278.0,2033.7,2543.5,2396.8,2145.9,2669.0,297836,305403 25 | 58,2451.5,2197.8,2726.4,2681.2,2415.3,2968.4,295033,301455 26 | 59,3040.2,2756.0,3345.7,2760.1,2487.5,3054.6,285974,295038 27 | 60,2959.3,2674.3,3266.4,3418.8,3112.2,3747.5,288397,297457 28 | 61,3702.1,3384.0,4041.9,3472.9,3168.2,3799.0,293435,305710 29 | 62,3947.1,3622.0,4293.5,3702.7,3392.1,4034.0,302337,313305 30 | 63,4109.5,3785.3,4454.0,3858.7,3547.8,4189.5,319545,332459 31 | 64,4732.4,4400.8,5082.3,4274.2,3963.2,4603.1,348284,361362 32 | 65,5121.9,4760.9,5503.0,4547.6,4211.9,4902.9,268158,280482 33 | 66,6037.8,5600.6,6500.1,4925.2,4535.4,5339.5,260309,273585 34 | 67,6286.9,5858.5,6738.4,5673.2,5272.3,6096.5,259773,273394 35 | 68,7180.7,6695.8,7691.4,5646.0,5227.4,6089.1,240572,256467 36 | 69,7589.7,7075.3,8131.6,5839.5,5401.4,6303.6,213801,231079 37 | 70,7473.9,6925.4,8054.3,6810.5,6301.9,7349.2,191917,210047 38 | 71,8378.5,7792.2,8997.3,6519.3,6022.8,7045.8,199921,219818 39 | 72,8704.3,8108.5,9332.2,7241.5,6728.8,7783.0,197591,219848 40 | 73,9778.5,9144.1,10445.4,7168.4,6647.5,7719.3,192255,214700 41 | 74,9398.4,8759.5,10071.6,7579.6,7037.4,8152.4,181894,206560 42 | 75,9158.3,8510.7,9842.2,7440.4,6893.2,8019.6,171911,198752 43 | 76,10352.1,9640.5,11102.3,7706.0,7143.2,8301.3,162735,191159 44 | 77,10050.6,9337.0,10804.2,8147.5,7557.1,8771.7,149477,180146 45 | 78,10517.6,9751.2,11328.2,7821.2,7215.7,8464.0,140940,174360 46 | 79,10317.8,9552.4,11128.3,7706.6,7113.7,8335.7,136064,172787 47 | 80,10994.9,10173.1,11865.3,8286.5,7661.2,8949.3,127395,168522 48 | 81,11872.4,10997.3,12798.6,8541.3,7900.6,9220.2,116429,160681 49 | 82,11408.1,10505.3,12367.8,8156.7,7509.0,8845.3,104119,148184 50 | 83,12755.2,11739.0,13835.9,8489.7,7807.0,9216.2,92560,36864 51 | 84,12132.6,11074.6,13264.5,7477.9,6807.2,8196.8,83689,27404 52 | 85,11844.6,10769.5,12997.9,8149.2,7422.2,8928.2,74160,18928 53 | 86,12018.2,10863.5,13262.2,7854.0,7113.0,8651.3,63615,08907 54 | 87,11655.6,10437.2,12977.2,7025.4,6304.5,7806.0,54463,99659 55 | 88,11767.1,10445.0,13210.3,7120.4,6358.1,7949.0,46119,90656 56 | 89,11255.6,9832.9,12826.4,6604.5,5838.7,7442.7,39749,82648 57 | 90,11086.4,9599.5,12738.3,6471.2,5686.2,7334.3,10164,27510 58 | 91,10639.6,9113.2,12348.6,6857.9,6035.7,7760.9,10164,27510 59 | 92,9776.8,7881.5,11990.4,6544.6,5515.9,7709.5,10164,27510 60 | 93,8373.6,6253.8,10980.9,6392.4,5206.7,7767.4,10164,27510 61 | 94,8805.0,6345.9,11901.8,5705.5,4480.0,7162.7,10164,27510 62 | 95,8148.1,5608.8,11443.0,4148.3,3048.0,5516.3,10164,27510 63 | 96,7142.9,4476.4,10814.4,3415.2,2336.0,4821.2,10164,27510 64 | 97,5357.1,2768.1,9357.9,5020.9,3516.6,6951.1,10164,27510 65 | 98,4929.6,1981.9,10156.8,4212.5,2670.3,6320.8,10164,27510 66 | 99,8000.0,3453.8,15763.2,4134.4,2363.1,6714.0,10164,27510 67 | 100,13114.8,5662.0,25841.3,6227.1,3627.5,9970.2,10164,27510 68 | 
-------------------------------------------------------------------------------- /como/csv/england_gender_age.csv: -------------------------------------------------------------------------------- 1 | age,male,female,total 2 | 0,327309,310525,637834 3 | 1,338368,321522,659890 4 | 2,349229,331803,681032 5 | 3,349199,331559,680758 6 | 4,352148,335065,687213 7 | 5,360688,342703,703391 8 | 6,370995,354215,725210 9 | 7,363496,346678,710174 10 | 8,356965,340812,697777 11 | 9,351790,335524,687314 12 | 10,355145,337926,693071 13 | 11,344574,326534,671108 14 | 12,337076,321037,658113 15 | 13,322996,307963,630959 16 | 14,317980,302888,620868 17 | 15,309945,293801,603746 18 | 16,305110,288845,593955 19 | 17,313367,298838,612205 20 | 18,324403,305833,630236 21 | 19,337046,319387,656433 22 | 20,343991,325703,669694 23 | 21,357200,337190,694390 24 | 22,363315,340570,703885 25 | 23,364520,345682,710202 26 | 24,376642,357841,734483 27 | 25,377103,361973,739076 28 | 26,385487,377446,762933 29 | 27,397538,383724,781262 30 | 28,391623,377887,769510 31 | 29,384229,378914,763143 32 | 30,385964,385662,771626 33 | 31,372655,379770,752425 34 | 32,380880,379553,760433 35 | 33,380805,379662,760467 36 | 34,369422,373224,742646 37 | 35,370811,376544,747355 38 | 36,369418,376552,745970 39 | 37,372822,378172,750994 40 | 38,373242,378447,751689 41 | 39,359561,361914,721475 42 | 40,336044,336944,672988 43 | 41,329983,331604,661587 44 | 42,334321,338908,673229 45 | 43,340409,345283,685692 46 | 44,345749,351339,697088 47 | 45,360708,365570,726278 48 | 46,375568,380534,756102 49 | 47,384636,394820,779456 50 | 48,375401,384788,760189 51 | 49,383381,393836,777217 52 | 50,382860,393074,775934 53 | 51,389406,396933,786339 54 | 52,386668,398868,785536 55 | 53,388054,399900,787954 56 | 54,384446,395242,779688 57 | 55,375450,385802,761252 58 | 56,367033,375868,742901 59 | 57,353738,362172,715910 60 | 58,337972,347088,685060 61 | 59,329177,339029,668206 62 | 60,321653,331290,652943 63 | 61,308159,318785,626944 64 | 62,296626,306696,603322 65 | 63,284029,296404,580433 66 | 64,282719,298013,580732 67 | 65,278642,292661,571303 68 | 66,268184,285237,553421 69 | 67,268738,286641,555379 70 | 68,271970,293101,565071 71 | 69,278608,298811,577419 72 | 70,292024,315080,607104 73 | 71,315843,339855,655698 74 | 72,240337,261886,502223 75 | 73,230038,252895,482933 76 | 74,226476,250366,476842 77 | 75,205959,232313,438272 78 | 76,179879,206575,386454 79 | 77,158067,184848,342915 80 | 78,161540,190485,352025 81 | 79,156343,187117,343460 82 | 80,147733,179519,327252 83 | 81,135514,168751,304265 84 | 82,123492,158153,281645 85 | 83,112133,147147,259280 86 | 84,98000,133314,231314 87 | 85,87528,123624,211152 88 | 86,79030,116217,195247 89 | 87,69067,106926,175993 90 | 88,58264,95694,153958 91 | 89,47498,81854,129352 92 | 90,157788,341488,499276 93 | -------------------------------------------------------------------------------- /como/csv/india_condition_prevalence.csv: -------------------------------------------------------------------------------- 1 | Condition,Start Age,End Age,Prevalence,Source 2 | COPD,0,5,0.1,"GBD" 3 | COPD,5,10,0.2 4 | COPD,10,15,0.3 5 | COPD,15,20,0.4 6 | COPD,20,25,0.55 7 | COPD,25,30,0.8 8 | COPD,30,35,1.35 9 | COPD,35,40,2.15 10 | COPD,40,45,3.55 11 | COPD,45,50,6.05 12 | COPD,50,55,9.65 13 | COPD,55,60,14.25 14 | COPD,60,65,19 15 | COPD,65,70,23.1 16 | COPD,70,75,26.35 17 | COPD,75,80,28.1 18 | COPD,80,100,28.35 19 | ASTHMA,0,5,1.35,"GBD" 20 | ASTHMA,5,10,2.4 21 | ASTHMA,10,15,1.75 22 | ASTHMA,15,20,1.35 23 | 
ASTHMA,20,25,1.2 24 | ASTHMA,25,30,1.5 25 | ASTHMA,30,35,1.95 26 | ASTHMA,35,40,2.35 27 | ASTHMA,40,45,2.85 28 | ASTHMA,45,50,3.6 29 | ASTHMA,50,55,4.65 30 | ASTHMA,55,60,6 31 | ASTHMA,60,65,7.5 32 | ASTHMA,65,70,9.4 33 | ASTHMA,70,75,10.9 34 | ASTHMA,75,80,11.25 35 | ASTHMA,80,100,10.9 36 | 37 | -------------------------------------------------------------------------------- /como/csv/ny_cummings.csv: -------------------------------------------------------------------------------- 1 | age,male,bp_high,diabetes_contr,chronic_heart_dz,chronic_resp_dz 2 | 1.31,1.13,1.58,1.31,1.76,2.94 3 | -------------------------------------------------------------------------------- /como/csv/ny_hr.csv: -------------------------------------------------------------------------------- 1 | age,hr_bp_high,hr_diabetes_uncontr,hr_chronic_heart_dz,hr_kidney_dz,hr_chronic_resp_dz 2 | 20,1.64,48.19,0.00,0.60,0.00 3 | 30,1.81,8.48,0.17,1.14,0.12 4 | 40,1.67,3.90,0.68,1.06,0.65 5 | 50,1.38,2.81,1.01,1.13,0.95 6 | 60,1.13,2.15,0.90,0.54,1.02 7 | 70,1.04,1.87,0.82,0.26,1.04 8 | 80,1.04,1.87,0.82,0.26,1.04 9 | 90,1.04,1.87,0.82,0.26,1.04 10 | -------------------------------------------------------------------------------- /como/csv/nystate_age_comorbid_05082020.csv: -------------------------------------------------------------------------------- 1 | Age Range,Hypertension,Diabetes,Hyperlipidemia,Dementia,Cronoary Artery Disease,Renal Disease,COPD,Atrial Fibrillation,Cancer,Stroke,Fatality Count 2 | Total,11517,7572,4445,2643,2476,2248,1880,1599,1527,1402,21045 3 | age 0-9,1,0,0,0,0,0,0,0,0,0,3 4 | age 10-19,0,1,0,0,0,0,0,0,0,0,8 5 | age 20-29,11,20,3,0,0,3,0,0,3,2,74 6 | age 30-39,65,73,15,0,1,20,2,2,5,4,287 7 | age 40-49,250,223,66,2,19,73,12,8,18,22,748 8 | age 50-59,942,800,321,27,133,210,103,43,71,79,2034 9 | age 60-69,2352,1768,896,152,451,504,332,183,300,286,4147 10 | age 70-79,3331,2365,1381,567,746,633,590,415,453,453,5536 11 | age 80-89,3138,1749,1227,1062,815,585,570,611,487,408,5415 12 | age 90+,1426,572,535,831,311,220,271,337,190,148,2784 13 | Unknown,1,1,1,2,0,0,0,0,0,0,9 -------------------------------------------------------------------------------- /como/csv/nystate_or.csv: -------------------------------------------------------------------------------- 1 | age,bp_high,diabetes_uncontr,chronic_heart_dz,kidney_dz,chronic_resp_dz 2 | 20-29,1.642935378,48.19277108,0,0.6024096386,0 3 | 30-39,1.805650924,8.47630719,0.171998624,1.14379085,0.116713352 4 | 40-49,1.6687759,3.90086477,0.6750675068,1.058855886,0.6497524752 5 | 50-59,1.384627488,2.811610607,1.010526316,1.12890923,0.949742778 6 | 60-69,1.129405312,2.153825806,0.9003444145,0.5370415748,1.024088434 7 | 70-79,1.043533767,1.874224294,0.8249186539,0.2574051725,1.039019679 8 | -------------------------------------------------------------------------------- /como/csv/uk_condition_prevalence.csv: -------------------------------------------------------------------------------- 1 | condition,start age, end age, prevalence, source 2 | Diabetes,16,24,0.0085,, 3 | ,25,34,0.012,, 4 | ,35,44,0.0180,, 5 | ,45,54,0.0480,, 6 | ,55,64,0.0725,, 7 | ,65,74,.1265,, 8 | ,75,100,.1205,, 9 | Diabetes (2),16,44,0.007,, 10 | ,45,64,0.088,, 11 | ,65,100,0.139,, 12 | Diabetes (2a),16,44,0.003,, 13 | ,45,64,0.032,, 14 | ,65,100,0.040,, 15 | Asthma,16,24,0.18,, 16 | ,25,34,0.21,, 17 | ,35,44,0.19,, 18 | ,45,54,0.17,, 19 | ,55,64,0.15,, 20 | ,65,74,0.15,, 21 | ,75,100,0.13,, 22 | Hypertension (1),16,24,0,, 23 | ,25,44,0.01,, 24 | ,35,64,0.11,, 25 | ,65,100,0.29,, 26 | Hypertension 
(2),20,39,0.093,, 27 | ,40,59,0.2790,, 28 | ,60,100,0.6370,, 29 | Hypertension (2a),20,39,0.08, 30 | ,40,59,0.1630,, 31 | ,60,100,0.3020,, 32 | Hypertension (3),16,24,0.00,,NHS-hyp.controlled 33 | ,25,34,0.007,, 34 | ,35,44,0.01,, 35 | ,45,54,0.087,, 36 | ,55,64,0.127,, 37 | ,65,74,0.267,, 38 | ,75,100,0.325,, 39 | Hypertension (3a),16,24,0.026,,NHS-hyp.all 40 | ,25,34,0.069,, 41 | ,35,44,0.122,, 42 | ,45,54,0.238,, 43 | ,55,64,0.409,, 44 | ,65,74,0.581,, 45 | ,75,100,0.660,, 46 | Hypertension (3b),16,24,0.026,,NHS-hyp.uncontrolled+untreated 47 | ,25,34,0.062,, 48 | ,35,44,0.112,, 49 | ,45,54,0.151,, 50 | ,55,64,0.283,, 51 | ,65,74,0.254,, 52 | ,75,100,0.335,, 53 | COPD,31,40,0.0009,, 54 | ,41,50,0.0069,, 55 | ,51,60,0.0223,, 56 | ,61,70,0.0539,, 57 | ,71,80,0.0834,, 58 | ,81,100,0.0800,, 59 | Obesity class 1-2,16,24,.116, 60 | ,25,34,.184, 61 | ,35,44,.241, 62 | ,45,54,.308, 63 | ,55,64,.310, 64 | ,65,74,.289, 65 | ,75,100,.264, 66 | Obesity class 3,16,24,.019, 67 | ,25,34,.028, 68 | ,35,44,.032, 69 | ,45,54,.043, 70 | ,55,64,.038, 71 | ,65,74,.033, 72 | ,75,100,.017, 73 | -------------------------------------------------------------------------------- /como/csv/uk_condition_sd.csv: -------------------------------------------------------------------------------- 1 | condition, start age, end age,lower, upper, mean 2 | sd_obese_1_2,15,24,.091,.146,.116 3 | ,25,34,.16,.21,.184 4 | ,35,44,.215,.269,.241 5 | ,45,54,.278,.339,.308 6 | ,55,64,.281,.341,.310 7 | ,65,74,.263,.316,.289 8 | ,75,100,.230,.301,.264 9 | sd_obese_3,16,24,.011,.035,.019 10 | ,25,34,.018,.042,.028 11 | ,35,44,.024,.044,.032 12 | ,45,54,.032,.058,.043 13 | ,55,64,.027,.054,.038 14 | ,65,74,.024,.046,.033 15 | ,75,100,.01,.031,.017 16 | sd_bp_high,16,24,.012,.057,.026 17 | ,25,34,.044,.107,.062 18 | ,35,44,.095,.157,.112 19 | ,45,54,.205,.276,.151 20 | ,55,64,.369,.451,.283 21 | ,65,74,.545,.617,.254 22 | ,75,100,.618,.700,.335 23 | diabetes_uncontr,16,44, 24 | ,45,64, 25 | ,65,100, 26 | diabetes_contr,16,44, 27 | ,45,64, 28 | ,65,100, 29 | 30 | -------------------------------------------------------------------------------- /como/csv/uk_demography.csv: -------------------------------------------------------------------------------- 1 | age,uk_pop 2 | 0-4,3857263 3 | 5-9,4149852 4 | 10-14,3953866 5 | 15-19,3656968 6 | 20-24,4153080 7 | 25-29,4514249 8 | 30-34,4497132 9 | 35-39,4395667 10 | 40-44,4019539 11 | 45-49,4402122 12 | 50-54,4661015 13 | 55-59,4405908 14 | 60-64,3755185 15 | 65-69,3368199 16 | 70-74,3318867 17 | 75-79,2325296 18 | 80-84,1715328 19 | 85-89,1042090 20 | 90+,605181 21 | -------------------------------------------------------------------------------- /como/csv/uk_nhs_hazard_ratios.csv: -------------------------------------------------------------------------------- 1 | variable,HR_age_sex,HR_age_sex_low,HR_age_sex_up,HR_full,HR_full_low,HR_full_up,HR_full_ec,HR_full_low_ec,HR_full_up_ec 2 | age18_40,0.05,0.04,0.08,0.06,0.04,0.08,0.08,0.05,0.13 3 | age40_50,0.27,0.21,0.34,0.30,0.25,0.36,0.31,0.21,0.46 4 | age50_60,1,1,1,1,1,1,1,1,1 5 | age60_70,2.61,2.29,2.96,2.40,2.16,2.66,2.35,1.9,2.91 6 | age70_80,7.61,6.78,8.54,6.08,5.52,6.69,5.55,4.54,6.77 7 | age80_,26.27,23.52,29.33,20.61,18.72,22.7,13.43,10.95,16.45 8 | female,1,1,1,1,1,1,1,1,1 9 | male,2.24,2.12,2.36,1.59,1.53,1.65,2.18,1.99,2.38 10 | bmi_not_obese,1,1,1,1,1,1,1,1,1 11 | bmi_obeseI,1.57,1.47,1.68,1.05,1.00,1.11,1.39,1.25,1.54 12 | bmi_obeseII,2.01,1.82,2.21,1.40,1.30,1.52,1.62,1.39,1.9 13 | bmi_obeseIII,2.97,2.62,3.38,1.92,1.72,2.13,2.45,2,3.01 
14 | obese_1_2,1.69,1.57,1.83,1.15,1.08,1.22,1.45,1.29,1.64 15 | obese_3,2.97,2.62,3.38,1.92,1.72,2.13,2.45,2,3.01 16 | bp_not_high,1,1,1,1,1,1,1,1,1 17 | bp_high,1.22,1.15,1.3,0.89,0.85,0.93,0.94,0.85,1.05 18 | chronic_resp_dz,2.35,2.21,2.5,1.63,1.55,1.71,1.97,1.77,2.18 19 | asthma_no_ocs,1.23,1.14,1.33,0.99,0.93,1.05,1.14,1.01,1.29 20 | asthma_ocs,1.7,1.48,1.96,1.13,1.01,1.26,1.39,1.12,1.73 21 | chronic_heart_dz,2.01,1.9,2.13,1.17,1.12,1.22,1.33,1.22,1.46 22 | diabetes_contr,2.02,1.89,2.16,1.31,1.24,1.37,1.48,1.33,1.65 23 | diabetes_uncontr,3.61,3.34,3.9,1.95,1.83,2.07,2.57,2.27,2.91 24 | diabetes_no_measure,2.35,2.04,2.7,1.90,1.71,2.09,1.68,1.33,2.12 25 | cancer_non_haem_1,1.83,1.51,2.21,1.72,1.50,1.97,1.51,1.1,2.05 26 | cancer_non_haem_1_5,1.39,1.22,1.58,1.15,1.05,1.27,1.36,1.13,1.65 27 | cancer_non_haem_5,1.03,0.94,1.12,0.96,0.91,1.03,0.92,0.79,1.06 28 | haem_malig_1,4.03,2.76,5.88,2.82,2.09,3.81,2.6,1.3,5.22 29 | haem_malig_1_5,3.59,2.88,4.48,2.47,2.05,2.96,3.67,2.66,5.06 30 | haem_malig_5,2.13,1.76,2.59,1.62,1.39,1.88,1.64,1.18,2.28 31 | liver_dz,2.34,1.94,2.83,1.75,1.51,2.03,1.86,1.4,2.47 32 | stroke_dementia,2.34,2.18,2.51,2.16,2.06,2.27,1.61,1.43,1.81 33 | neuro_other,2.94,2.62,3.3,2.58,2.38,2.79,2.28,1.88,2.76 34 | kidney_dz,2.19,2.06,2.32,1.42,1.36,1.50,1.75,1.58,1.92 35 | organ_transplant,7.79,5.88,10.33,3.55,2.79,4.52,2.62,1.51,4.57 36 | spleen_dz,1.82,1.21,2.74,1.34,0.98,1.83,1.87,1.06,3.3 37 | autoimmune_dz,1.35,1.24,1.48,1.19,1.11,1.27,1.31,1.14,1.51 38 | immuno_other_dz,2.02,1.45,2.81,1.70,1.34,2.16,2.01,1.25,3.25 39 | -------------------------------------------------------------------------------- /como/csv/uk_nhs_incidence.csv: -------------------------------------------------------------------------------- 1 | condition,prevalence 2 | age18_40,34.4 3 | age40_50,16.5 4 | age50_60,17.6 5 | age60_70,13.8 6 | age70_80,11.2 7 | age80_,6.5 8 | female,46.0 9 | male,49.9 10 | bmi_not_obese,56.5 11 | bmi_obeseI,13.8 12 | bmi_obeseII,5.3 13 | bmi_obeseIII,2.7 14 | obese_1_2,19.1 15 | obese_3,2.7 16 | bp_not_high,65.8 17 | bp_high,34.2 18 | chronic_resp_dz,4.1 19 | asthma_no_ocs,14.2 20 | asthma_ocs,1.7 21 | chronic_heart_dz,6.7 22 | diabetes_contr,6.0 23 | diabetes_uncontr,2.8 24 | diabetes_no_measure,1.1 25 | cancer_non_haem_1,0.13 26 | cancer_non_haem_1_5,0.10 27 | cancer_non_haem_5,0.10 28 | haem_malig_1,0.31 29 | haem_malig_1_5,0.29 30 | haem_malig_5,0.16 31 | liver_dz,0.70 32 | stroke_dementia,2.1 33 | neuro_other,1.0 34 | kidney_dz,6.3 35 | organ_transplant,0.1 36 | spleen_dz,0.2 37 | autoimmune_dz,5.1 38 | immuno_other_dz,1.6 39 | -------------------------------------------------------------------------------- /como/csv/weighted_hrs.txt: -------------------------------------------------------------------------------- 1 | OpenSAFELY reports HRs for Obesity 1 & 2 separately. We need them 2 | together to match prevalence. We take prevalence-weighted mean of 3 | hazard ratios. 
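For concreteness, each combined HR below is the prevalence-weighted mean of the
two component HRs (and the CI bounds combine the same way). A quick check in
Stata using the values listed below:

  display (0.138*1.05 + 0.053*1.40) / (0.138 + 0.053)   // ~1.15 (obesity 1-2)
  display (0.058*1.33 + 0.005*2.52) / (0.058 + 0.005)   // ~1.42 (kidney)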
4 | 5 | Obesity 1: prevalence: 0.138, hazard ratio: 1.05 (1.00-1.11) 6 | Obesity 2: prevalence: 0.053, hazard ratio: 1.40 (1.30-1.52) 7 | 8 | Combined: 1.15 (1.08 - 1.22) 9 | 10 | Kidney mild: 0.058, HR: 1.33 (1.28-1.40) 11 | Kidney severe: 0.005, HR: 2.52 (2.33-2.72) 12 | 13 | Combined: 1.42 (1.36-1.50) 14 | 15 | -------------------------------------------------------------------------------- /como/e/explore_gbd_vs_dlhs.do: -------------------------------------------------------------------------------- 1 | use $tmp/india_models, clear 2 | 3 | sort age 4 | twoway /// 5 | (line diabetes_uncontr age) (scatter gbd_diabetes age) 6 | graphout x 7 | -------------------------------------------------------------------------------- /como/e/hr_vs_or.do: -------------------------------------------------------------------------------- 1 | /* import hazard ratios from NHS study, fully adjusted model */ 2 | use $tmp/uk_nhs_hazard_ratios_flat_hr_fully_adj, clear 3 | 4 | global varlist $age_vars $hr_biomarker_vars $hr_gbd_vars 5 | 6 | /* define reference group mortality -- 50--60 year olds */ 7 | global r = 355 / 3068883 8 | 9 | /* rename hazard ratio vars for consistency */ 10 | foreach condition in $comorbid_vars { 11 | ren `condition'_hr_fully_adj hr_`condition' 12 | } 13 | 14 | /* calculate relative risk for each condition from hazard ratio */ 15 | foreach condition in $comorbid_vars { 16 | gen rr_`condition' = (1 - exp(hr_`condition' * ln(1 - ${r}))) / ${r} 17 | } 18 | 19 | /* calculate odds ratios from relative risk */ 20 | foreach condition in $comorbid_vars { 21 | gen or_`condition' = (rr_`condition' * (1 + ${r})) / (1 - rr_`condition' * ${r}) 22 | } 23 | 24 | /* check we got it right by recalculating rr from or */ 25 | foreach condition in $comorbid_vars { 26 | gen rr2_`condition' = or_`condition' / (1 - $r + $r * or_`condition') 27 | gen diff = rr_`condition' / rr2_`condition' 28 | assert inrange(diff, .999, 1.001) 29 | drop diff 30 | } 31 | drop rr2* 32 | 33 | /* check out the comparison */ 34 | foreach condition in $comorbid_vars { 35 | list or_`condition' rr_`condition' hr_`condition' 36 | } 37 | 38 | 39 | /* reshape to wide on different stats */ 40 | reshape long hr or rr, string i(v1) j(stat) 41 | drop v1 42 | 43 | /* round results to 3 digits */ 44 | foreach v in hr or rr { 45 | replace `v' = round(`v', .001) 46 | format `v' %6.3f 47 | } 48 | 49 | /* list results */ 50 | list 51 | -------------------------------------------------------------------------------- /como/e/summarize_india_conditions.do: -------------------------------------------------------------------------------- 1 | /**************************************************************/ 2 | /* explore different risk factors across the age distribution */ 3 | /**************************************************************/ 4 | use $tmp/combined, clear 5 | collapse (mean) risk_factor_* [aw=wt], by(age) 6 | 7 | keep if age < 85 8 | sort age 9 | save $tmp/foo, replace 10 | 11 | /* 1. compare continuous age distributions to discrete to confirm they are ok */ 12 | twoway (line risk_factor_simple_cts age) (line risk_factor_simple age), yscale(log) ylabel(0.1 0.5 1 2 5 10 50) 13 | graphout simple_comp 14 | 15 | twoway (line risk_factor_full_cts age) (line risk_factor_full age), yscale(log) ylabel(0.1 0.5 1 2 5 10 50) 16 | graphout full_comp 17 | 18 | /* 2. 
compare fully adjusted, age-sex, comorbid conditions only */ 19 | twoway (line risk_factor_full_cts age, lwidth(medthick)) (line risk_factor_simple_cts age, lwidth(medthick)) , yscale(log) ylabel(0.1 0.5 1 2 5 10 50) 20 | graphout risk_factors 21 | 22 | /* 3. compare the discrete graphs */ 23 | twoway (line risk_factor_full age, lwidth(medthick)) (line risk_factor_simple age, lwidth(medthick)) , yscale(log) ylabel(0.1 0.5 1 2 5 10 50) 24 | graphout risk_factors_discrete 25 | 26 | twoway (line risk_factor_age_weird age, lwidth(medthick)) (line risk_factor_full_cts age, lwidth(medthick)) (line risk_factor_full age, lwidth(medthick)) , yscale(log) ylabel(0.1 0.5 1 2 5 10 50) 27 | graphout risk_factors_full_agesex_part_only 28 | 29 | /* review some results */ 30 | sum risk_factor* if age == 20, d 31 | sum risk_factor* if age == 65, d 32 | sum risk_factor* if age == 20 & male == 1, d 33 | sum risk_factor* if age == 65 & male == 1, d 34 | sum risk_factor* if age == 65 & male == 0, d 35 | 36 | /********************************/ 37 | /* create a fast sample dataset */ 38 | /********************************/ 39 | use $tmp/combined, clear 40 | keep if uniform() < .1 41 | save $tmp/combined_short, replace 42 | 43 | 44 | 45 | /* run some HR comparisons [obsolete i think] */ 46 | use $tmp/combined, clear 47 | 48 | /* compare discrete vs. continuous risk factors */ 49 | keep hr_age_*_age* hr_full*age* age 50 | 51 | /* create combined discrete age factors */ 52 | gen hr_age_discrete_full = hr_full_age18_40 * hr_full_age40_50 * hr_full_age50_60 * hr_full_age60_70 * hr_full_age70_80 * hr_full_age80_ 53 | gen hr_age_discrete_age_sex = hr_age_sex_age18_40 * hr_age_sex_age40_50 * hr_age_sex_age50_60 * hr_age_sex_age60_70 * hr_age_sex_age70_80 * hr_age_sex_age80_ 54 | 55 | gen ln_d_full = ln(hr_age_discrete_full) 56 | gen ln_d_age_sex = ln(hr_age_discrete_age_sex) 57 | gen ln_c_full = ln(hr_full_age_cts) 58 | gen ln_c_age_sex = ln(hr_age_sex_age_cts) 59 | 60 | binscatter ln_d_full ln_c_full age, linetype(none) xq(age) 61 | graphout hr_comp_full 62 | 63 | binscatter ln_d_age_sex ln_c_age_sex age, linetype(none) xq(age) legend(off) 64 | graphout hr_comp_age_sex 65 | -------------------------------------------------------------------------------- /como/e/test_cts_ors.do: -------------------------------------------------------------------------------- 1 | import delimited uk_age_predicted_or.csv, clear 2 | 3 | /* see how well the bin means line up */ 4 | replace or_simple = exp(or_simple) 5 | replace or_full = exp(or_full) 6 | 7 | sum or* if inrange(age, 18, 39) 8 | sum or* if inrange(age, 40, 49) 9 | sum or* if inrange(age, 50, 59) 10 | sum or* if inrange(age, 60, 69) 11 | sum or* if inrange(age, 70, 79) 12 | sum or* if inrange(age, 80, 85) 13 | -------------------------------------------------------------------------------- /como/e/test_map.do: -------------------------------------------------------------------------------- 1 | use ~/iec/output/pn/test, clear 2 | 3 | /* test by making kerala (32) / rajasthan (8) into outliers */ 4 | replace rf_conditions = 5 if pc11_state_name == "kerala" 5 | replace rf_conditions = -1 if pc11_state_name == "rajasthan" 6 | 7 | ren pc11_state_id pc11_s_id 8 | 9 | /* save the temp dataset for merging the values to the geodataset */ 10 | save $tmp/test.dta, replace 11 | 12 | /* convert the shapefile into a geodatabase */ 13 | shp2dta using $iec1/gis/pc11/pc11-state, database($tmp/state_db) coordinates($tmp/state_coord) genid(geo_id) replace 14 | 15 | /* use the created database, it 
is the one that the map can be created from */ 16 | use $tmp/state_db, clear 17 | 18 | /* merge wiith the */ 19 | merge 1:1 pc11_s_id using $tmp/test.dta 20 | 21 | cap destring pc11_s_id, replace 22 | 23 | /* test blank map by state */ 24 | spmap using $tmp/state_coord, id(geo_id) 25 | graphout blank_map 26 | 27 | /* heatmap conditions by state */ 28 | spmap rf_conditions using $tmp/state_coord, id(geo_id) 29 | graphout heatmap 30 | -------------------------------------------------------------------------------- /como/make_como.do: -------------------------------------------------------------------------------- 1 | /*********************/ 2 | /* data construction */ 3 | /*********************/ 4 | 5 | /* get continuous fit to UK age hazard ratios */ 6 | //shell matlab $ccode/como/b/fit_cts_uk_age_hr.m 7 | 8 | /* combine DLHS and AHS */ 9 | do $ccode/como/b/prep_health_data.do 10 | 11 | /* prepare global burden of disease data */ 12 | do $ccode/como/b/prep_gbd.do 13 | 14 | /* calculate risk factors */ 15 | do $ccode/como/b/prep_india_comorbidities.do 16 | 17 | /* create an age-level dataset with England condition prevalence */ 18 | do $ccode/como/b/prep_england_prevalence.do 19 | 20 | /* create a clean set of files with relative risks */ 21 | do $ccode/como/b/prep_hrs.do 22 | 23 | /* prep NY odds ratios of death */ 24 | do $ccode/como/b/prep_ny_mortality.do 25 | 26 | /* prep india and UK sex ratios and populations */ 27 | do $ccode/como/b/prep_pop_sex.do 28 | 29 | /* create age-level datasets for HR, prevalence, population, all with identical structures */ 30 | /* THIS CREATES THE MAIN ANALYSIS FILE */ 31 | do $ccode/como/b/prep_age_level_data.do 32 | 33 | /* create prevalence standard errors for bootstraps */ 34 | do $ccode/como/b/prep_standard_errors.do 35 | 36 | /* calculate population relative risks and death distributions for england / india */ 37 | do $ccode/como/a/calc_prrs.do 38 | 39 | /************/ 40 | /* analysis */ 41 | /************/ 42 | 43 | /* prepare data for England / India prevalence comparison */ 44 | do $ccode/como/a/prep_eng_india_prev_compare.do 45 | 46 | /* calculate summary statistics and prevalences */ 47 | // do $ccode/como/a/sumstats.do 48 | 49 | /**********************/ 50 | /* figures and tables */ 51 | /**********************/ 52 | 53 | /* create tables for main text and appendix*/ 54 | do $ccode/como/a/make_paper_tables.do 55 | 56 | /* create figures */ 57 | do $ccode/como/a/make_paper_figures.do 58 | 59 | 60 | /************/ 61 | /* appendix */ 62 | /************/ 63 | 64 | /* app figure: hr interpolations */ 65 | do $ccode/como/a/app_age_hr_interpolation.do 66 | 67 | /* run sensitivity tests for sampling error in HRs */ 68 | do $ccode/como/a/calc_hr_sensitivity.do 69 | 70 | /* run sensitivity tests for sampling error in prevalences */ 71 | do $ccode/como/a/calc_prev_sensitivity.do 72 | 73 | /* sensitivity to joint conditions */ 74 | do $ccode/como/a/app_joint_condition.do 75 | -------------------------------------------------------------------------------- /como/r/covid_como_sumstats.csv: -------------------------------------------------------------------------------- 1 | uk_male_risk,1.48 2 | india_male_risk,1.50 3 | male_ratio_sign,+ 4 | male_ratio,1.35 5 | uk_obese_1_2_risk,1.09 6 | india_obese_1_2_risk,1.01 7 | obese_1_2_ratio_sign, 8 | obese_1_2_ratio,-6.73 9 | uk_obese_3_risk,1.04 10 | india_obese_3_risk,1.01 11 | obese_3_ratio_sign, 12 | obese_3_ratio,-3.31 13 | uk_bp_high_risk,0.99 14 | india_bp_high_risk,0.99 15 | bp_high_ratio_sign,+ 16 | 
bp_high_ratio,0.09 17 | uk_diabetes_uncontr_risk,1.03 18 | india_diabetes_uncontr_risk,1.11 19 | diabetes_uncontr_ratio_sign,+ 20 | diabetes_uncontr_ratio,8.12 21 | uk_diabetes_contr_risk,1.03 22 | india_diabetes_contr_risk,1.01 23 | diabetes_contr_ratio_sign, 24 | diabetes_contr_ratio,-2.44 25 | uk_asthma_ocs_risk,1.02 26 | india_asthma_ocs_risk,1.01 27 | asthma_ocs_ratio_sign, 28 | asthma_ocs_ratio,-1.50 29 | uk_autoimmune_dz_risk,1.01 30 | india_autoimmune_dz_risk,1.00 31 | autoimmune_dz_ratio_sign, 32 | autoimmune_dz_ratio,-0.51 33 | uk_haem_malig_1_risk,1.01 34 | india_haem_malig_1_risk,1.00 35 | haem_malig_1_ratio_sign, 36 | haem_malig_1_ratio,-0.92 37 | uk_cancer_non_haem_1_risk,1.03 38 | india_cancer_non_haem_1_risk,1.00 39 | cancer_non_haem_1_ratio_sign, 40 | cancer_non_haem_1_ratio,-2.35 41 | uk_chronic_heart_dz_risk,1.03 42 | india_chronic_heart_dz_risk,1.01 43 | chronic_heart_dz_ratio_sign, 44 | chronic_heart_dz_ratio,-1.89 45 | uk_chronic_resp_dz_risk,1.02 46 | india_chronic_resp_dz_risk,1.04 47 | chronic_resp_dz_ratio_sign,+ 48 | chronic_resp_dz_ratio,2.34 49 | uk_immuno_other_dz_risk,1.00 50 | india_immuno_other_dz_risk,1.00 51 | immuno_other_dz_ratio_sign,+ 52 | immuno_other_dz_ratio,0.02 53 | uk_kidney_dz_risk,1.08 54 | india_kidney_dz_risk,1.09 55 | kidney_dz_ratio_sign,+ 56 | kidney_dz_ratio,0.76 57 | uk_liver_dz_risk,1.02 58 | india_liver_dz_risk,1.03 59 | liver_dz_ratio_sign,+ 60 | liver_dz_ratio,1.42 61 | uk_neuro_other_risk,1.00 62 | india_neuro_other_risk,1.00 63 | neuro_other_ratio_sign, 64 | neuro_other_ratio,-0.08 65 | uk_stroke_dementia_risk,1.03 66 | india_stroke_dementia_risk,1.01 67 | stroke_dementia_ratio_sign, 68 | stroke_dementia_ratio,-1.72 69 | uk_health_risk,2.29 70 | india_health_risk,2.10 71 | health_ratio_sign, 72 | health_ratio,-8.25 73 | india_age18_40_mu,50.2 74 | india_age40_50_mu,19.2 75 | india_age50_60_mu,14.3 76 | india_age60_70_mu,10.3 77 | india_age70_80_mu,4.6 78 | india_age80__mu,1.5 79 | india_male_mu,47.1 80 | india_diabetes_uncontr_mu,8.9 81 | india_diabetes_contr_mu,1.7 82 | india_hypertension_both_mu,28.2 83 | india_obese_3_mu,0.4 84 | india_obese_1_2_mu,4.0 85 | uk_age_18_40,36.6 86 | uk_age_40_50,16.3 87 | uk_age_50_60,17.0 88 | uk_age_60_70,13.3 89 | uk_age_70_80,10.4 90 | uk_age_80,6.3 91 | india_gbd_chronic_heart_dz_mu,4.4 92 | india_gbd_chronic_resp_dz_mu,4.8 93 | india_gbd_kidney_dz_mu,9.7 94 | india_gbd_liver_dz_mu,5.3 95 | india_gbd_asthma_ocs_mu,2.5 96 | india_gbd_cancer_non_haem_1_mu,0.3 97 | india_gbd_haem_malig_1_mu,0.0 98 | india_gbd_autoimmune_dz_mu,1.0 99 | india_gbd_immuno_other_dz_mu,0.1 100 | india_gbd_stroke_dementia_mu,1.3 101 | india_gbd_neuro_other_mu,0.0 102 | uk_gbd_chronic_heart_dz_mu,5.9 103 | uk_gbd_chronic_resp_dz_mu,5.4 104 | uk_gbd_kidney_dz_mu,5.6 105 | uk_gbd_liver_dz_mu,2.6 106 | uk_gbd_asthma_ocs_mu,9.2 107 | uk_gbd_cancer_non_haem_1_mu,2.6 108 | uk_gbd_haem_malig_1_mu,0.2 109 | uk_gbd_autoimmune_dz_mu,2.4 110 | uk_gbd_immuno_other_dz_mu,0.1 111 | uk_gbd_stroke_dementia_mu,1.5 112 | uk_gbd_neuro_other_mu,0.1 113 | male,48.9 114 | uk_prev_diabetes_contr,6.4 115 | uk_prev_diabetes_uncontr,2.1 116 | uk_prev_chronic_resp_dz,2.5 117 | uk_prev_hypertension_both,28.0 118 | uk_prev_obese_3,3.1 119 | uk_prev_obese_1_2,24.8 120 | -------------------------------------------------------------------------------- /como/tex/como_exhibits.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt,letterpaper]{article} 2 | \setcounter{page}{0} 3 | 4 | 
\usepackage{mathtools} 5 | \usepackage{bbm} 6 | \usepackage[multiple]{footmisc} 7 | \usepackage{floatpag,amsmath,amsthm,amssymb} 8 | \newtheorem{proposition}{Proposition} 9 | \numberwithin{equation}{section} 10 | \newtheorem{nono-prop}{Proposition}[] 11 | 12 | % Figure panel header font 13 | \newcommand{\panel}{\fontfamily{phv}\selectfont\scriptsize\textbf} 14 | \usepackage{amsmath} 15 | \DeclareMathOperator*{\argmin}{arg\,min} 16 | \DeclareMathOperator*{\argmax}{arg\,max} 17 | 18 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 19 | %% LOAD LOCAL COMPILATION PATHS 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | 22 | %% DON'T CHANGE ANY OF THESE PATHS. FOR LOCAL COMPILE, EDIT YOUR 23 | %% ~/include.tex ONLY 24 | \newcommand{\HOME}{\string~} 25 | \input{\HOME/include.tex} 26 | 27 | % include standard package 28 | \input{front_matter_como} 29 | 30 | \usepackage{fancyhdr} 31 | \pagestyle{fancy} 32 | \lhead{} 33 | \chead{} 34 | \rhead{\thepage} 35 | \cfoot{} % get rid of the page number 36 | \renewcommand{\headrulewidth}{0pt} 37 | \renewcommand{\footrulewidth}{0pt} 38 | \setlength{\headsep}{24pt} 39 | 40 | % package for color-shared tables 41 | \usepackage[table]{xcolor} 42 | 43 | \usepackage{graphicx} 44 | % disable hyperlinks, which were breaking on appendix references 45 | % \usepackage[options]{nohyperref} 46 | 47 | \title{COVID Comorbidity paper} \author{Nobody} 48 | 49 | %%%%%%%%%%%%%%%%%%%%%% 50 | % NO TITLE PAGE 51 | %%%%%%%%%%%%%%%%%%%%%% 52 | \begin{document} 53 | \date{June 2020} 54 | \maketitle 55 | \clearpage 56 | 57 | \begin{figure} 58 | \centering 59 | \caption{{\footnotesize Prevalence of diabetes, hypertension, and 60 | obesity in India and England.}} 61 | \begin{tabular}{@{}p{0.48\linewidth}@{\quad}p{0.48\linewidth}@{}} 62 | \subfigimg[width=\linewidth]{A) Diabetes}{\covidpath/diabetes.pdf} & 63 | \subfigimg[width=\linewidth]{B) Hypertension}{\covidpath/hypertension.pdf} \\ 64 | \subfigimg[width=\linewidth]{C) Obesity}{\covidpath/obese.pdf} & 65 | \end{tabular} 66 | \end{figure} 67 | 68 | \clearpage 69 | 70 | \begin{figure}[H] 71 | \begin{center} 72 | \caption{Age-specific population relative risk of COVID-19 mortality from all health conditions ($PRR_a$)} 73 | \includegraphics[scale=1.0]{\covidpath/prr_health.pdf} 74 | \end{center} 75 | \end{figure} 76 | 77 | \begin{figure}[H] 78 | \begin{center} 79 | \caption{Comorbidity-specific population relative risk of COVID-19 mortality in India v. 
England} 80 | \includegraphics[scale=0.7]{\covidpath/coefplot.pdf} 81 | \end{center} 82 | \end{figure} 83 | 84 | \begin{figure}[H] 85 | \begin{center} 86 | \caption{Modelled age distribution of COVID-19 mortality} 87 | \includegraphics[scale=1.0]{\covidpath/mort_density_full.pdf} 88 | \end{center} 89 | \end{figure} 90 | 91 | \begin{table}[H] 92 | \begin{center} 93 | \caption{} 94 | \input{\covidpath/covid_como_sumstats} 95 | \end{center} 96 | \end{table} 97 | 98 | \begin{table}[H] 99 | \begin{center} 100 | \caption{} 101 | \input{\covidpath/covid_como_sumhr} 102 | \end{center} 103 | \end{table} 104 | 105 | 106 | \end{document} 107 | 108 | -------------------------------------------------------------------------------- /como/tex/como_tables_figures.tex: -------------------------------------------------------------------------------- 1 | \documentclass[12pt,letterpaper]{article} 2 | \setcounter{page}{0} 3 | 4 | % \usepackage[a4paper,margin=1in,landscape]{geometry} 5 | \usepackage{mathtools} 6 | \usepackage{bbm} 7 | \usepackage[multiple]{footmisc} 8 | \usepackage{floatpag,amsmath,amsthm,amssymb} 9 | \newtheorem{proposition}{Proposition} 10 | \numberwithin{equation}{section} 11 | \newtheorem{nono-prop}{Proposition}[] 12 | 13 | % Figure panel header font 14 | \newcommand{\panel}{\fontfamily{phv}\selectfont\scriptsize\textbf} 15 | \usepackage{amsmath} 16 | \DeclareMathOperator*{\argmin}{arg\,min} 17 | \DeclareMathOperator*{\argmax}{arg\,max} 18 | 19 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | %% LOAD LOCAL COMPILATION PATHS 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | %% DON'T CHANGE ANY OF THESE PATHS. FOR LOCAL COMPILE, EDIT YOUR 24 | %% ~/include.tex ONLY 25 | \newcommand{\HOME}{\string~} 26 | \input{\HOME/include.tex} 27 | 28 | % include standard package 29 | \input{front_matter_como} 30 | 31 | \usepackage{fancyhdr} 32 | \pagestyle{fancy} 33 | \lhead{} 34 | \chead{} 35 | \rhead{\thepage} 36 | \cfoot{} % get rid of the page number 37 | \renewcommand{\headrulewidth}{0pt} 38 | \renewcommand{\footrulewidth}{0pt} 39 | \setlength{\headsep}{24pt} 40 | 41 | % package for color-shared tables 42 | \usepackage[table]{xcolor} 43 | 44 | % disable hyperlinks, which were breaking on appendix references 45 | % \usepackage[options]{nohyperref} 46 | 47 | \title{COVID Comorbidity paper} \author{Nobody} 48 | 49 | %%%%%%%%%%%%%%%%%%%%%% 50 | % NO TITLE PAGE 51 | %%%%%%%%%%%%%%%%%%%%%% 52 | \begin{document} 53 | \date{June 2020} 54 | % \maketitle 55 | 56 | \section{Figures and Tables} 57 | 58 | \begin{table}[H] 59 | \begin{center} 60 | \caption{condition prevalences } 61 | %\input{\covidpath/app_table_age_bin_prev} 62 | \input{\covidpath/covid_como_agerisks.tex} 63 | 64 | \footnotesize{[table note]} 65 | \end{center} 66 | \end{table} 67 | 68 | \begin{table}[H] 69 | \begin{center} 70 | \caption{Prevalence of Conditions in Population and in OpenSAFELY} 71 | %\input{\covidpath/app_table_os_vs_nhs} 72 | \input{\covidpath/covid_como_oscompare.tex} 73 | 74 | \footnotesize{[table note]} 75 | \end{center} 76 | \end{table} 77 | 78 | \clearpage 79 | \begin{figure}[H] 80 | \begin{center} 81 | \caption{Prevalence of Conditions in Population and in OpenSAFELY} 82 | \textbf{Age Interpolation: Fully-Adjusted Model} 83 | 84 | \includegraphics[scale=0.5]{\covidpath/age_interpolation_full} 85 | 86 | \footnotesize{[figure note]} 87 | \end{center} 88 | \end{figure} 89 | 90 | \end{document} 91 | 92 | -------------------------------------------------------------------------------- /como/tex/front_matter_como.tex: 
-------------------------------------------------------------------------------- 1 | \usepackage[latin1]{inputenc} 2 | % \usepackage{lmodern} % keep or kill this?? might affect italics. 3 | \usepackage{setspace} 4 | \usepackage{amsmath} 5 | \usepackage{amsthm} 6 | \usepackage{amsfonts} 7 | \usepackage{longtable} 8 | \addtolength{\textwidth}{5cm} 9 | \addtolength{\textheight}{5cm} 10 | \usepackage{fullpage} 11 | \usepackage{amssymb} 12 | \usepackage[hyperpageref]{backref} 13 | \usepackage[hidelinks]{hyperref} 14 | \usepackage{url} 15 | \usepackage{epstopdf} 16 | \usepackage{multirow} 17 | %\usepackage{array} 18 | %\usepackage{harvard} 19 | \usepackage{tabularx} 20 | %\citationmode{abbr} 21 | 22 | \usepackage{float} 23 | % \usepackage{perpage} 24 | % \MakeSorted{figure} 25 | % \MakeSorted{table} 26 | \usepackage{lscape} 27 | \usepackage{verbatim} 28 | \usepackage{pdflscape} 29 | \usepackage{chngcntr} 30 | \usepackage{appendix} 31 | \usepackage{booktabs,calc} 32 | \usepackage{ulem} 33 | \usepackage{siunitx} 34 | %\sisetup{output-decimal-marker=\cdot} 35 | 36 | % allow yellow highlighting in tables 37 | \usepackage{color,colortbl} 38 | \usepackage{soul} 39 | \definecolor{Yellow}{rgb}{.88,1,.65} 40 | \definecolor{Green}{rgb}{.65,1,.65} 41 | \definecolor{Red}{rgb}{1,.65,.65} 42 | 43 | %\citationstyle{dcu} 44 | 45 | \usepackage[labelfont=bf,center,large,labelsep=newline]{caption} 46 | %\usepackage{subfigure} 47 | % \counterwithout{subtable}{table} 48 | \def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]} 49 | \let\endchangemargin=\endlist 50 | 51 | % define subscript / superscript commands 52 | \newcommand{\superscript}[1]{\ensuremath{^{\textrm{#1}}}} 53 | \newcommand{\subscript}[1]{\ensuremath{_{\textrm{#1}}}} 54 | 55 | % create a shortcut for newlines in captions: 56 | \newcommand{\cnewline}{\hspace{\linewidth}} 57 | 58 | %format paper to save trees 59 | \usepackage[right=1in,left=1in,top=1in,bottom=1in]{geometry} 60 | \usepackage{savetrees} 61 | 62 | %AER style headers 63 | \def\thesection{\arabic{section}} 64 | \def\thesubsection {\thesection.\arabic{subsection}} 65 | 66 | % set home path 67 | % \newcommand{\HOME}{\string~} 68 | 69 | \newcommand{\subfigimg}[3][,]{% 70 | \setbox1=\hbox{\includegraphics[#1]{#3}}% Store image in box 71 | \leavevmode\rlap{\usebox1}% Print image 72 | \rlap{\hspace*{90pt}\raisebox{\dimexpr\ht1+0.9\baselineskip}{\colorbox{white}{{\footnotesize#2}}}}% Print label 73 | \phantom{\usebox1}% Insert appropriate spcing 74 | } 75 | -------------------------------------------------------------------------------- /e/compare_hosp_counts.do: -------------------------------------------------------------------------------- 1 | /********************************************************************************************/ 2 | /* COPY CODE FROM HOSPITAL ESTIMATES TO GET DLHS LINKED WITH PC, AND SCALE UP PC BED COUNTS */ 3 | /********************************************************************************************/ 4 | 5 | /* combine DLHS, Population Census, Economic Census, to estimate hospital 6 | capacity at the district and subdistrict level. 
*/ 7 | 8 | /* merge DLHS, PC, EC together at district level */ 9 | use $covidpub/hospitals/dlhs4_hospitals_dist.dta, clear 10 | merge 1:1 pc11_state_id pc11_district_id using $covidpub/hospitals/ec_hospitals_dist.dta, gen(_m_ec13) 11 | merge 1:1 pc11_state_id pc11_district_id using $covidpub/hospitals/pc_hospitals_dist.dta, gen(_m_pc11) 12 | 13 | /* drop if missing pc11 ids */ 14 | drop if mi(pc11_state_id) | mi(pc11_district_id) 15 | 16 | /* reconcile variable names (though really should do this in the build files above) */ 17 | ren dlhs4* dlhs* 18 | 19 | /* key variables */ 20 | /* dlhs: dlhs4_total_beds, dlhs4_total_count, dlhs4_total_staff */ 21 | /* ec13: ec_emp_hosp_priv, ec_emp_hosp_gov */ 22 | /* pc11: pc_hosp_beds_u pc_clinic_beds_u */ 23 | 24 | /* generate private share from EC */ 25 | gen ec_priv_hosp_share = ec_emp_hosp_priv / (ec_emp_hosp_priv + ec_emp_hosp_gov) 26 | sum ec_priv_hosp_share,d 27 | /* tons of variation, from 0 to 1, med .52, close to uniform */ 28 | 29 | /* generate total ec emp in hospitals */ 30 | gen ec_emp_hosp_tot = ec_emp_hosp_priv + ec_emp_hosp_gov 31 | 32 | /* gen urban to rural doctor share */ 33 | gen pc_doc_u_share = pc_docs_pos_u / (pc_docs_pos_r + pc_docs_pos_u) 34 | 35 | /* gen urban to rural doctor in hospital share */ 36 | gen pc_hosp_doc_u_share = pc_docs_hosp_u / (pc_docs_hosp_r + pc_docs_hosp_u) 37 | 38 | /* scale up urban beds in pop census using rural share of doctors */ 39 | 40 | /* use overall doc share for clinic beds */ 41 | gen pc_clinic_beds = pc_clinic_beds_u / pc_doc_u_share 42 | 43 | /* use hospital doc share for hospital beds */ 44 | gen pc_hosp_beds = pc_hosp_beds_u / pc_hosp_doc_u_share 45 | 46 | /* scale up DLHS primary health clinics */ 47 | foreach v in beds count staff pop { 48 | replace dlhs_phc_`v' = dlhs_phc_`v' * dlhs_phc_mult 49 | } 50 | 51 | /* combine two DLHS clinic types */ 52 | egen dlhs_clinic_beds = rowtotal(dlhs_chc_beds dlhs_phc_beds) 53 | 54 | /* compare different clinic type counts */ 55 | corr dlhs_dh_beds dlhs_chc_beds dlhs_phc_beds dlhs_clinic_beds pc_clinic_beds pc_hosp_beds 56 | 57 | 58 | /* log correlation */ 59 | foreach v in dlhs_dh_beds dlhs_chc_beds dlhs_phc_beds dlhs_clinic_beds pc_clinic_beds pc_hosp_beds { 60 | gen ln_`v' = ln(`v' + 1) 61 | } 62 | 63 | corr ln_* 64 | 65 | 66 | 67 | dlhs4_dh_beds int %9.0g Total beds in district hospitals 68 | dlhs4_dh_count byte %9.0g Total district hospitals 69 | dlhs4_dh_staff int %9.0g Total staff district hospitals 70 | dlhs4_chc_beds int %9.0g Total beds in community health centers 71 | dlhs4_chc_count byte %9.0g Total community health centers 72 | dlhs4_chc_staff int %9.0g Total staff in community health centers 73 | dlhs4_phc_beds int %9.0g Total beds in primary health centers 74 | dlhs4_phc_count byte %9.0g Total primary health centers 75 | dlhs4_phc_staff int %9.0g Total staff in primary health centers 76 | dlhs4_phc_pop long %9.0g Population covered by sampled primary health centers 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | /***********************/ 91 | /* explore ICU shares */ 92 | /***********************/ 93 | use $health/DLHS4_FacilitySurveyData/AHS_FACILITY/AHS_dh, clear 94 | append using $health/DLHS4_FacilitySurveyData/NON_AHS_FACILITY/DH_NONAHS 95 | 96 | /* merge in pc11 districts */ 97 | merge m:1 state dist using $health/DLHS4_FacilitySurveyData/dlhs4_district_key, keepusing(pc11_state_id pc11_state_name pc11_district_id pc11_district_name) 98 | drop if _merge == 2 99 | drop _merge 100 | 101 | collapse (sum) qd2 
qd68_total, by(pc11_state_name pc11_state_id) 102 | 103 | gen ratio = qd68_total / qd2 104 | 105 | sort ratio 106 | list 107 | 108 | merge 1:1 pc11_state_id using $pc11/pc11_pca_state_clean, keepusing(pc11_pca_tot_p) 109 | 110 | gen icu_per_100k = qd68_total / pc11_pca_tot_p * 100000 111 | gen bed_per_k = qd2 / pc11_pca_tot_p * 1000 112 | 113 | sort icu_per_100k 114 | list pc11_state_name icu_per_100k bed_per_k 115 | -------------------------------------------------------------------------------- /e/create_agmark_plots.do: -------------------------------------------------------------------------------- 1 | use $covidpub/agmark/agmark_clean, clear 2 | drop if mi(lgd_state_id) 3 | 4 | /* adjust formats of identifying variables */ 5 | format date %dM_d,_CY 6 | tostring lgd_state_id, format("%02.0f") replace 7 | tostring lgd_district_id, format("%03.0f") replace 8 | 9 | /* indicate if something is a perishabel */ 10 | gen perishable = 1 if (group == 8 | group == 9 | group == 15) 11 | replace perishable = 0 if mi(perishable) 12 | 13 | /* save overall data file */ 14 | save $tmp/agmark_data, replace 15 | 16 | /****************/ 17 | /* TOTAL VOLUME */ 18 | /****************/ 19 | /* replace quantity with 0 if it's a number, we can't convert this, it's 0.28% of total entries */ 20 | replace qty = . if unit == 1 21 | 22 | /* first collapse to state-district-date level */ 23 | collapse (sum) qty, by(date lgd_state_id lgd_district_id) 24 | 25 | /* merge in covid case data */ 26 | merge m:1 date lgd_state_id lgd_district_id using $covidpub/covid/covid_infected_deaths 27 | drop _merge 28 | 29 | /* now collapse to national-day level */ 30 | collapse (sum) qty cases death, by(date) 31 | 32 | /* get year */ 33 | gen year = year(date) 34 | 35 | /* save */ 36 | save $tmp/agmark_total_ts, replace 37 | 38 | /**************/ 39 | /* LIVESTOCK */ 40 | /**************/ 41 | use $tmp/agmark_data, clear 42 | 43 | /* keep large livestock */ 44 | keep if item == 47 | item == 52 | item == 89 | item == 119 | item == 140 | item == 211 | item == 254 | item == 255 | item == 256 | item == 226 | item == 237 45 | 46 | /* collapse to state-district-date level */ 47 | collapse (sum) qty, by(date lgd_state_id lgd_district_id) 48 | 49 | /* merge in covid data by date */ 50 | merge m:1 date lgd_state_id lgd_district_id using $covidpub/covid/covid_infected_deaths 51 | drop _merge 52 | 53 | /* collapse to national-date level */ 54 | collapse (sum) qty cases death, by(date) 55 | 56 | /* create year */ 57 | gen year = year(date) 58 | 59 | /* save */ 60 | save $tmp/agmark_livestock_ts, replace 61 | -------------------------------------------------------------------------------- /e/describe_migration.do: -------------------------------------------------------------------------------- 1 | /* Matching migration data to covid data */ 2 | 3 | /* use covid dataset */ 4 | use $covidpub/covid/covid_infected_deaths.dta, clear 5 | 6 | /* merge migration data */ 7 | merge m:1 lgd_district_id using $covidpub/migration/district_migration.dta 8 | 9 | /* drop _merge */ 10 | drop _merge 11 | 12 | /* merge population data */ 13 | merge m:1 lgd_district_id using $covidpub/demography/dem_district.dta 14 | 15 | /* drop missing values */ 16 | drop if mi(lgd_state_id) 17 | drop if mi(lgd_district_id) 18 | 19 | /* generate migration district data (share in national total*total national migrants) */ 20 | gen outltmigration = outltmigrationshare * outltmigrantstotal 21 | 22 | /* gen per capita variables */ 23 | gen total_cases_pc = total_cases / 
pc11_pca_tot_p 24 | gen outltmigration_pc = outltmigration / pc11_pca_tot_p 25 | 26 | /* gen log variables */ 27 | foreach var in total_cases outltmigration pc11_pca_tot_p outltmigration_pc total_cases_pc { 28 | gen log_`var' = ln(`var') 29 | } 30 | 31 | /* keep latest covid data */ 32 | keep if date == 22082 33 | 34 | /* save dataset */ 35 | save $tmp/covid_migration.dta, replace 36 | 37 | /* binscatter log cases vs. log outmigratns */ 38 | binscatter log_total_cases log_outltmigration 39 | graphout cases_outmigrants 40 | 41 | /* repeat, controlling for population */ 42 | binscatter log_total_cases log_outltmigration, control(log_pc11_pca_tot_p) xlabel(7.5(.5)10.5) ylabel(2.5(.5)4.5) 43 | graphout cases_outmigrants_popcontrol 44 | 45 | /* per capita variables */ 46 | binscatter log_total_cases_pc log_outltmigration_pc, control(log_pc11_pca_tot_p) 47 | graphout cases_outmigrants_pc 48 | 49 | /* repeat, restricting to bihar and UP */ 50 | gen sample = inlist(lgd_state_name, "bihar", "uttar pradesh") 51 | binscatter log_total_cases log_outltmigration if sample == 1 52 | graphout cases_outmigrants_subsample 53 | binscatter log_total_cases log_outltmigration if sample == 1, control(log_pc11_pca_tot_p) xlabel(7.5(.5)10.5) ylabel(2.5(.5)4.5) xtitle("Log number typical outmigrants") ytitle("Log cases (5/22)") 54 | graphout cases_outmigrants_popcontrol_subsample 55 | binscatter log_total_cases_pc log_outltmigration_pc if sample == 1, control(log_pc11_pca_tot_p) 56 | graphout cases_outmigrants_pc_subsample 57 | 58 | reg log_total_cases log_pc11_pca_tot_p if sample == 1 59 | predict case_hat, resid 60 | reg log_outltmigration log_pc11_pca_tot_p if sample == 1 61 | predict outmigrants_hat, resid 62 | 63 | twoway (scatter case_hat outmigrants_hat if sample == 1, xtitle("Log residual number typical outmigrants") ytitle("Log residual cases (5/22)")) /// 64 | (lfit case_hat outmigrants_hat if sample & inrange(outmigrants_hat, -2, 2)) 65 | graphout scatter 66 | 67 | /* regression versions */ 68 | reg log_total_cases log_outltmigration 69 | reg log_total_cases log_outltmigration log_pc11_pca_tot_p 70 | reg log_total_cases log_outltmigration if sample == 1 71 | reg log_total_cases log_outltmigration log_pc11_pca_tot_p if sample == 1 72 | -------------------------------------------------------------------------------- /e/dlhs.do: -------------------------------------------------------------------------------- 1 | global out ~/iec/SAworking/hosp 2 | mkdir $out 3 | 4 | ls ~/iec/health/DLHS4_FacilitySurveyData/AHS_FACILITY 5 | ls ~/iec/health/DLHS4_FacilitySurveyData/NON_AHS_FACILITY 6 | 7 | /* explore data */ 8 | 9 | 10 | /* hospitals (dh), community health centers (chc), primary health centers (phc), sub-health centers (shc) */ 11 | 12 | /* AHS districts */ 13 | 14 | /* district hospitals */ 15 | use ~/iec/health/DLHS4_FacilitySurveyData/AHS_FACILITY/AHS_dh.dta , clear 16 | /* variables of interest: */ 17 | /* qd2 double %3.0f TOTAL NUMBER OF BEDS */ 18 | /* note: has beds broken out by type */ 19 | 20 | /* community health cetners */ 21 | use ~/iec/health/DLHS4_FacilitySurveyData/AHS_FACILITY/AHS_chc.dta , clear 22 | /* qc571 double %3.0f Total Number of beds in CHC */ 23 | 24 | /* primary health center */ 25 | use ~/iec/health/DLHS4_FacilitySurveyData/AHS_FACILITY/AHS_phc.dta , clear 26 | /* qp429a double %2.0f Total number of bed sanction for PHC */ 27 | /* qp429b double %2.0f Total number of bed available in PHC */ 28 | 29 | 30 | /* sub health centers */ 31 | use 
~/iec/health/DLHS4_FacilitySurveyData/AHS_FACILITY/AHS_shc.dta , clear 32 | /* NO INPATIENT CARE */ 33 | 34 | 35 | /* NON AHS districts */ 36 | 37 | /* district hospitals */ 38 | use ~/iec/health/DLHS4_FacilitySurveyData/NON_AHS_FACILITY/DH_NONAHS.dta , clear 39 | /* qd2 double %3.0f TOTAL NUMBER OF BEDS */ 40 | 41 | /* community health centers */ 42 | use ~/iec/health/DLHS4_FacilitySurveyData/NON_AHS_FACILITY/CHC_NONAHS.dta , clear 43 | /* qc571 double %3.0f Total Number of beds in CHC */ 44 | 45 | /* primary health center */ 46 | use ~/iec/health/DLHS4_FacilitySurveyData/NON_AHS_FACILITY/PHC_NONAHS.dta , clear 47 | /* qp429a double %2.0f Total number of bed sanction for PHC */ 48 | /* qp429b double %2.0f Total number of bed available in PHC */ 49 | 50 | 51 | /* scatter beds vs staff */ 52 | reg dlhs4_total_beds dlhs4_total_staff 53 | scatter dlhs4_total_beds dlhs4_total_staff 54 | graphout x 55 | 56 | /* explore */ 57 | exit 58 | 59 | forval i = 2/36 { 60 | tab state state_name if state == `i' 61 | } 62 | 63 | 64 | 65 | use $iec/health/hosp/hospitals_dist, clear 66 | 67 | /* sum vars */ 68 | sum dlhs4_perk_total_beds dlhs4_perk_total_facilities dlhs4_perk_total_staff , d 69 | sum pc_perk_beds_tot pc_perk_beds_allo pc_perk_beds_urb_tot pc_perk_beds_urb_allo , d 70 | sum ec_perk_emp_hosp_priv ec_perk_emp_hosp_gov ec_perk_emp_hosp_tot , d 71 | 72 | /* compare beds vars */ 73 | corr dlhs4_perk_total_beds pc_perk_beds_tot 74 | corr dlhs4_perk_total_beds pc_perk_beds_urb_tot 75 | reg dlhs4_perk_total_beds pc_perk_beds_tot 76 | reg dlhs4_perk_total_beds pc_perk_beds_urb_tot 77 | 78 | /* compare rank vars */ 79 | corr rank_dlhs4_perk_total_beds rank_pc_perk_beds_tot 80 | reg rank_dlhs4_perk_total_beds rank_pc_perk_beds_tot 81 | 82 | scatter rank_dlhs4_perk_total_beds rank_pc_perk_beds_tot 83 | graphout ranks 84 | 85 | /* compare bottom vars */ 86 | corr bot_dlhs4_perk_total_beds bot_pc_perk_beds_tot 87 | reg bot_dlhs4_perk_total_beds bot_pc_perk_beds_tot 88 | tab bot_dlhs4_perk_total_beds bot_pc_perk_beds_tot 89 | 90 | /* pc vs dlhs bed count */ 91 | gen pc_dlhs_beds_ratio = pc_beds_tot / dlhs4_total_beds 92 | gen pc_dlhs_priv_share = (pc_beds_tot - dlhs4_total_beds) / pc_beds_tot 93 | sum pc_dlhs_beds_ratio ec_priv_hosp_share, d 94 | corr pc_dlhs_beds_ratio ec_priv_hosp_share 95 | 96 | /* is pc capturing private hospitals? 
*/ 97 | tabstat ec_priv_hosp_share pc_dlhs_priv_share [aw=pc11_pca_tot_p], by(pc11_state_name) 98 | corr ec_priv_hosp_share pc_dlhs_priv_share [aw=pc11_pca_tot_p] 99 | /* doesn't look like it, since ec_priv_share seems more correlated */ 100 | 101 | 102 | -------------------------------------------------------------------------------- /e/explore_ec_microdata.do: -------------------------------------------------------------------------------- 1 | use $tmp/ec13_hosp, clear 2 | 3 | keep if nic == 861 4 | 5 | sum emp_all, d 6 | -------------------------------------------------------------------------------- /e/explore_idi_survey_r2.do: -------------------------------------------------------------------------------- 1 | global idi ~/iec/covid/idi_survey/round2 2 | 3 | /* import data */ 4 | use $idi/wb2_cleaned_2020_08_07, clear 5 | 6 | /* relabel demo_ag_hh var for easy interpretation on graphs */ 7 | label define a 0 "Non-ag household" 1 "Ag household" 8 | label values demo_ag_hh_r2 a 9 | 10 | /* create earnings variables */ 11 | foreach t in lckdwn curr { 12 | gen lab_`t'_earn_r2 = lab_`t'_wage_r2 * lab_`t'_freq_mean_r2 13 | } 14 | 15 | /* earnings change between lckdwn and r2 */ 16 | gen earn_change_r2 = (lab_curr_earn_r2 - lab_lckdwn_earn_r2) / lab_lckdwn_earn_r2 17 | 18 | /* top code earnings change */ 19 | sum earn_change_r2, d 20 | replace earn_change_r2 = . if earn_change_r2 > r(p95) 21 | 22 | /* label earnings change */ 23 | la var earn_change_r2 "% change in earnings since lockdown" 24 | 25 | /* gen indicator variable for whether an individual faced no difficulty in fertilizer purchase */ 26 | gen fert_diff = agr_fert_diffs_none_prop_r2 27 | 28 | label define df 1 "Faced no difficulty" 0 "Faced difficulty" 29 | label values fert_diff df 30 | 31 | /* 2 obs in r2 have negative weights - drop them */ 32 | drop if weight_hh_r2 < 0 33 | 34 | /* set scheme */ 35 | set scheme pn 36 | 37 | /**********/ 38 | /* Labour */ 39 | /**********/ 40 | 41 | /* 1. What are those who were unemployed in the previous round doing now? */ 42 | tab lab_curr_occu_r2 if lab_curr_occu_r1 == 0 43 | tab demo_ag_hh_r2 if lab_curr_occu_r1 == 0 44 | 45 | /* clone current occupation variable */ 46 | gen r2_occ = lab_curr_occu_r2 47 | replace r2_occ = 6 if lab_curr_occu_r1 == 0 & demo_ag_hh_r2 == 1 48 | replace r2_occ = . if r2_occ < 0 49 | 50 | /* label values */ 51 | label define r2 0 "Unemployed" 1 "Self-employed non-ag" 2 "Salaried pvt" 3 "Salaried govt" 4 "Daily wage ag" 5 "Daily wage non-ag" 6 "Working on own farm" 99 "Other" 52 | label values r2_occ r2 53 | 54 | /* plot */ 55 | la var r2_occ " " 56 | catplot r2_occ if lab_curr_occu_r1 == 0, title("Current occupation of sample unemployed during lockdown", margin(medium)) 57 | graphout lab_then_now 58 | 59 | /* 2. Labour market status since lockdown remains bleak */ 60 | graph bar lab_freq_change_r2 lab_wagechange_mean_r2 earn_change_r2 if inlist(lab_curr_occu_r2, 1, 2, 3, 4, 5) [aw = weight_hh_r2], ytitle("% change since lockdown", margin(small)) bargap(20) legend(label(1 "Weekly workdays change") label(2 "Daily wage change") label(3 "Weekly earnings change")) 61 | graphout lab_status 62 | 63 | /* 3. Who is still getting work */ 64 | cibar lab_freq_change_r2 if inlist(lab_curr_occu_r2, 1, 2, 3, 4, 5) [aw = weight_hh_r2], over(lab_curr_occu_r2) graphopts(ytitle("% change in weekly workdays since lockdown")) 65 | graphout recovery 66 | 67 | /***************/ 68 | /* Agriculture */ 69 | /***************/ 70 | 71 | /* 1.
general state of agriculture */ 72 | graph bar agr_land_change_mean_r2 agr_fert_cost_mean_r2 agr_borrow_mean_r2 agr_borrow_kcc_mean_r2 [aw = weight_hh_r2], ytitle("% change since last season") legend(label(1 "Planned land for kharif cultivation") label(2 "Fertilizer spending") label(3 "Borrowing") label(4 "Borrowing - KCC")) ylabel(-0.25 (0.05) 0.1) 73 | graphout ag_stat 74 | 75 | /* 2. fertilizers */ 76 | graph bar fert_diff agr_fert_price_all_inc_prop_r2 [aw = weight_hh_r2], bar(1, color(green)) bar(2, color(red)) bargap(30) legend(label( 1 "Faced no difficulty in fertilizer purchase") label(2 "Reported a price increase of fertilizers")) ytitle("Percentage", margin(small)) ylabel(0 (0.1) 0.6, grid) 77 | graphout fert 78 | 79 | /* 3. planned land for cultivation, by state */ 80 | ciplot agr_land_change_mean_r2 [aw = weight_hh_r2], by(state) xtitle("State") ytitle("% change in land planned for kharif cultivation") 81 | graphout state_land 82 | 83 | /* 4. graphs to show ag households are doing well */ 84 | cibar con_limit_wk_reduce_prop_r2 [aw = weight_hh_r2], over(demo_ag_hh_r2) graphopts(ytitle("Reduced proportion size of meals in the last week") ylabel(0 (0.02) 0.2) name(food_1, replace)) 85 | cibar con_limit_wk_out_prop_r2 [aw = weight_hh_r2], over(demo_ag_hh_r2) graphopts(ytitle("Ran out of food in the last week") ylabel(0 (0.02) 0.2) name(food_2, replace)) 86 | graph combine food_1 food_2, ycommon 87 | graphout ag_better 88 | 89 | /* 5. relief diff between ag/non-ag households */ 90 | ciplot rel_amt_received_mean_r2 [aw = weight_hh_r2], by(demo_ag_hh_r2) xtitle(" ") name(relief_amt, replace) 91 | graphout relief_amt 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /e/explore_pc_dlhs_doctors.do: -------------------------------------------------------------------------------- 1 | /* Investigates Hospital Definitions in DLHS4 and PC (Issue #14) */ 2 | 3 | /* open DLHS4 dataset */ 4 | use $covidpub/hospitals/dlhs4_hospitals_dist.dta, clear 5 | 6 | /* merge with PC data */ 7 | merge 1:1 pc11_district_id using $covidpub/hospitals/pc_hospitals_dist.dta 8 | drop if _merge != 3 9 | 10 | /* collapse to state level */ 11 | collapse (sum) dlhs4* pc11_pca_tot_p pc_*, by(pc11_state_id) 12 | 13 | /* add state names */ 14 | get_state_names, y(11) 15 | 16 | /* drop states with populations less than 5m */ 17 | drop if pc11_pca_tot_p < 5000000 18 | 19 | /* generate absolute values table */ 20 | sort pc_docs 21 | list pc11_state_name pc_docs pc_docs_hosp dlhs4_total_staff 22 | 23 | /* generate ratio variables */ 24 | gen docs_ratio = pc_docs / dlhs4_total_staff 25 | gen docs_hosp_ratio = pc_docs_hosp / dlhs4_total_staff 26 | gen ratio_diff = docs_hosp_ratio - docs_ratio 27 | 28 | /* generate ratios table */ 29 | sort docs_ratio 30 | list pc11_state_name docs_ratio docs_hosp_ratio ratio_diff 31 | -------------------------------------------------------------------------------- /e/gen_survey_map.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import contextily as ctx 3 | import pandas as pd 4 | import matplotlib as mpl 5 | import matplotlib.pyplot as plt 6 | import getpass 7 | import os 8 | ​ 9 | ​ 10 | # select the population you want to work with and store full variable name in var 11 | var = "dummy" 12 | ​ 13 | df = pd.read_excel("nrega.xlsx") 14 | 15 | # convert the dataframe to a geodataframe 16 | df = gpd.GeoDataFrame(pd.read_excel("nrega.xlsx"), 
geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),crs={'init' :'epsg:4326'}) 17 | 18 | # convert the crs of the dataframe 19 | df = df.to_crs(epsg=3857) 20 | 21 | # sort values by longitude 22 | df = df.sort_values(by="longitude") 23 | 24 | # identify minimum and maximum values of variable of interest 25 | vmin = df[var].min() 26 | vmax = df[var].max() 27 | 28 | # set up a figure 29 | f, ax = plt.subplots(1, figsize=[10,15]) 30 | 31 | # choose colormap 32 | cmap = "viridis_r" 33 | 34 | # plot figure 35 | df.plot(column=var, ax=ax, vmin=vmin, vmax=vmax, cmap=cmap, alpha=0.85) 36 | 37 | # add basemap 38 | ctx.add_basemap(ax, source=ctx.providers.Stamen.TonerLite, zoom=6) 39 | 40 | # set axis parameters - these are manually set to be the window over all of India 41 | ax.set_xlim([7510000, 10000000]) 42 | ax.set_ylim([1250000, 3750000]) 43 | ax.axes.xaxis.set_visible(False) 44 | ax.axes.yaxis.set_visible(False) 45 | 46 | # set plot title 47 | ax.set_title(f"% reporting NREGA unavailability", fontsize=18, pad=8) 48 | 49 | # add colorbar 50 | cax = f.add_axes([0.93, .25, 0.025, 0.5]) 51 | sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax)) 52 | 53 | # fake up the array of the scalar mappable. 54 | sm._A = [] 55 | cb = f.colorbar(sm, cax=cax) 56 | 57 | # label the colorbar 58 | cb.set_label(label='Share', fontsize=16, rotation=270, labelpad=30) 59 | cb.ax.tick_params(labelsize=14) 60 | 61 | plt.savefig("nrega.png", bbox_inches="tight", dpi=300) 62 | 63 | 64 | # save the figure again to alternative output locations 65 | username = getpass.getuser() 66 | plt.savefig(os.path.join("/scratch", username, f"{var}.png"), bbox_inches="tight", dpi=300) 67 | 68 | 69 | plt.savefig("/scratch/adibmk/labor_lost_work.png", bbox_inches="tight", dpi=300) 70 | 71 | 72 | plt.savefig(f"{var}.png", bbox_inches="tight", dpi=300) 73 | 74 | plt.close("all") 75 | 76 | -------------------------------------------------------------------------------- /e/get_vac_data.do: -------------------------------------------------------------------------------- 1 | /* define lgd matching programs */ 2 | qui do $ddl/covid/covid_progs.do 3 | qui do $ddl/tools/do/tools.do 4 | 5 | /* Pull district-level vaccination data from covid19india API */ 6 | import delimited "https://api.covid19india.org/csv/latest/cowin_vaccine_data_districtwise.csv", clear 7 | 8 | /* rename all the variables */ 9 | local k = 7 10 | local j = 1 11 | 12 | foreach var of var v* { 13 | 14 | local label : variable label `var' 15 | local label: subinstr local label "/" "" 16 | local label: subinstr local label "/" "" 17 | local label: subinstr local label "."
"_" 18 | 19 | ren v`k' v_`label'_`j' 20 | local k = `k'+1 21 | local j = `j'+1 22 | 23 | cap ren v_`label'_* v_`label'_(#), renumber 24 | 25 | } 26 | 27 | /* drop first row containing variable names in the raw API */ 28 | drop in 1 29 | 30 | /* tag duplicates */ 31 | duplicates tag state_code district_key, gen(tag) 32 | keep if tag == 0 33 | drop tag 34 | cap drop v__* 35 | 36 | /* more renaming */ 37 | forval i = 1/10 { 38 | ren v*`i' v`i'* 39 | } 40 | 41 | ren v*_ v* 42 | 43 | /* reshape data from wide to long */ 44 | reshape long v1_ v2_ v3_ v4_ v5_ v6_ v7_ v8_ v9_ v10_, i(state district state_code district_key cowinkey) j(date) string 45 | 46 | destring v*, replace 47 | 48 | /* label variables */ 49 | la var v1_ "Total Individuals Registered" 50 | la var v2_ "Total Sessions Conducted" 51 | la var v3_ "Total Sites" 52 | la var v4_ "First Dose Administered" 53 | la var v5_ "Second Dose Administered" 54 | la var v6_ "Male(Individuals Vaccinated)" 55 | la var v7_ "Female(Individuals Vaccinated)" 56 | la var v8_ "Transgender(Individuals Vaccinated)" 57 | la var v9_ "Total Covaxin Administered" 58 | la var v10_ "Total CoviShield Administered" 59 | 60 | /* rename final vars */ 61 | ren v1_ total_reg 62 | ren v2_ total_sessions 63 | ren v3_ total_sites 64 | ren v4_ total_first_dose 65 | ren v5_ total_second_dose 66 | ren v6_ total_vac_male 67 | ren v7_ total_vac_female 68 | ren v8_ total_vac_trans 69 | ren v9_ total_covaxin 70 | ren v10_ total_covishield 71 | 72 | /* create time variable */ 73 | gen day = substr(date, 1, 2) 74 | gen month = substr(date, 3, 2) 75 | gen year = substr(date, 5, 4) 76 | 77 | destring day month year, replace 78 | gen edate = mdy(month, day, year) 79 | format edate %dM_d,_CY 80 | 81 | /* generate unique id on district key and date */ 82 | egen id = group(district_key edate) 83 | isid id 84 | 85 | /* set as panel */ 86 | xtset id edate, daily 87 | 88 | save $tmp/vaccines_clean , replace 89 | 90 | /****************/ 91 | /* match to LGD */ 92 | /****************/ 93 | use $tmp/vaccines_clean, clear 94 | 95 | /* drop extra variables */ 96 | drop district_key state_code 97 | 98 | /* create lgd_state variable to merge */ 99 | gen lgd_state_name = lower(state) 100 | 101 | /* fix dadra and nager haveli and daman and diu */ 102 | replace lgd_state_name = "dadra and nagar haveli" if district == "Dadra and Nagar Haveli" 103 | replace lgd_state_name = "daman and diu" if (district == "Daman") | (district == "Diu") 104 | 105 | /* merge in lgd state id */ 106 | merge m:1 lgd_state_name using $keys/lgd_state_key, keepusing(lgd_state_id) keep(match master) nogen 107 | 108 | /* now create an lgd_district variable to merge */ 109 | gen lgd_district_name = lower(district) 110 | 111 | /* fix misspellings and name changes */ 112 | synonym_fix lgd_district_name, synfile($ddl/covid/b/str/cov19india_vaccine_district_fixes.txt) replace 113 | 114 | /* save */ 115 | save $tmp/temp, replace 116 | 117 | /* run masala merge */ 118 | keep lgd_state_name lgd_district_name 119 | duplicates drop 120 | masala_merge lgd_state_name using $keys/lgd_district_key, s1(lgd_district_name) minbigram(0.2) minscore(0.6) outfile($tmp/vaccine_lgd_district) 121 | 122 | /* keep master matches */ 123 | keep if match_source < 7 124 | 125 | /* drop unneeded variables */ 126 | keep lgd_state_name lgd_district_name_using lgd_district_name_master 127 | 128 | /* merge data back in */ 129 | ren lgd_district_name_master lgd_district_name 130 | merge 1:m lgd_state_name lgd_district_name using $tmp/temp 131 | drop _merge 132 | 
133 | /* now replace the district name with the lgd key name */ 134 | drop lgd_district_name 135 | ren lgd_district_name_using lgd_district_name 136 | 137 | /* ensure that it is it square */ 138 | egen dgroup = group(lgd_state_name lgd_district_name) 139 | fillin date dgroup 140 | drop dgroup _fillin 141 | 142 | /* save data */ 143 | export delimited using "$tmp/covid_vaccination.csv", replace 144 | -------------------------------------------------------------------------------- /e/graphs_idi_r3.do: -------------------------------------------------------------------------------- 1 | use $iec/covid/idi_survey/wb3_clean, clear 2 | 3 | drop if weight_hh_r3 < 0 4 | 5 | global ag agr_loc_shift_prop_r3 agr_harvest_outlook_prop_r3 6 | 7 | /* collapse dataset to shrid level */ 8 | collapse (mean) $ag *change* hea_symp*prop_r3 rel_pds_any_prop_r3 rel*mean_r3 con_stillinsecure_prop* lab_occ*none_r3 (firstnm) state [pw = weight_hh_r3] , by(shrid) 9 | 10 | /* merge to shrids */ 11 | merge 1:1 shrid using $iec/covid/idi_survey/survey_shrid_data.dta, keep(master match) nogen 12 | 13 | /* add nightlights data */ 14 | merge 1:1 shrid using $shrug/data/shrug_nl_wide, keep(master match) nogen keepusing(*2013) 15 | 16 | /* sc-st share */ 17 | merge 1:1 shrid using $shrug/data/shrug_pc11_pca, keepusing(*pca_tot_p *p_sc) keep(master match) nogen 18 | 19 | /* poverty rate */ 20 | merge 1:1 shrid using $shrug/data/shrug_secc, keepusing(secc_pov_rate_rural) keep(master match) nogen 21 | 22 | /* village directory chars */ 23 | merge 1:1 shrid using $shrug/data/shrug_pc11_vd, keepusing(pc11_vd_asha pc11_vd_ams *wkl_haat *vd_mrkt) keep(master match) nogen 24 | 25 | /* keep only variables we need */ 26 | keep $ag *change* land* pc11_pca* ec13* tdist* rural* *light* secc* pc11_vd* hea_symp*prop_r3 rel_pds_any_prop_r3 rel*mean_r3 shrid state *insecure* *none* 27 | 28 | /* generate sc population share */ 29 | gen sc_share = pc11_pca_p_sc/pc11_pca_tot_p 30 | 31 | /************************/ 32 | /* Analysis begins here */ 33 | /************************/ 34 | 35 | set scheme pn 36 | 37 | /* consumption recovery */ 38 | ren con_stillinsecure_prop* insecure* 39 | 40 | twoway lfitci insecure_r3 secc_pov_rate_rural, ytitle("% HH still food insecure - Sept 2020", margin(medium)) xtitle("Poverty % in village - SECC") name(insecure2, replace) /// 41 | note("Note: The Y-axis shows % of HH in the village that became food insecure due to the pandemic and haven't recovered", size(vsmall)) 42 | graphout cons_pov 43 | 44 | /* relief poverty rate */ 45 | /* MNREGA targeting */ 46 | /* take logs */ 47 | gen temp = rel_mnrega_wages_mean_r3 + 1 48 | gen ln_mnrega = ln(temp) 49 | replace temp = secc_pov_rate_rural + 1 50 | gen ln_pov = ln(temp) 51 | drop temp 52 | 53 | reg ln_mnrega ln_pov 54 | 55 | /* Save coefficients for graph */ 56 | local beta_pov = round(_b[ln_pov],0.001) 57 | 58 | test _b[ln_pov] = 0 59 | local p_val = round(`r(p)', 0.001) 60 | 61 | twoway lfitci ln_mnrega ln_pov, ytitle("Log (mean MNREGA wages received)") xtitle("Log (SECC village poverty rate)") clcolor(navy) acolor(ltblue%80) /// 62 | text( 5.4 5.5 "ln(MNREGA wage) on ln(poverty)" " " "Regression coefficient: 0`beta_pov'***", orient(horizontal) size(vsmall) justification(center) fcolor(white) box margin(small)) 63 | graphout targeting 64 | 65 | /* unemployment as of september 2020 */ 66 | twoway lfitci lab_occ_none_r3 tdist_100, ytitle("% HH unemployed - September 2020") xtitle("Distance to nearest town (Km)") name(unemp_1, replace) ylabel(0.25 (.05) .6) 
clcolor(navy) acolor(green) 67 | twoway lfitci lab_occ_lckdwn_none_r3 tdist_100, ytitle("% HH unemployed - Lockdown") xtitle("Distance to nearest town (Km)") name(unemp_2, replace) ylabel(0.25 (.05) .6) clcolor(pink) acolor(red) 68 | 69 | graph combine unemp_2 unemp_1, rows(1) 70 | graphout unemp_urb 71 | 72 | /* agriculture infrastructure - roads, access to AMS, mandis */ 73 | ren agr_loc_shift_prop_r3 shift_loc 74 | 75 | reg shift_loc rural_road 76 | estimates store Road 77 | reg shift_loc pc11_vd_mrkt 78 | estimates store Mandi 79 | 80 | la var pc11_vd_mrkt "Village has regular mandis" 81 | 82 | coefplot Road Mandi, drop(_cons) yline(0) scheme(plottig) levels(90) legend(label(1 "Village has a road", 2 "Village has weekly mandis", 3 "Distance to nearest city")) recast(bar) vertical ytitle("Outcome: Whether cultivators shifted selling location in 2020", size(small)) 83 | graphout infra 84 | 85 | /* Harvest outlook */ 86 | reg harvest_outlook landless_share 87 | 88 | /* Save coefficients for graph */ 89 | local beta_land = round(_b[landless_share],0.001) 90 | 91 | twoway lfitci harvest_outlook landless_share, ytitle("% of farmers with a +ve harvest outlook this year vs. last year") xtitle("Census: Share of landless working age population (18-65)") clcolor(sienna) acolor(sand%80) /// 92 | text( 0.6 0.7 "Harvest outlook on landless share" " " "Regression coefficient: `beta_land'***", orient(horizontal) size(vsmall) justification(center) fcolor(white) box margin(small)) 93 | graphout ag_outlook 94 | 95 | /* wage change versus poverty rate */ 96 | ren lab_wagechange_mean* wagechange* 97 | 98 | -------------------------------------------------------------------------------- /e/pop_estimates_21.csv: -------------------------------------------------------------------------------- 1 | state_name pop_2021_est 2 | uttar pradesh 240000000 3 | maharashtra 123100000 4 | bihar 124700000 5 | west bengal 99600000 6 | madhya pradesh 85400000 7 | tamil nadu 77800000 8 | rajasthan 81000000 9 | karnataka 67600000 10 | gujarat 63900000 11 | andhra pradesh 53900000 12 | odisha 46400000 13 | telangana 38500000 14 | kerala 35700000 15 | jharkhand 38600000 16 | assam 35600000 17 | punjab 30100000 18 | chhattisgarh 29400000 19 | haryana 28200000 20 | delhi 18700000 21 | jammu & kashmir 13600000 22 | uttarakhand 11200000 23 | himachal pradesh 7450000 24 | tripura 4170000 25 | meghalaya 3340000 26 | manipur 3091000 27 | nagaland 2300000 28 | goa 1586000 29 | arunachal pradesh 1570000 30 | puducherry 1413000 31 | mizoram 1239000 32 | chandigarh 1158000 33 | sikkim 690000 34 | dadra & nagar haveli and daman & diu 615000 35 | andaman & nicobar 417000 36 | ladakh 289000 37 | lakshadweep 73183 38 | -------------------------------------------------------------------------------- /e/prep_dlhs_data.do: -------------------------------------------------------------------------------- 1 | /* This file preps DLHS Data 2 | 1. structure DLHS data, combine by state 3 | 2. merge with PC11 state and district codes 4 | */ 5 | 6 | /**************************/ 7 | /* 1. 
Structure DLHS data */ 8 | /**************************/ 9 | 10 | /* initiate empty files for each */ 11 | cap mkdir $tmp/dlhs 12 | clear 13 | save $tmp/dlhs/dlhs_BIRTH, emptyok replace 14 | save $tmp/dlhs/dlhs_cab, emptyok replace 15 | save $tmp/dlhs/dlhs_HOUSEHOLD, emptyok replace 16 | save $tmp/dlhs/dlhs_IMMU, emptyok replace 17 | save $tmp/dlhs/dlhs_marriage, emptyok replace 18 | save $tmp/dlhs/dlhs_person, emptyok replace 19 | save $tmp/dlhs/dlhs_village, emptyok replace 20 | save $tmp/dlhs/dlhs_WOMAN, emptyok replace 21 | 22 | /* combine state data for each file type */ 23 | local statelist Andaman_Nicobar AndhraPradesh ArunachalPradesh Chandigarh GOA Haryana HimachalPradesh Karnataka Kerala Maharashtra Manipur Meghalaya Mizoram Nagaland Puducherry Punjab Sikkim TamilNadu Telangana Tripura WestBengal 24 | 25 | /* cycle through all states with dlhs data */ 26 | foreach state in `statelist' { 27 | 28 | /* get the list of files in the state folder */ 29 | local filelist: dir "$health/dlhs/raw/`state'" files "*cab.dta" 30 | 31 | /* cycle through the data files for this state */ 32 | foreach file in `filelist' { 33 | 34 | /* extract the name of this file */ 35 | tokenize "`file'" , parse("_") 36 | local var = "`3'" 37 | 38 | /* open the file */ 39 | use $health/dlhs/raw/`state'/`file', clear 40 | qui count 41 | local counter = `counter' + `r(N)' 42 | 43 | /* save the state name */ 44 | gen state_name = "`state'" 45 | replace state_name = lower(state_name) 46 | 47 | /* append to the full file */ 48 | append using $tmp/dlhs/dlhs_`var' 49 | 50 | /* resave full file */ 51 | save $tmp/dlhs/dlhs_`var', replace 52 | } 53 | } 54 | 55 | 56 | /****************************/ 57 | /* 2. Match with PC11 codes */ 58 | /****************************/ 59 | /* 05/19/20 - for now this only deals with the cab data, merging in some hh variables from ahs_comb */ 60 | 61 | /* open the DLHS data file, clean and save */ 62 | use $tmp/dlhs/dlhs_cab, clear 63 | 64 | /* clean state names to match pc11_state_name */ 65 | gen pc11_state_name = state_name 66 | replace pc11_state_name = subinstr(pc11_state_name, "pradesh", " pradesh", .) 
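/* (added illustration -- not part of the original cleaning logic) the subinstr above splits
   run-together folder names before "pradesh", e.g. "andhrapradesh" -> "andhra pradesh" and
   "himachalpradesh" -> "himachal pradesh". a quick non-destructive check that no concatenated
   "...pradesh" names remain: */
qui count if regexm(pc11_state_name, "[a-z]pradesh")
di "state names still missing a space before pradesh: " r(N)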
67 | replace pc11_state_name = "andaman nicobar islands" if pc11_state_name == "andaman_nicobar" 68 | replace pc11_state_name = "tamil nadu" if pc11_state_name == "tamilnadu" 69 | replace pc11_state_name = "andhra pradesh" if pc11_state_name == "telangana" 70 | replace pc11_state_name = "west bengal" if pc11_state_name == "westbengal" 71 | 72 | /* merge in pc11 id from key */ 73 | merge m:1 pc11_state_name dist using $health/dlhs/dlhs4_district_key, keepusing(pc11_state_id pc11_district_id) keep(match master) nogen 74 | 75 | /* Basic Cleaning */ 76 | /* drop 14,076 records from Karnataka that have all data fields missing */ 77 | drop if mi(psu) 78 | 79 | /* drop duplicates - force these to drop as these are all duplicated records but won't 80 | get dropped with a simple duplicates drop because of missing values */ 81 | duplicates drop primekeynew, force 82 | 83 | /* rename the primekeynew to be an index for DLHS */ 84 | ren primekeynew index 85 | 86 | /* create pregnancy indicator */ 87 | gen pregnant = 1 if !mi(hv81) & (hv81 == 1 | hv81 == 2) 88 | replace pregnant = 0 if mi(pregnant) 89 | 90 | /* define dlhs_merge variable to describe which observations are from cab, comb, or both */ 91 | gen dlhs_merge = 3 92 | replace dlhs_merge = 2 if mi(q77_intro) | q77_intro == 2 93 | cap label define dlhs_merge 1 "cab only" 2 "comb only" 3 "cab & comb" 94 | label values dlhs_merge dlhs_merge 95 | 96 | /* save in permanent dlhs folder */ 97 | save $health/dlhs/dlhs_cab, replace 98 | -------------------------------------------------------------------------------- /forecasting/README.md: -------------------------------------------------------------------------------- 1 | # COVID forecasting map 2 | 3 | This subfolder contains the backend code to construct the DDL COVID forecasting interactive map, [hosted here](http://www.devdatalab.org/covid-forecast). Forecast variables such as prospective Rt featured in the map are provided by [COVID_metrics](https://twitter.com/COVID_metrics), and supplement DDL COVID data from this repository. 4 | 5 | 6 | Note: this codebase is not intended to be entirely executable or reproducible, rather by open-sourcing we hope to share our methodologies and increase transparency of the approaches taken in the data processing steps for the DDL COVID forecasting map. 7 | 8 | 9 | ![DAG](covid_dag.png) 10 | -------------------------------------------------------------------------------- /forecasting/b/create_vector_tileset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this script takes geojson district data and creates a vector tileset for pushing to mapbox. 4 | # this requires tippecanoe and tile-join, which are installed in ~/iec/local/share/tippecanoe/ 5 | 6 | # note: --generate-ids option is required for referencing feature ids in 7 | # e.g. hover effects. from Mapbox: "mapbox/tippecanoe#615 adds the most 8 | # basic --generate-ids option (using the input feature sequence for the 9 | # ID), with the disclaimer that the IDs are not stable and that their 10 | # format may change in the future." 
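# usage sketch (added note; the geojson file names below are illustrative, not the pipeline's
# actual paths): $1 is the geojson carrying the full time series used for plotting, and $2 is
# the geojson holding only the most recent observation per district used for the choropleth map:
#   bash create_vector_tileset.sh $TMP/district_plot_data.geojson $TMP/district_map_data.geojson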
11 | 12 | # create full-data district tileset with zoom range defined (cost saver) 13 | ~/iec/local/share/tippecanoe/tippecanoe --force -z8 -Z5 -o $TMP/covid_data_plot.mbtiles --read-parallel --coalesce-smallest-as-needed --detect-shared-borders --generate-ids $1 14 | 15 | # create district tileset with most recent observations (for map) 16 | ~/iec/local/share/tippecanoe/tippecanoe --force -z8 -Z5 -o $TMP/covid_data_map.mbtiles --read-parallel --coalesce-smallest-as-needed --detect-shared-borders --generate-ids $2 17 | 18 | # merge tilesets 19 | ~/iec/local/share/tippecanoe/tile-join --force -o $TMP/covid_data.mbtiles $TMP/covid_data_map.mbtiles $TMP/covid_data_plot.mbtiles 20 | -------------------------------------------------------------------------------- /forecasting/b/data_to_geojson.py: -------------------------------------------------------------------------------- 1 | # take DTA data and joins with shapefiles for both shrid and dist 2 | # outputs geojson, which will then be merged into a tileset using tippecanoe 3 | # depends on py_spatial env (run from snakemake) 4 | 5 | 6 | ############ 7 | # Preamble # 8 | ############ 9 | 10 | import sys, os, importlib 11 | import geopandas as gpd 12 | import pandas as pd 13 | import argparse 14 | 15 | # import ddlpy utils 16 | from ddlpy.geospatialtools.utils import import_vector_data 17 | 18 | # initialize args 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument("--intable", type=str) 21 | parser.add_argument("--inshp", type=str) 22 | parser.add_argument("--outfile", type=str) 23 | args = parser.parse_args() 24 | 25 | # define tabular import fn 26 | def import_tabular_data(fp): 27 | """ 28 | Reads in tabular data with file extension checks 29 | fp: filepath for datafile to be imported, must bs shp/csv/dta/excel 30 | """ 31 | # expand data filepath 32 | fp = os.path.expanduser(fp) 33 | 34 | # assert that the data file exists 35 | if not os.path.isfile(fp): 36 | raise OSError("Input file not found") 37 | 38 | # ensure that the data file is a readable format 39 | fp_ext = os.path.splitext(fp)[1] 40 | if fp_ext not in [".csv", ".dta", ".xls", ".xlsx"]: 41 | raise ValueError("Data must be .dta, .csv, .xlsx/.xls format") 42 | 43 | # read in csv 44 | if fp_ext == ".csv": 45 | target_df = pd.read_csv(fp) 46 | 47 | # read in excel 48 | if fp_ext in [".xls", "xlsx"]: 49 | target_df = pd.read_excel(fp) 50 | 51 | # read in dta 52 | if fp_ext == ".dta": 53 | target_df = pd.read_stata(fp) 54 | 55 | return target_df 56 | 57 | # function to merge tabular data with a shapefile / gdf object 58 | def table_geodataframe_join(poly_in, join_id, fp_table, fp_out=""): 59 | 60 | # expand filepaths 61 | fp_table = os.path.expanduser(fp_table) 62 | fp_out = os.path.expanduser(fp_out) 63 | 64 | # assert that the filepaths exist 65 | if not os.path.isfile(fp_table): 66 | raise OSError("Tabular data file not found") 67 | 68 | # read in the tabular data 69 | tab_data = import_tabular_data(fp_table) 70 | 71 | # execute the merge 72 | # joined = poly_in.merge(tab_data, on=join_id, how='left') 73 | # inner join removes district polygons wihtout data rather than keeping empty geometries 74 | joined = poly_in.merge(tab_data, on=join_id, how='inner') 75 | 76 | # convert any categorical columns to string (breaks to_file gpd method) 77 | for column in joined.select_dtypes(include='category').columns: joined[column] = joined[column].astype('string') 78 | 79 | # write to geojson in desired location 80 | joined.to_file(fp_out, driver="GeoJSON") 81 | 82 | 83 | 
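# minimal toy illustration (added; never called by the pipeline) of why table_geodataframe_join()
# uses how='inner': a district polygon with no matching tabular row is dropped rather than written
# out with empty attributes. the ids and values below are invented purely for this example.
def _toy_inner_join_example():
    toy_poly = gpd.GeoDataFrame(
        {"lgd_d_id": ["001", "002"]},
        geometry=gpd.points_from_xy([77.2, 72.8], [28.6, 19.0]),
        crs="EPSG:4326",
    )
    toy_tab = pd.DataFrame({"lgd_d_id": ["001"], "rt_pred": [1.1]})
    # the inner join keeps only district "001"; how='left' would keep "002" with missing values
    return toy_poly.merge(toy_tab, on="lgd_d_id", how="inner")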
################# 84 | # District data # 85 | ################# 86 | 87 | # read in district shapefile simplified on mapshaper.org 88 | dist_poly = import_vector_data(f'{args.inshp}') 89 | 90 | # run the join 91 | print("initiating district-level join") 92 | table_geodataframe_join(poly_in=dist_poly, join_id='lgd_d_id', fp_table=f'{args.intable}', fp_out=os.path.expanduser(f'{args.outfile}')) 93 | 94 | -------------------------------------------------------------------------------- /forecasting/b/merge_ddl_pred_data.do: -------------------------------------------------------------------------------- 1 | /****************************/ 2 | /* District level data join */ 3 | /****************************/ 4 | 5 | /* pull globals */ 6 | process_yaml_config ~/ddl/covid/forecasting/config/config.yaml 7 | 8 | /* combine DDL covid data and UChicago predictions */ 9 | use $cdata/pred_data_district, clear 10 | 11 | /* merge in DDL data */ 12 | merge m:1 lgd_state_id lgd_district_id using $cdata/ddl_data 13 | keep if _merge == 3 14 | drop _merge 15 | 16 | /* some var cleanup. start with formatting ids and geonames */ 17 | ren lgd_state_id lgd_s_id 18 | ren lgd_district_id lgd_d_id 19 | ren lgd_state_name lgd_s_name 20 | ren lgd_district_name lgd_d_name 21 | 22 | /* capitalize geonames */ 23 | replace lgd_s_name = upper(substr(lgd_s_name,1,1)) + substr(lgd_s_name,2,.) 24 | replace lgd_d_name = upper(substr(lgd_d_name,1,1)) + substr(lgd_d_name,2,.) 25 | 26 | /* other var tweaks */ 27 | ren dates date 28 | foreach var of varlist rt_* *cases* { 29 | replace `var' = round(`var', .01) 30 | } 31 | 32 | /* confirm drop of extraneous modeling vars */ 33 | cap drop t_* 34 | 35 | /* save to permadir */ 36 | save $cdata/merged_data_district, replace 37 | 38 | /* CSV version */ 39 | outsheet using $cdata/merged_data_district.csv, comma replace 40 | -------------------------------------------------------------------------------- /forecasting/b/old/push_predicted_metadata.py: -------------------------------------------------------------------------------- 1 | # push predicted covid variable metadata to DDL AWS bucket (web server) 2 | # in practice this is just the most recent rt_pred date from the latest run in a js object 3 | # this will then be used as the basis for the choropleth in the web app 4 | 5 | # note: you need the aws cli and an operational config for this to work (currently only TL has this) 6 | # but can easily set up for others 7 | 8 | import json 9 | import requests 10 | import argparse 11 | import boto3 12 | import os 13 | 14 | # initialize args 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("--file", type=str) 17 | args = parser.parse_args() 18 | 19 | # pull file input into python obj 20 | pushfile = f'{args.file}' 21 | fname = os.path.basename(pushfile) 22 | 23 | ########################## 24 | # upload new zips to AWS # 25 | ########################## 26 | 27 | # status report 28 | print(f'pushing data from {pushfile} to AWS') 29 | 30 | # initialize the boto s3 resource 31 | s3 = boto3.resource('s3') 32 | 33 | # execute AWS command to push the new zip file to S3. 
34 | # This requires your aws cli be configured properly, and depends on the current bucket subdirectory configuration 35 | data = open(pushfile, 'rb') 36 | s3.Bucket('shrug-assets-ddl').put_object(Key='static/main/assets/other/' + fname, Body=data, ACL='public-read', ContentType='text/html') 37 | -------------------------------------------------------------------------------- /forecasting/b/process_ddl_data.do: -------------------------------------------------------------------------------- 1 | /* process DDL covid data for merging with dist-level predictions */ 2 | 3 | /* FIXME TODO: paths - use globals */ 4 | /* pull globals */ 5 | process_yaml_config ~/ddl/covid/forecasting/config/config.yaml 6 | 7 | /* read from covidi repo output */ 8 | use ~/iec/covid/hospitals/pc_hospitals_dist.dta , clear 9 | 10 | /* keep vars to include in the tileset */ 11 | keep lgd_*id pc_clinics pc_num_hospitals 12 | 13 | /* write out for merging */ 14 | save $cdata/ddl_data, replace 15 | -------------------------------------------------------------------------------- /forecasting/b/process_predicted_data.do: -------------------------------------------------------------------------------- 1 | /* assemble statewise district-level Rt estimates manually from CSVs */ 2 | /* TODO FIXME: get rid of absolute paths and use globals */ 3 | 4 | /* pull globals */ 5 | process_yaml_config ~/ddl/covid/forecasting/config/config.yaml 6 | 7 | /* function to append state and district files */ 8 | cap prog drop append_covid_estimates 9 | prog def append_covid_estimates 10 | syntax anything 11 | local geo "`anything'" 12 | 13 | /* get all state or dist-level files */ 14 | global imports $cdata/all_rt_estimates 15 | local files : dir "$imports" files "*`geo'*.csv" 16 | 17 | /* loop over files to save as .dta and append. 
slow logic but concise */ 18 | clear 19 | save $tmp/covid_appender, emptyok replace 20 | foreach file in `files' { 21 | insheet using $imports/`file', names clear 22 | local state_abbrev = substr("`file'", 1, 2) 23 | drop v1 24 | append using $tmp/covid_appender 25 | save $tmp/covid_appender, replace 26 | } 27 | end 28 | 29 | /*************/ 30 | /* Districts */ 31 | /*************/ 32 | 33 | /* append raw data */ 34 | append_covid_estimates district 35 | 36 | /* stringify ids */ 37 | gen tmp = string(lgd_state_id,"%02.0f") 38 | drop lgd_state_id 39 | ren tmp lgd_state_id 40 | gen tmp = string(lgd_district_id,"%03.0f") 41 | drop lgd_district_id 42 | ren tmp lgd_district_id 43 | 44 | /* HACK - get rid of duplicates on date */ 45 | count 46 | local pre_drop `r(N)' 47 | ddrop lgd_state_id lgd_district_id dates 48 | count 49 | local post_drop `r(N)' 50 | di "`post_drop' / `pre_drop'" 51 | assert `post_drop' / `pre_drop' > 0.997 52 | 53 | /* assert there are no duplicate entries for any district at any date */ 54 | distinct lgd_state_id lgd_district_id dates, joint 55 | assert `r(ndistinct)' == `r(N)' 56 | 57 | /* final output for dist data - save to tmp as we'll be merging back districts we want to keep */ 58 | drop state district 59 | order lgd* dates, first 60 | drop t_* 61 | save $tmp/pred_data_all_dists, replace 62 | 63 | /* new data file with single entry of latest date for each district - 64 | used for choropleth */ 65 | 66 | /* get most recent date for imputation */ 67 | gen sdate = date(dates, "YMD") 68 | gsort -sdate 69 | 70 | /* keep most recent observed rt_pred for each district */ 71 | keep if !mi(rt_pred) 72 | gsort lgd_district_id lgd_state_id dates 73 | bysort lgd_district_id lgd_state_id : gen order = _n 74 | by lgd_district_id lgd_state_id: gen latest = _n == _N 75 | keep if latest 76 | 77 | /* DROP IF OVER A MONTH OUT OF DATE */ 78 | 79 | /* create a var for the lag between today and the most recent date */ 80 | gen lag = sdate - daily("`c(current_date)'", "DMY") 81 | 82 | /* drop if over 30 days out of date */ 83 | drop if lag < -30 84 | 85 | /* create 100xed Rt for scaling (MB only allows interpolated fills with integer stops...) 
*/ 86 | gen rt_pred_100x = 100 * rt_pred 87 | 88 | /* merge back to district data */ 89 | preserve 90 | keep lgd_district_id lgd_state_id 91 | merge 1:m lgd_district_id lgd_state_id using $tmp/pred_data_all_dists, keep(match) nogen 92 | 93 | /* DTA and CSV versions */ 94 | save $cdata/pred_data_district, replace 95 | outsheet using $cdata/pred_data_district.csv, comma replace 96 | restore 97 | 98 | /* keep only bare minimum of variables */ 99 | ren lgd_district_id lgd_d_id 100 | keep lgd_d_id rt_pred_100x 101 | 102 | /* save for adding to tileset */ 103 | save $cdata/pred_data_rt_choropleth, replace 104 | 105 | 106 | /**********/ 107 | /* States */ 108 | /**********/ 109 | 110 | /* append raw data */ 111 | append_covid_estimates state 112 | 113 | /* stringify ids */ 114 | gen tmp = string(lgd_state_id,"%02.0f") 115 | drop lgd_state_id 116 | ren tmp lgd_state_id 117 | 118 | /* same basic assertion */ 119 | distinct lgd_state_id dates, joint 120 | assert `r(ndistinct)' == `r(N)' 121 | 122 | /* minimal cleanup here */ 123 | drop state 124 | order lgd* dates, first 125 | 126 | /* final output for state data */ 127 | drop t_* 128 | save $cdata/pred_data_state, replace 129 | 130 | /* CSV version */ 131 | outsheet using $cdata/pred_data_state.csv, comma replace 132 | -------------------------------------------------------------------------------- /forecasting/b/pull_predicted_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # $1 is credential file location; $2 is the helper script location; $3 is target directory for downloads 4 | GOOGLE_APPLICATION_CREDENTIALS=$1 python3 $2 --dir $3 5 | -------------------------------------------------------------------------------- /forecasting/b/pull_predicted_data_helper.py: -------------------------------------------------------------------------------- 1 | # pulls complete predictions data from Satej's google cloud bucket 2 | from google.cloud import storage 3 | from pathlib import Path 4 | import argparse 5 | 6 | # initialize args 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument("--dir", type=str) 9 | args = parser.parse_args() 10 | 11 | # set target location 12 | target_path = Path(f'{args.dir}') 13 | 14 | # define name of satej's GC bucket 15 | bucket_name = "daily_pipeline" 16 | 17 | # loop over estimates and download each file 18 | for blob in storage.Client().list_blobs(bucket_name, prefix = "pipeline/est"): 19 | filename = Path(blob.name).name 20 | print(f"{blob.name} -> {filename}") 21 | blob.download_to_filename(target_path / filename) 22 | -------------------------------------------------------------------------------- /forecasting/b/push_public_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # push public covid forecasting partnership data to public dropbox folder. 4 | # note: this only makes sense (1) on Polaris and (2) if you have Rclone configured properly. 
5 | # file link: https://www.dropbox.com/s/cuyn0wj6bsuilwq/merged_data.dta?dl=0 6 | 7 | # zip up state and dist DTAs and CSVs 8 | cd ~/iec/covid/forecasting/ 9 | tar -vczf covid_forecast.tar.gz README.md merged_data_district.dta merged_data_district.csv pred_data_district.dta pred_data_district.csv pred_data_state.dta pred_data_state.csv 10 | cd - 11 | 12 | # push to the public data folder 13 | # this will change to AWS eventually 14 | rclone copy ~/iec/covid/forecasting/covid_forecast.tar.gz my_remote:SamPaul/covid_data/forecasts 15 | printf "finished pushing data to dropbox" 16 | -------------------------------------------------------------------------------- /forecasting/b/push_vector_tileset.py: -------------------------------------------------------------------------------- 1 | # take tippecanoe vector tileset and push to mapbox 2 | # see: https://docs.mapbox.com/api/maps/uploads/ 3 | # this requires mapbox credentials, which are defined in the YAML config for this project 4 | # resulting tileset will have the tileset ID of devdatalab.rural-data-portal in mapbox studio 5 | 6 | import json 7 | import requests 8 | import argparse 9 | import boto3 10 | 11 | # initialize args 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--token", type=str) 14 | parser.add_argument("--file", type=str) 15 | parser.add_argument("--tilesetname", type=str) 16 | args = parser.parse_args() 17 | 18 | 19 | ########################## 20 | # Request S3 Credentials # 21 | ########################## 22 | 23 | # retrieve S3 credentials. mapbox access token must be passed as an argument 24 | params = ( 25 | ('access_token', f'{args.token}'), 26 | ) 27 | response = requests.post('https://api.mapbox.com/uploads/v1/devdatalab/credentials', params=params) 28 | json_data = response.json() if response and response.status_code == 200 else None 29 | 30 | # process the JSON response to pull necessary fields 31 | bucket = json_data['bucket'] 32 | key = json_data['key'] 33 | url = json_data['url'] 34 | accessKeyId = json_data['accessKeyId'] 35 | secretAccessKey = json_data['secretAccessKey'] 36 | sessionToken = json_data['sessionToken'] 37 | key = json_data['key'] 38 | 39 | # define username and vector tileset name 40 | username = 'devdatalab' 41 | tileset_name = f'{args.tilesetname}' 42 | 43 | 44 | ########################## 45 | # Upload to staging area # 46 | ########################## 47 | 48 | # iniatialize AWS session with temp credentials 49 | session = boto3.Session( 50 | aws_access_key_id = accessKeyId, 51 | aws_secret_access_key = secretAccessKey, 52 | aws_session_token = sessionToken, 53 | ) 54 | 55 | # initialize the boto s3 resource 56 | s3 = session.resource('s3') 57 | 58 | # upload file to Mapbox's S3 staging bucket 59 | #aws s3 cp f'{args.token}' s3://{bucket}/{key} --region us-east-1 60 | data = open(f'{args.file}', 'rb') 61 | s3.Bucket(bucket).put_object(Key=key, Body=data) 62 | 63 | 64 | ########################### 65 | # Create upload to Mapbox # 66 | ########################### 67 | 68 | # define the API call and initiate upload 69 | headers = { 70 | 'Content-Type': 'application/json', 71 | 'Cache-Control': 'no-cache', 72 | } 73 | data = '{ "url": ' + f'"{url}"' + ', "tileset": ' + f'"{username}.{tileset_name}"' + ' }' # awkward bc fstrings can't handle literal colons 74 | response = requests.post('https://api.mapbox.com/uploads/v1/devdatalab', headers=headers, params=params, data=data) 75 | 76 | # get upload ID from the response 77 | json_data = response.json() if response and 
response.status_code == 201 else None 78 | upload_id = json_data['id'] 79 | 80 | 81 | ######################### 82 | # Assert against errors # 83 | ######################### 84 | 85 | # check upload status 86 | response = requests.get(f'https://api.mapbox.com/uploads/v1/devdatalab/{upload_id}', params=params) 87 | 88 | # assert there are no errors in the response 89 | json_data = response.json() if response and response.status_code == 200 else None 90 | error = json_data['error'] 91 | assert not error 92 | 93 | -------------------------------------------------------------------------------- /forecasting/b/test_merged_data.py: -------------------------------------------------------------------------------- 1 | # general imports - use spatial env in configs/ 2 | import geopandas as gpd 3 | import pandas as pd 4 | from pathlib import Path 5 | import shutil 6 | 7 | # import our configs 8 | import sys, os 9 | from ddlpy.utils.tools import process_yaml_config 10 | config = process_yaml_config('~/ddl/covid/forecasting/config/config.yaml') 11 | 12 | # shorten path globals 13 | CCODE = Path(os.path.expanduser(config['globals']['ccode'])) 14 | CDATA = Path(os.path.expanduser(config['globals']['cdata'])) 15 | 16 | # read temp directory from env variable 17 | TMP = Path(os.environ['TMP']) 18 | 19 | 20 | ############### 21 | # Merge tests # 22 | ############### 23 | 24 | # combine DDL covid data and UChicago predictions 25 | pred_data = pd.read_stata(CDATA / 'pred_data_district.dta') 26 | ddl_data = pd.read_stata(CDATA / 'ddl_data.dta') 27 | merged_data = pred_data.merge(ddl_data, how='inner', on=['lgd_district_id', 'lgd_state_id']) 28 | 29 | # check merge rate 30 | if (len(merged_data) / len(pred_data)) < 0.98: 31 | raise ValueError('merge rate from DDL data to covid predictions on LGD state / dist must be greater than 98%') 32 | 33 | 34 | ##################### 35 | # Identifiers tests # 36 | ##################### 37 | 38 | # read in the merged data saved by Stata script 39 | merged_data = pd.read_stata(CDATA / 'merged_data_district.dta') 40 | 41 | # assert we're unique on LGD state/dist and time 42 | if not merged_data.set_index(['lgd_d_id','lgd_s_id', 'date']).index.is_unique: 43 | raise ValueError('LGD state and district do not uniquely identify observations across dates') 44 | 45 | # assert no missings in identifiers 46 | idnames = ['lgd_d_id', 'lgd_s_id'] 47 | for idname in idnames: 48 | if not merged_data[idname].isna().sum() == 0: 49 | raise ValueError(f'Identifier {idname} has missings') 50 | 51 | ################### 52 | # Variables tests # 53 | ################### 54 | 55 | # look for missings 56 | varnames = ['rt_pred', 'total_cases', 'new_cases_ts'] 57 | for varname in varnames: 58 | if not merged_data[varname].isna().sum() == 0: 59 | raise ValueError(f'Variable {varname} has missings') 60 | 61 | 62 | ############## 63 | # Dates test # 64 | ############## 65 | 66 | # THIS HAS BEEN OVERRULED - we allow for differential dates now 67 | ## convert to pd datetime format for sorting 68 | #merged_data['date'] = pd.to_datetime(merged_data['date']) 69 | # 70 | ## get latest date observed for RT within each district into an array 71 | #latest_df = merged_data.loc[merged_data.groupby(['lgd_d_id','lgd_s_id']).date.idxmax()] 72 | # 73 | ## assert we only have a single latest date across all dists 74 | #if not len(latest_df['date'].unique()) == 1: 75 | # raise ValueError(f'Different districts have different latest Rt observation dates in merged DTA file') 76 | # 77 | ## pull latest date into a string 78 
| #latest_date = latest_df.iloc[0]['date'].strftime('%Y-%m-%d') 79 | # 80 | ## read in the JSON object in a JS file that contains this "most recent date" metadata to compare to the date in the tabular data 81 | #with open(CDATA / 'pred_metadata.js') as f: 82 | # lines = f.readlines() 83 | #json_date = lines[0].split('most_recent":"',1)[1][:10] 84 | # 85 | ## check that the latest tabular date matches 86 | #if not latest_date == json_date: 87 | # raise ValueError(f'Different latest dates in tabular file and JSON metadata') 88 | 89 | 90 | ################# 91 | # GeoJSON tests # 92 | ################# 93 | 94 | ## read in geojson output that gets transformed to vector tileset 95 | ## hack around gpd.read_file having STRANGE conda-related error when reading from ~/iec/ filesystem?! 96 | #geojson = gpd.read_file(CDATA / 'district.geojson') 97 | # 98 | ## check merged state ids are the same 99 | #geojson['lgd_s_id_x'].equals(geojson['lgd_s_id_y']) 100 | # 101 | ## check that the geojson file also has the same latest date 102 | #geojson['date'] = pd.to_datetime(geojson['date']) 103 | #json_latest = geojson.loc[geojson.groupby(['lgd_d_id','lgd_s_id_x']).date.idxmax()] 104 | #if not len(json_latest['date'].unique()) == 1: 105 | # raise ValueError(f'Different districts have different latest Rt observation dates in geojson file') 106 | # 107 | ## check that the latest date agrees with JS metadata 108 | #latest_date = json_latest.iloc[0]['date'].strftime('%Y-%m-%d') 109 | #if not latest_date == json_date: 110 | # raise ValueError(f'Different latest dates in geojson file and JSON metadata') 111 | 112 | # EXIT 113 | print('TESTS PASSED') 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /forecasting/config/config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # config file for COVID forecasting site 3 | 4 | # define globals shared across Python and Stata 5 | globals: 6 | ccode: ~/ddl/covid/forecasting 7 | cdata: ~/iec/covid/forecasting 8 | tileset_name: covid-forecasting 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /forecasting/config/forecasting.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - defaults 3 | - conda-forge 4 | dependencies: 5 | - python=3 6 | - requests 7 | - boto3 8 | - git=2 9 | - google-cloud-storage 10 | - pyyaml -------------------------------------------------------------------------------- /forecasting/config/forecasting_spatial.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - defaults 4 | dependencies: 5 | - python>=3.7 6 | - geos 7 | - geopandas=0.9 8 | - geotiff=1 9 | - git=2 10 | - haversine=0.4 11 | - matplotlib=3 12 | - numpy=1 13 | - pandas=1 14 | - proj=6 15 | - pygeos=0.8 16 | - pysal=2 17 | - rasterio=1 18 | - rasterstats=0.14 19 | - shapely=1 20 | - pyyaml 21 | - pip 22 | - pip: 23 | - topojson 24 | -------------------------------------------------------------------------------- /forecasting/update_forecasts_cronjob.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set this up with the following cron command (executes at 1030AM daily): 4 | # $ crontab -e 5 | # $ 30 10 * * * source $HOME/.bashrc; touch $TMP/rerun_indicator.txt; $HOME/ddl/covid/forecasting/update_forecasts_cronjob.sh 6 | 7 | # depends on slack 
messaging hook in env variable SLACKKEY 8 | if [[ -z "$SLACKKEY" ]]; then 9 | printf "\nENV variable SLACKKEY must be defined for cronjob to execute. Add it to your .bashrc\n" 10 | fi 11 | 12 | # send init message via slack 13 | curl -X POST -H 'Content-type: application/json' --data '{"text":":building_construction: Beginning auto-update of COVID forecasting platform"}' https://hooks.slack.com/services/$SLACKKEY 14 | 15 | # change dir to scratch for logging 16 | cd /scratch/`whoami` 17 | 18 | # run update script with basic error handling 19 | printf "\nbegin update build: ~/ddl/covid/forecasting/Snakefile\n" 20 | if snakemake --conda-not-block-search-path-envvars --directory $HOME/ddl/covid/forecasting/ --snakefile $HOME/ddl/covid/forecasting/Snakefile --cores 4 --use-conda; then 21 | 22 | # if we don't have an error, send a slack 23 | curl -X POST -H 'Content-type: application/json' --data '{"text":":not-a-dumpster-fire: Successful update of forecasting data!"}' https://hooks.slack.com/services/$SLACKKEY 24 | else 25 | 26 | # if we do have an error, send a slack 27 | curl -X POST -H 'Content-type: application/json' --data '{"text":":rotating_light: FAILURE: auto-update of COVID data had non-zero exit status"}' https://hooks.slack.com/services/$SLACKKEY 28 | fi 29 | 30 | # move back to starting dir 31 | cd - 32 | -------------------------------------------------------------------------------- /make_covid.do: -------------------------------------------------------------------------------- 1 | /* This makefile runs all the data construction steps in the repo */ 2 | 3 | /* globals that need to be set: 4 | $tmp -- a temporary folder 5 | $ccode -- the root folder for this repo 6 | $covidpub -- processed data used as inputs for COVID variable construction 7 | */ 8 | 9 | global fast 1 10 | 11 | /*****************************/ 12 | /* PART 1 -- DDL SERVER ONLY */ 13 | /*****************************/ 14 | 15 | /* match DLHS4 to PC11 districts */ 16 | /* in: $health/DLHS4, $keys/pc11_district_key. out: $health/DLHS4 */ 17 | do $ccode/b/create_dlhs4_pc11_district_key 18 | 19 | /* collapse raw DLHS4 data to district level */ 20 | /* in: $health/DLHS4, pc11_pca_district. out: $health/hosp/dlhs4_hospitals_dist, $covidpub/dhls4_hospitals_dist */ 21 | do $ccode/b/prep_dlhs4_district 22 | 23 | /* prepare short village/town directory and PCA to save in public repo */ 24 | /* in: TD/VD. out: $covidpub/pc11r_hosp, pc11r_hosp */ 25 | do $ccode/b/prep_hosp_pca_vd 26 | 27 | /* generate demographic data and save in public repo */ 28 | do $ccode/b/gen_lgd_pc11_demographics 29 | 30 | /* prepare EC microdata on hospitals */ 31 | /* in: raw economic census 2013.
out: $covidpub/ec_hosp_microdata */ 32 | do $ccode/b/prep_ec_hosp_microdata 33 | 34 | /* build age distribution by district/subdistrict, using SECC + PC */ 35 | if "$fast" != "1" { 36 | do $ccode/b/gen_age_distribution 37 | } 38 | 39 | /* Process and generate HMIS district data */ 40 | do $core/hmis/b/create_hmis_district_yearly.do 41 | do $core/hmis/b/create_hmis_district_clean.do 42 | do $core/hmis/b/create_hmis_district_keys.do 43 | 44 | /* Process and generate HMIS subdistrict data */ 45 | do $core/hmis/b/create_hmis_subdistrict_yearly.do 46 | do $core/hmis/b/create_hmis_subdistrict_clean.do 47 | do $core/hmis/b/create_hmis_subdistrict_keys.do 48 | 49 | /* download latest district-level case data (runs in py3 conda env) */ 50 | do $ccode/b/get_case_data 51 | 52 | /* build NSS deaths data */ 53 | do $ccode/b/gen_nss_district_key.do 54 | do $ccode/b/prep_nss75.do 55 | 56 | /* copy and process keys */ 57 | do $ccode/b/copy_keys.do 58 | 59 | /* process NFHS data */ 60 | // note: this is not executable (sourced from collaborators) but included for reference 61 | // do $ccode/b/ddl_nfhs_poll_hmis.do 62 | 63 | /***********************************************/ 64 | /* PART 2 -- RUNS FROM DATA LINKED IN GIT REPO */ 65 | /***********************************************/ 66 | 67 | /* aggregate case data into a district file with confirmed + deaths */ 68 | do $ccode/b/aggregate_case_data 69 | 70 | /* prepare PC11 hospital/clinic data */ 71 | do $ccode/b/prep_pc_hosp.do 72 | 73 | /* prepare economic census (2013) hospital data */ 74 | do $ccode/b/prep_ec_hosp.do 75 | 76 | /* clean migration data and transform to LGD */ 77 | do $ccode/b/clean_migration.do 78 | 79 | /* clean agmark mandi price data */ 80 | do $ccode/b/clean_agmark.do 81 | 82 | /* prepare SECC district-level poverty data [unfinished] */ 83 | // do $ccode/b/prep_secc.do 84 | 85 | /* subdistrict-level urbanization */ 86 | // gen_urbanization_subdist -- subdistrict PCA urbanization 87 | 88 | 89 | /***************************************/ 90 | /* PART 3 ANALYTICAL RESULTS/ESTIMATES */ 91 | /***************************************/ 92 | 93 | /* predict district and subdistrict mortality distribution based on age distribution */ 94 | /* out: estimates/(sub)district_age_dist_cfr */ 95 | do $ccode/a/predict_age_cfr 96 | 97 | /* combine PC and DLHS hospital capacity */ 98 | do $ccode/a/estimate_hosp_capacity 99 | 100 | /* export some additional stats that were asked for into a combined file */ 101 | do $ccode/a/impute_additional_fields 102 | 103 | 104 | /*****************************/ 105 | /* PART 4 -- DDL SERVER ONLY */ 106 | /*****************************/ 107 | 108 | /* push data and metadata to production. metadata will be included in 109 | data download links as well. */ 110 | // shell source $ccode/b/push_data.sh 111 | -------------------------------------------------------------------------------- /str/manual_covid_case_district_match.csv: -------------------------------------------------------------------------------- 1 | idm_master,idu_using,_pc11_district_name_master,_pc11_district_name_using 2 | 06-nuh,06-mewat,nuh,mewat 3 | 09-prayagraj,09-allahabad,prayagraj,allahabad 4 | 29-bengaluru,29-bangalore,bengaluru,bangalore 5 | 29-belagavi,29-belgaum,belagavi,belgaum 6 | 16-gomati,16-south tripura,gomati,south tripura 7 | 03-s.a.s. nagar,03-sahibzada ajit singh nagar,s.a.s. nagar,sahibzada ajit singh nagar 8 | 06-gurugram,06-gurgaon,gurugram,gurgaon 9 | 09-shamli,09-muzaffarnagar,shamli,muzaffarnagar 10 | 28-s.p.s.
nellore,28-sri potti sriramulu nellore,s.p.s. nellore,sri potti sriramulu nellore 11 | --------------------------------------------------------------------------------