├── .gitignore
├── code
├── ado
│ ├── compute_ci_percentiles.ado
│ ├── compute_racefracs.ado
│ ├── compute_raceshares.ado
│ ├── corr_reg.ado
│ ├── estimate_gompertz.jl
│ ├── estimate_gompertz2.ado
│ ├── fastregby.ado
│ ├── fastregby_gompMLE_julia.ado
│ ├── gen_mortrates2.ado
│ ├── generate_le_with_raceadj.ado
│ ├── gompertz_LYbetween.py
│ ├── mle_gomp_est.ado
│ ├── scalarout.ado
│ ├── scheme-leap.scheme
│ ├── simulated_delta.ado
│ └── wsample.ado
├── ado_maptile_geo
│ ├── county1990_coords_clean.dta
│ ├── county1990_database_clean.dta
│ ├── county1990_maptile.ado
│ ├── county1990_maptile.smcl
│ ├── cz_coords_clean.dta
│ ├── cz_database_clean.dta
│ ├── cz_maptile.ado
│ ├── cz_maptile.smcl
│ ├── state_coords_clean.dta
│ ├── state_database_clean.dta
│ ├── state_maptile.ado
│ └── state_maptile.smcl
├── ado_ssc
│ ├── _gwtmean.ado
│ ├── binscatter.ado
│ ├── binscatter.sthlp
│ ├── fastxtile.ado
│ ├── fastxtile.sthlp
│ ├── maptile.ado
│ ├── maptile.sthlp
│ ├── maptile_geohelp.ado
│ ├── maptile_geolist.ado
│ ├── maptile_install.ado
│ ├── maptile_newgeo.sthlp
│ ├── save12.ado
│ ├── save12.sthlp
│ ├── save13.ado
│ ├── save13.sthlp
│ ├── spmap.ado
│ ├── spmap.hlp
│ ├── spmap_arrow.ado
│ ├── spmap_color.ado
│ ├── spmap_diagram.ado
│ ├── spmap_examples.ado
│ ├── spmap_label.ado
│ ├── spmap_line.ado
│ ├── spmap_point.ado
│ ├── spmap_polygon.ado
│ ├── spmap_psl.ado
│ └── spmap_scalebar.ado
├── assign_fig_tab_numbers.do
├── covariates
│ ├── Health behavior maps.do
│ ├── clean_brfss.do
│ ├── clean_causeofdeath.do
│ ├── clean_dartmouthatlas.do
│ ├── clean_educ.do
│ ├── clean_hospquality.do
│ ├── clean_pop_labforce_change1980to2000.do
│ ├── clean_racefractions.do
│ ├── clean_uninsurance.do
│ ├── combine_all_covariates.do
│ ├── cz_names.do
│ └── list_of_covariates.do
├── create_online_tables.do
├── lifeexpectancy
│ ├── Check correlations in CZ LE trends.do
│ ├── Compute Years of Social Security Eligibility.do
│ ├── Estimate NCHS State LEs pooling income groups.do
│ ├── Explore national trends at fixed income levels.do
│ ├── Generate Signal Standard Deviations.do
│ ├── LE ranked lists of states.do
│ ├── List top 10 and bottom 10 CZs.do
│ ├── National LE profile sensitivity checks.do
│ ├── Plot CZ LE trend scatters.do
│ ├── Plot international comparison of LE at age 40.do
│ ├── Plot major city LE profiles by ventile.do
│ ├── Plot national LE profiles by percentile.do
│ ├── Plot national LE trends by income.do
│ ├── Plot signal by local population level.do
│ ├── Sensitivity analyses of local LEs.do
│ ├── Standard errors vs population size.do
│ ├── State Level Inequality Correlations.do
│ ├── bootstrap_le.do
│ ├── compute_LE_trends.do
│ ├── generate_bootstrap_confidence_estimates.do
│ ├── generate_life_expectancies.do
│ ├── le_correlations.do
│ ├── le_levels_maps.do
│ └── le_trends_maps.do
├── mortality
│ ├── Calculate aggregate deaths.do
│ ├── Construct multi-source death and population counts.do
│ ├── Decompose mortality into medical v external.do
│ ├── Lag invariance.do
│ ├── Output income means by national income quantile.do
│ ├── Plot observed mortality and survival profiles.do
│ ├── Sample comparison - NCHS SSA IRS.do
│ ├── Summary stats on number of obs and income.do
│ ├── construct_cdc_mortrates.do
│ ├── convert_IRScollapses_to_mortrates.do
│ └── estimate_irs_gompertz_parameters.do
├── nlms
│ ├── Explore mortality profiles implied by race shifters.do
│ ├── nlms_analyze_raceshifters_BYincq_BYcsregion.do
│ ├── nlms_bootstrap_shifters.do
│ ├── nlms_createsample.do
│ ├── nlms_generate_shifters.do
│ ├──
nlms_loaddata.do │ └── nlms_mortality_profiles_BYrace.do ├── raceshares │ ├── Explore CZ race share maps.do │ ├── Explore income distribution over 16 income bins in County SF3 data.do │ ├── SF3 incbin weights for national income quantiles.do │ ├── cty_cz_st_racefracBY_workingagebin_hhincquantile.do │ ├── cty_cz_st_racepopBY_year_agebin_gnd_hhincquantile.do │ ├── cty_cz_st_raceshareBY_year_agebin_gnd_hhincquantile.do │ ├── cty_racepopBY_workingagebin_hhincbin.do │ ├── cty_racepopBY_year_agebin_gnd.do │ ├── national_Explore race fraction smoothing.do │ ├── national_Race share extrapolation check - nonparametric age 51 test.do │ ├── national_racefracBY_workingage_gnd_incpctile.do │ ├── national_racepopBY_year_age_gnd.do │ ├── national_racepopBY_year_age_gnd_incpctile.do │ ├── national_raceshareBY_gnd.do │ └── national_raceshareBY_year_age_gnd_incpctile.do ├── readme.md ├── set_bootstrap.do ├── set_environment.do └── tests │ ├── Check completeness of mortality data.do │ ├── Explore runtimes for estimating race shifters.do │ ├── Simulate various methods of deriving LE from Gompertz parameters.do │ ├── Test Julia Gompertz matches Stata Gompertz.do │ └── Test gen_mortrates.do ├── mortality.do ├── mortality_datagen.do ├── mortality_init.do ├── mortality_tests.do └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | # -project- ignores 2 | *.log 3 | archive/ 4 | replicate/ 5 | mortality_init_files.dta 6 | mortality_init_links.dta 7 | mortality_datagen_files.dta 8 | mortality_datagen_links.dta 9 | mortality_files.dta 10 | mortality_links.dta 11 | mortality_tests_files.dta 12 | mortality_tests_links.dta 13 | 14 | # Other ignores 15 | data/ 16 | scratch/ 17 | results/ 18 | code/set_julia.do 19 | -------------------------------------------------------------------------------- /code/ado/compute_ci_percentiles.ado: -------------------------------------------------------------------------------- 1 | program define compute_ci_percentiles 2 | 3 | syntax varlist(numeric) , by(varlist) gen(name) 4 | 5 | * Build matrix of 2.5th and 97.5th percentiles 6 | foreach var of local varlist { 7 | tempfile `var'pctiles 8 | 9 | statsby `var'25=r(r1) `var'975=r(r2), saving(``var'pctiles') /// 10 | by(`by'): _pctile `var', percentiles(2.5 97.5) 11 | } 12 | 13 | * Combine datasets of percentile values 14 | clear 15 | qui use `by' using ``:word 1 of `varlist''pctiles' 16 | foreach var of local varlist { 17 | qui merge 1:1 `by' using ``var'pctiles', assert(3) nogen 18 | } 19 | 20 | * Reshape percentile long 21 | reshape long `varlist', i(`by') j(`gen') 22 | replace `gen'=`gen'/10 23 | 24 | end 25 | -------------------------------------------------------------------------------- /code/ado/compute_racefracs.ado: -------------------------------------------------------------------------------- 1 | program define compute_racefracs 2 | 3 | /*** 4 | 5 | In each by-group, compute the fraction of each race's population in each income bin. 6 | 7 | For example, if black males had the same income distribution as the full population, 8 | they would be 0.01 of each percentile. Across income bins, the fractions sum to one. 9 | 10 | Optionally, smooth the race fractions across positive bins using lowess. 
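    As an illustrative sketch (hypothetical numbers, not taken from the data): if a by-group
    has pop_black = 200 in one income bin and 10,000 black individuals summed across all of its
    bins, then frac_of_black = 200/10,000 = 0.02 in that bin, and the frac_of_black values
    summed over the by-group's bins equal 1 by construction.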
11 | 12 | ***/ 13 | 14 | syntax , by(varlist) incomevar(varname) [ racelist(namelist) lowess_bw(real -99) ] 15 | 16 | if ("`racelist'"=="") local racelist black hispanic asian other 17 | 18 | * Verify that file is identified at By Group x Income Bin level 19 | isid `by' `incomevar' 20 | 21 | * Generate race fractions 22 | foreach race in `racelist' { 23 | 24 | tempvar sumpop_`race' 25 | egen long `sumpop_`race''=total(pop_`race'), by(`by') 26 | 27 | qui gen double frac_of_`race'=pop_`race'/`sumpop_`race'' 28 | label var frac_of_`race' "Fraction of `race' population in income bin" 29 | 30 | } 31 | 32 | * Smooth the noise in the race fractions across positive income bins 33 | if (`lowess_bw'!=-99) { 34 | foreach race in `racelist' { 35 | 36 | * Lowess smooth race fractions 37 | lowess frac_of_`race' `incomevar' if `incomevar'>0, by(`by') nograph gen(smoothfrac_of_`race') bw(`lowess_bw') 38 | qui replace smoothfrac_of_`race' = frac_of_`race' if `incomevar'<=0 39 | label var smoothfrac_of_`race' "Smoothed fraction of `race' population in income bin" 40 | 41 | * Renormalize the smoothed estimates so that the fractions will sum to 1 for each age x gnd 42 | tempvar sumfrac_`race' sumsmoothfrac_`race' 43 | qui egen double `sumfrac_`race''=total(frac_of_`race') if `incomevar'>0, by(`by') 44 | qui egen double `sumsmoothfrac_`race''=total(smoothfrac_of_`race') if `incomevar'>0, by(`by') 45 | 46 | qui replace smoothfrac_of_`race' = smoothfrac_of_`race'/`sumsmoothfrac_`race''*`sumfrac_`race'' if `incomevar'>0 47 | 48 | * Check that smoothfracs sum to 1 49 | tempvar newsumsmoothfrac_`race' 50 | egen float `newsumsmoothfrac_`race'' = total(smoothfrac_of_`race'), by(`by') 51 | assert abs(`newsumsmoothfrac_`race''-1)<10^-6 52 | 53 | * Change smoothfrac datatype from double to float (the final digits vary between runs, this rounds them away) 54 | qui recast float smoothfrac_of_`race', force 55 | 56 | } 57 | } 58 | 59 | * Change frac datatype from double to float (the extra precision isn't meaningful) 60 | qui recast float frac_of_*, force 61 | 62 | end 63 | -------------------------------------------------------------------------------- /code/ado/compute_raceshares.ado: -------------------------------------------------------------------------------- 1 | program define compute_raceshares 2 | /*** Compute race shares in each By Group x Income Percentile cell 3 | ***/ 4 | 5 | syntax , by(varlist) 6 | 7 | * Verify that file is identified at By Group x Income Percentile level 8 | isid `by' 9 | 10 | * Generate race shares 11 | foreach race in black asian hispanic other { 12 | 13 | * Compute race share in each cell 14 | gen double raceshare_`race' = pop_`race' / (pop_black + pop_asian + pop_hispanic + pop_other) 15 | label var raceshare_`race' "Share of `race' in -`by'- cell" 16 | 17 | } 18 | 19 | * Check that race shares sum to 1 in each cell OR all are missing because total population is 0 in the cell 20 | assert abs(raceshare_black + raceshare_asian + raceshare_hispanic + raceshare_other - 1) < 10^-6 | /// 21 | (pop_black + pop_asian + pop_hispanic + pop_other)==0 22 | 23 | * Set order 24 | order `by' pop_* raceshare_* 25 | 26 | end 27 | -------------------------------------------------------------------------------- /code/ado/corr_reg.ado: -------------------------------------------------------------------------------- 1 | program define corr_reg 2 | 3 | /*** Run a regression estimating the correlation between two variables. 
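    The program first regresses y on x in natural units, then standardizes both variables to
    mean 0 and standard deviation 1 and regresses again. With standardized variables the OLS
    slope equals the correlation coefficient, so the second regression reports the correlation
    together with a standard error (robust if vce() is specified).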
4 | ***/ 5 | 6 | syntax varlist(min=2 max=2 numeric) [if] [aweight fweight], [ vce(string) ] 7 | 8 | *** Prep 9 | 10 | * Create convenient weight local 11 | if ("`weight'"!="") local wt [`weight'`exp'] 12 | 13 | * Parse yvar and xvar 14 | local yvar=word("`varlist'",1) 15 | local xvar=word("`varlist'",2) 16 | 17 | *** Regressions 18 | 19 | * Regress in natural units 20 | reg `yvar' `xvar' `if' `wt' 21 | 22 | * Convert to Std Dev units 23 | qui sum `yvar' `wt' if e(sample) 24 | qui gen mb = (`yvar' - r(mean))/r(sd) 25 | 26 | qui sum `xvar' `wt' if e(sample) 27 | qui gen vb = (`xvar' - r(mean))/r(sd) 28 | 29 | * Regress in standard deviation units, applying VCE if specified 30 | reg mb vb `if' `wt', vce(`vce') 31 | drop mb vb 32 | 33 | if ("`vce'"!="") di "Note: Standard errors depend on order of y and x with robust standard errors." 34 | 35 | end 36 | -------------------------------------------------------------------------------- /code/ado/estimate_gompertz.jl: -------------------------------------------------------------------------------- 1 | using ArgParse 2 | using GLM, DataFrames, Distributions 3 | 4 | function parse_commandline() 5 | s = ArgParseSettings() 6 | 7 | @add_arg_table s begin 8 | "--input" 9 | help = "input CSV with counts of deaths and survivals by Age x Group" 10 | arg_type = String 11 | required = true 12 | "--output" 13 | help = "output CSV with Gompertz parameter estimates" 14 | arg_type = String 15 | required = true 16 | "--vce" 17 | help = "type of VCV matrix computed; either 'oim' or 'builtin'" 18 | arg_type = String 19 | default = "oim" 20 | end 21 | 22 | return parse_args(s) 23 | end 24 | 25 | function gompertzreg(df, vce) 26 | # Perform a Gompertz MLE regression, return the coefficients and Cholesky-decomposed covariance matrix as a DataFrame. 
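    # Model notes: with a log link on the binomial death/survival counts, the fitted one-year
    # death probability at age x is exp(a + b*x), i.e. the Gompertz form, so the GLM intercept
    # and slope are the Gompertz parameters. The upper-triangular Cholesky factor A of the
    # covariance of (slope, intercept) is returned so that downstream code (simulated_delta.ado)
    # can draw simulated parameters from the estimated sampling distribution using two
    # standard-normal draws.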
27 | 28 | model = try 29 | GLM.glm(@formula(mort ~ age), df, Binomial(), LogLink(), wts=1.0*df[:count].data, start=(-10,0.1)) 30 | catch err 31 | err 32 | end 33 | 34 | if isa(model, DataFrames.DataFrameRegressionModel) 35 | coef = GLM.coef(model)' 36 | 37 | if vce=="oim" 38 | vcov = vcov_Gompertz_OIM(model, df) 39 | elseif vce=="builtin" 40 | vcov = GLM.vcov(model) 41 | else 42 | throw(ArgumentError("vce must be oim or builtin")) 43 | end 44 | 45 | A = chol([vcov[2,2] vcov[2,1]; vcov[1,2] vcov[1,1]]) 46 | return DataFrame(gomp_int = coef[1], gomp_slope = coef[2], A_slope_1 = A[1,1], A_int_1 = A[1,2], A_int_2 = A[2,2]) 47 | 48 | elseif isa(model, ErrorException) 49 | println("caught error in _grp $(dec(df[1,:_grp])): $(model.msg) (Likely 0 deaths.)") 50 | return DataFrame(gomp_int = 0, gomp_slope = 0, A_slope_1 = 0, A_int_1 = 0, A_int_2 = 0) 51 | 52 | elseif isa(model, LinAlg.PosDefException) 53 | println("caught error in _grp $(dec(df[1,:_grp])): LinAlg.PosDefException (Likely insufficient observations)") 54 | return DataFrame(gomp_int = 0, gomp_slope = 0, A_slope_1 = 0, A_int_1 = 0, A_int_2 = 0) 55 | 56 | else 57 | throw(model) 58 | 59 | end 60 | 61 | end 62 | 63 | function vcov_Gompertz_OIM(model, df) 64 | # Computes the OIM variance-covariance matrix for a Gompertz GLM model 65 | 66 | df_alive = df[df[:mort] .== 0, :] 67 | df_alive[:predmort] = exp( GLM.coef(model)[1] + GLM.coef(model)[2] * df_alive[:age] ) 68 | 69 | OIM_contribution = DataFrame() 70 | OIM_contribution[:a2] = df_alive[:count] .* df_alive[:predmort] ./ (1-df_alive[:predmort]).^2 71 | OIM_contribution[:ab] = OIM_contribution[:a2] .* df_alive[:age] 72 | OIM_contribution[:b2] = OIM_contribution[:ab] .* df_alive[:age] 73 | 74 | OIM_elements = aggregate(OIM_contribution, sum) 75 | return Symmetric(inv([OIM_elements[1,:a2_sum] OIM_elements[1,:ab_sum]; OIM_elements[1,:ab_sum] OIM_elements[1,:b2_sum]])) 76 | 77 | end 78 | 79 | function main() 80 | 81 | # Parse arguments 82 | parsed_args = parse_commandline() 83 | 84 | # Load data 85 | data = readtable(parsed_args["input"]) 86 | 87 | # Run Gompertz MLE regression on each by-group 88 | gompBY = by(data, :_grp, df -> gompertzreg(df, parsed_args["vce"]) ) 89 | 90 | # Output results 91 | writetable(parsed_args["output"], gompBY) 92 | 93 | end 94 | 95 | main() 96 | -------------------------------------------------------------------------------- /code/ado/estimate_gompertz2.ado: -------------------------------------------------------------------------------- 1 | program define estimate_gompertz2 2 | 3 | /*** Calculate Gompertz parameters from mortality rates by age, within by-groups. 4 | 5 | Use Julia to estimate GLM regressions quickly if it is configured, 6 | otherwise use -statsby- in Stata. 
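    An illustrative call (the by-variables and variable names here are hypothetical, chosen to
    match the output of gen_mortrates2):

        estimate_gompertz2 cz gnd, age(age_at_d) mort(mortrate) n(count) type(mle) vce(oim) force

    which estimates one Gompertz intercept and slope per CZ x gender cell.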
7 | 8 | ***/ 9 | 10 | syntax varlist(min=1), age(varname) mort(varname) [ n(varname) /// 11 | collapsefrom(varlist) vce(name) /// 12 | type(string) /// 13 | cz_popmsk(integer 0) cty_popmsk(integer 0) force ] 14 | local group `varlist' 15 | if !inlist("`type'","mle","ols") { 16 | di as error "type() must be 'mle' or 'ols'" 17 | exit 198 18 | } 19 | if ("`n'"=="") { 20 | if ("`type'"=="mle") { 21 | di as error "n() required with type(mle)" 22 | exit 198 23 | } 24 | if ("`collapsefrom'"!="") { 25 | di as error "n() required with collapsefrom()" 26 | exit 198 27 | } 28 | } 29 | 30 | *** Collapse dataset if requested 31 | if ("`collapsefrom'"!="") { 32 | isid `collapsefrom' `age' 33 | collapse (mean) `mort' (rawsum) `n' [aw=`n'], by(`group' `age') fast 34 | qui compress 35 | } 36 | else isid `group' `age' 37 | 38 | *** Check dataset has expected structure 39 | assert inrange(`age', 40, 76) // only expected ages for this project 40 | if ("`n'"!="") { 41 | cap confirm long variable `n' // integer number of individuals 42 | if (_rc==7) confirm int variable `n' 43 | else confirm long variable `n' 44 | assert reldif(`mort'*`n', round(`mort'*`n')) < 2e-6 // integer number of deaths 45 | } 46 | 47 | *** Mask Gompertz parameters for small CZs or counties 48 | if (`cz_popmsk'!=0) { 49 | project, original("${root}/data/raw/Covariate Data/cz_characteristics.dta") preserve 50 | merge m:1 cz using "${root}/data/raw/Covariate Data/cz_characteristics.dta", keepusing(pop2000) /// 51 | assert(2 3) keep(3) nogen 52 | drop if pop2000 < `cz_popmsk' 53 | drop pop2000 54 | } 55 | 56 | if (`cty_popmsk'!=0) { 57 | project, original("${root}/data/raw/Covariate Data/cty_covariates.dta") preserve 58 | merge m:1 cty using "${root}/data/raw/Covariate Data/cty_covariates.dta", keepus(cty_pop2000) /// 59 | assert(2 3) keep(3) nogen 60 | drop if cty_pop2000 < `cty_popmsk' 61 | drop cty_pop2000 62 | } 63 | 64 | *** Estimate Gompertz parameters for each group 65 | if ("`type'"=="mle") { 66 | 67 | * "Reshape" from mortality rates into counts of deaths and non-deaths 68 | /* After reshape: 69 | - `mort' variable = 0 or 1. 70 | - `n' variable = count of deaths or non-deaths 71 | 72 | Note: 73 | - Already confirmed unique by `group' `age' above. 74 | - Already confirmed number of deaths is an integer above. 75 | 76 | */ 77 | quietly { 78 | expand 2 79 | bys `group' `age': replace `n' = cond(_n == 1, round(`mort'*`n'), round(`n' - `mort'*`n')) 80 | by `group' `age': replace `mort' = (_n == 1) 81 | } 82 | recast byte `mort' 83 | 84 | * Estimate Gompertz parameters (coefficients and covariance matrix) 85 | if ("$juliapath"=="") { 86 | statsby A_slope_1 = r(A_slope_1) A_int_1 = r(A_int_1) A_int_2 = r(A_int_2) gomp_int = r(gomp_int) gomp_slope = r(gomp_slope), /// 87 | by(`group') clear: mle_gomp_est, age(`age') mort(`mort') n(`n') vce(`vce') 88 | 89 | sort `group' 90 | } 91 | else fastregby_gompMLE_julia `mort' `age', count(`n') by(`group') vce(`vce') clear `force' 92 | 93 | } 94 | else if ("`type'"=="ols") { 95 | 96 | * Generate log mortality rates 97 | qui count if `mort'==0 98 | if (r(N)>0) di as error "warning: `r(N)'/`=_N' mortality rates are 0, so their log mortality rates are missing." 
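    * Under the Gompertz specification the mortality rate at age x is exp(gomp_int + gomp_slope*x),
    * so log mortality is linear in age. Fitting OLS of log(mortrate) on age within each by-group
    * therefore recovers the Gompertz intercept and slope; cells with zero mortality have missing
    * log mortality and are dropped from the fit (hence the warning above).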
99 | 100 | tempvar l_mort 101 | gen `l_mort' = log(`mort') 102 | 103 | * Estimate Gompertz parameters (coefficients only) 104 | fastregby `l_mort' `age', by(`group') clear 105 | rename (_b_cons _b_`age') (gomp_int gomp_slope) 106 | 107 | } 108 | 109 | * Check for groups that had errors, prevent disclosure of missingness 110 | qui count if mi(gomp_int, gomp_slope) 111 | if (r(N)>0 & "`force'"=="force") { 112 | qui drop if mi(gomp_int, gomp_slope) 113 | di as error "warning: dropped `r(N)' group(s) that had an error in Gompertz estimation" 114 | } 115 | else if r(N)>0 { 116 | di as error "`r(N)' group(s) had an error in Gompertz estimation. use 'force' option to drop groups with errors." 117 | exit 2000 118 | } 119 | 120 | end 121 | 122 | -------------------------------------------------------------------------------- /code/ado/fastregby.ado: -------------------------------------------------------------------------------- 1 | program define fastregby 2 | 3 | /*** Performs a univariate OLS regression for each by-group, storing the 4 | coefficients in new dataset. 5 | 6 | The following commands are equivalent: 7 | - fastregby y x, by(byvars) clear 8 | - statsby, by(byvars) clear: reg y x 9 | 10 | Except fastregby will run approximately 80 times faster. 11 | ***/ 12 | 13 | version 13.1 14 | syntax varlist(min=2 max=2 numeric), by(varlist) clear 15 | 16 | * Convert string by-vars to numeric 17 | foreach var of varlist `by' { 18 | cap confirm numeric variable `var', exact 19 | if _rc==0 { // numeric var 20 | local bynumeric `bynumeric' `var' 21 | } 22 | else { // string var 23 | tempvar `var'N 24 | encode `var', gen(``var'N') 25 | local bynumeric `bynumeric' ``var'N' 26 | local bystr `var' // list of string by-vars 27 | } 28 | } 29 | 30 | * Sort using by-groups 31 | sort `by' 32 | /* 33 | if ("`:sortedby'"!="`by'") { 34 | di as error "Loaded dataset must be sorted in by-groups: `by'" 35 | exit 5 36 | } 37 | */ 38 | 39 | * Generate a single by-variable counting by groups 40 | tempvar grp 41 | egen `grp'=group(`bynumeric') 42 | 43 | * Perform regressions on each by-group, store in dataset 44 | mata: _fastregby("`varlist'", "`grp'", "`bynumeric'") 45 | 46 | * Convert string by-vars back to strings, from numeric 47 | foreach var in `bystr' { 48 | decode ``var'N', gen(`var') 49 | } 50 | order `by' 51 | 52 | end 53 | 54 | 55 | version 13.1 56 | set matastrict on 57 | 58 | mata: 59 | 60 | void _fastregby(string scalar regvars, string scalar grpvar, string scalar byvars) { 61 | // Inputs: 62 | // - a y-var and x-var for an OLS regression 63 | // - a group var, for which each value represents a distinct by-group. 64 | // This var must be in ascending order. 65 | // - a list of numeric by-variables, whose groups correspond to the group var. 66 | // Outputs: 67 | // - dataset of coefficients from OLS regression for each by-group 68 | 69 | // Convert variable names to column indices 70 | real rowvector regcols 71 | real scalar ycol 72 | real scalar xcol 73 | real scalar grpcol 74 | real rowvector bycols 75 | 76 | regcols = st_varindex(tokens(regvars)) 77 | ycol = regcols[1] 78 | xcol = regcols[2] 79 | grpcol = st_varindex(grpvar) 80 | bycols = st_varindex(tokens(byvars)) 81 | 82 | // Fetch number of groups 83 | real scalar numgrp 84 | numgrp = _st_data(st_nobs(),grpcol) 85 | 86 | // Preallocate matrices of group identifiers & coefs 87 | real matrix groups 88 | real matrix coefs 89 | groups = J(numgrp, cols(bycols), .) // Num Groups x Num By-Vars matrix 90 | coefs = J(numgrp, 2, .) 
// Num Groups x 2 matrix 91 | 92 | // Perform sequence of regressions 93 | real matrix M 94 | real matrix y 95 | real matrix X 96 | real scalar startobs 97 | real scalar curgrp 98 | startobs=1 // starting obsevation for current group being processed 99 | curgrp=_st_data(1,grpcol) // current group being processed 100 | 101 | for (obs=1; obs<=st_nobs()-1; obs++) { 102 | 103 | if (_st_data(obs,grpcol)!=curgrp) { 104 | 105 | // compute OLS coefs: beta = inv(X'X) * X'y. 106 | // --> see Example 4 of -help mf_cross- 107 | st_view(M, (startobs,obs-1), regcols, 0) 108 | st_subview(y, M, ., 1) 109 | st_subview(X, M, ., (2\.)) 110 | 111 | coefs[curgrp,.] = ( invsym(cross(X,1 , X,1)) * cross(X,1 , y,0) )' 112 | 113 | // store group identifiers 114 | groups[curgrp,.] = st_data(startobs,bycols) 115 | 116 | // update counters 117 | curgrp=_st_data(obs,grpcol) 118 | startobs=obs 119 | 120 | } 121 | 122 | } 123 | 124 | // Always perform regression for last observation 125 | obs=st_nobs() 126 | if (_st_data(obs,grpcol)==curgrp) { // last observation is not a group to itself 127 | 128 | // increment obs, since code is written as processing the observation 129 | // that is 1 past the last in the group 130 | ++obs 131 | 132 | // compute OLS coefs: beta = inv(X'X) * X'y. 133 | // --> see Example 4 of -help mf_cross- 134 | st_view(M, (startobs,obs-1), regcols, 0) 135 | st_subview(y, M, ., 1) 136 | st_subview(X, M, ., (2\.)) 137 | 138 | coefs[curgrp,.] = ( invsym(cross(X,1 , X,1)) * cross(X,1 , y,0) )' 139 | 140 | // store group identifiers 141 | groups[curgrp,.] = st_data(startobs,bycols) 142 | 143 | } 144 | else { 145 | display("{error} last observation is in a singleton group") 146 | exit(2001) 147 | } 148 | 149 | // Store group identifiers in dataset 150 | stata("qui keep in 1/"+strofreal(numgrp)) 151 | stata("keep "+byvars) 152 | st_store(.,tokens(byvars),groups) 153 | 154 | // Store coefficients in dataset 155 | (void) st_addvar("float", "_b_"+tokens(regvars)[2]) 156 | (void) st_addvar("float", "_b_cons") 157 | st_store(., ("_b_"+tokens(regvars)[2], "_b_cons"), coefs) 158 | 159 | } 160 | 161 | end 162 | -------------------------------------------------------------------------------- /code/ado/fastregby_gompMLE_julia.ado: -------------------------------------------------------------------------------- 1 | program define fastregby_gompMLE_julia 2 | 3 | /*** Performs a Gompertz MLE regression for each by-group, storing the 4 | coefficients and Cholesky-decomposed covariances in a new dataset. 5 | 6 | 'debug' option prints all output from Julia. Otherwise Julia output is suppressed. 7 | ***/ 8 | 9 | version 13.1 10 | syntax varlist(min=2 max=2 numeric), count(varname numeric) by(varlist) [vce(name) force debug] clear 11 | if ("`vce'"!="") local vce --vce `vce' 12 | if ("`debug'"=="") local quietly quietly 13 | 14 | * Sort on by-variables 15 | sort `by' 16 | 17 | * Generate a single by-variable counting by-groups 18 | egen _grp = group(`by') 19 | 20 | * Perform regressions on each by-group using Julia 21 | tempfile deathandsurvival_data gomppar 22 | 23 | keep `by' _grp `varlist' `count' 24 | rename (`varlist' `count') (mort age count) 25 | qui export delim _grp age mort count using `deathandsurvival_data' 26 | di "Estimating Gompertz parameters using Julia..." 
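    * The shell call below passes the exported CSV of death/survival counts to estimate_gompertz.jl,
    * which fits the Gompertz binomial GLM separately for each _grp and writes gomp_int, gomp_slope
    * and the Cholesky terms (A_slope_1, A_int_1, A_int_2) to the output CSV; that file is then
    * imported and merged back onto the by-variables.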
27 | `quietly' !${juliapath} "${root}/code/ado/estimate_gompertz.jl" `vce' --input `deathandsurvival_data' --output `gomppar' 28 | 29 | * Store association between grp var and by-vars 30 | keep `by' _grp 31 | qui duplicates drop `by' _grp, force 32 | tempfile bylabels 33 | qui save `bylabels' 34 | 35 | * Load results from Julia 36 | import delim using `gomppar', asdouble clear 37 | rename (a_slope_1 a_int_1 a_int_2) (A_slope_1 A_int_1 A_int_2) 38 | qui drop if gomp_int==0 & gomp_slope==0 & A_slope_1==0 & A_int_1==0 & A_int_2==0 // that's how estimate_gompertz.jl indicates error 39 | 40 | * Replace _grp var with by-vars 41 | qui merge 1:1 _grp using `bylabels', assert(2 3) nogen // _merge==2 happens when by-groups dropped due to errors 42 | drop _grp 43 | order `by' 44 | sort `by' 45 | 46 | end 47 | -------------------------------------------------------------------------------- /code/ado/gen_mortrates2.ado: -------------------------------------------------------------------------------- 1 | program define gen_mortrates2 2 | 3 | /*** Input is a loaded dataset that is: 4 | - Identified by 5 | `varlist' x `age' x `year' 6 | - Containing variables: 7 | - `age', the age at which income is measured 8 | - `n', the count of people in that cell 9 | - deadby_1_Mean to deadby_18_Mean containing the cumulative 10 | fraction of people who were alive at the time income was 11 | measured, but have died by the start of the n'th year later. 12 | 13 | The variable specified in age() must either be: 14 | - named age, containing exact years 15 | - OR named agebin, containing 5-year agebins and a 4-year agebin 16 | whose income is measured at 58-61 => 2-year mortality measured 60-63. 17 | 18 | These agebins should be labeled by the mean age at which 19 | **income** is measured. 20 | ***/ 21 | 22 | syntax varlist, age(varname) year(varname) n(varname) [ all_lags ] 23 | local by `varlist' 24 | if ("`n'"!="count") rename `n' count 25 | isid `by' `age' `year' 26 | 27 | * Check no one dies in first year 28 | assert deadby_1_Mean==0 29 | 30 | * Check expected years are present 31 | forvalues i=1/18 { 32 | confirm numeric var deadby_`i'_Mean 33 | } 34 | 35 | * Check maximum age 36 | sum `age', meanonly 37 | if ("`age'"=="age") assert r(max)==61 // maximum age is exactly 61 (last working age) 38 | else if ("`age'"=="agebin") assert r(max)==59.5 // maximum agebin is 58-61 39 | else { 40 | di as error "age() variable must either be age or agebin." 
41 | exit 198 42 | } 43 | local maxage=r(max) 44 | 45 | * Reshape long on how many years income is lagged 46 | reshape long deadby_@_Mean, i(`by' `age' `year') j(lag) 47 | label var lag "Income Lag" 48 | 49 | * Only use 2-year lags except for age 61/agebin 60, which we use throughout retirement 50 | if ("`all_lags'"=="") drop if lag > 2 & `age' < `maxage' 51 | else local lag lag // lag will be an identifying variable 52 | 53 | * Calculate mortality rates & count of surviving population 54 | by `by' `age' `year' (lag): gen double mortrate = (deadby__Mean - deadby__Mean[_n-1]) / (1 - deadby__Mean[_n-1] ) 55 | by `by' `age' `year' (lag): replace count = count[1] * (1 - deadby__Mean[_n-1]) if lag>1 56 | compress count 57 | label var mortrate "Mortality Rate" 58 | label var count "Denominator of mortrate = people alive at beginning of year" 59 | 60 | * Clean data 61 | assert lag==1 | mi(deadby__Mean) if mi(mortrate) 62 | drop if mi(mortrate) 63 | 64 | * Generate age and year of death 65 | gen age_at_d = `age' + lag 66 | gen int yod = `year' + lag 67 | drop `age' `year' 68 | compress age_at_d 69 | 70 | label var age_at_d "Age at Death" 71 | label var yod "Year of Death" 72 | 73 | * Check no data past 2014 74 | capture assert yod<=2014 75 | if _rc!=0 { // check that data past 2014 is missing and can be thrown away 76 | assert deadby__Mean==0 | deadby__Mean==. if yod>2014 77 | drop if yod>2014 78 | } 79 | 80 | * Output 81 | drop deadby__Mean 82 | order `by' age_at_d yod lag mortrate count 83 | isid `by' age_at_d yod `lag' 84 | sort `by' age_at_d yod `lag' 85 | label data "" 86 | end 87 | -------------------------------------------------------------------------------- /code/ado/gompertz_LYbetween.py: -------------------------------------------------------------------------------- 1 | from math import exp, isnan 2 | from scipy.special import exp1 3 | from numpy import genfromtxt, savetxt 4 | import sys, argparse 5 | 6 | def main(): 7 | startage, endage, a, b, infile, outfile = check_arg(sys.argv[1:]) 8 | 9 | if infile is None: 10 | if a is None or b is None: 11 | raise ValueError('you must specify an input file or BOTH a and b') 12 | 13 | LY = gompertz_lifeyears(par=[a, b], minage=startage, maxage=endage) 14 | 15 | if outfile is None: 16 | print LY 17 | else: 18 | with open(outfile, 'w') as f: 19 | f.write('%s' % LY) 20 | 21 | else: # infile specified 22 | if outfile is None: 23 | raise ValueError('you must specify an output file with an input file') 24 | 25 | gompertz_parameters = genfromtxt(infile, delimiter=',') 26 | lifeyears = [ gompertz_lifeyears(par=p, minage=startage, maxage=endage) for p in gompertz_parameters ] 27 | 28 | with open(outfile, 'w') as f: 29 | savetxt(f, lifeyears, fmt='%9g') 30 | 31 | 32 | def check_arg(args=None): 33 | parser = argparse.ArgumentParser(description='Compute expected life years between startage and endage for a person alive at startage') 34 | parser.add_argument('--startage', 35 | type=float, 36 | help='Starting age for expected life years calculation', 37 | required='True') 38 | parser.add_argument('--endage', 39 | type=float, 40 | help='Ending age for expected life years calculation', 41 | required='True') 42 | parser.add_argument('-a', '--intercept', 43 | type=float, 44 | help='Gompertz intercept; "a" in exp(a+bx)') 45 | parser.add_argument('-b', '--slope', 46 | type=float, 47 | help='Gompertz slope; "b" in exp(a+bx)') 48 | parser.add_argument('-i', '--input', 49 | type=str, 50 | help='CSV of Gompertz parameters; int in col 1, slope in col 2') 51 | 
parser.add_argument('-o', '--output', 52 | type=str, 53 | help='File to output expected life years to') 54 | 55 | results = parser.parse_args(args) 56 | return (results.startage, 57 | results.endage, 58 | results.intercept, 59 | results.slope, 60 | results.input, 61 | results.output) 62 | 63 | def gompertz_lifeyears(par, minage, maxage): 64 | a=par[0] 65 | b=par[1] 66 | if -0.00001 < b < 0.00001: # b is approximately 0 67 | return exp(-a) - exp(-a - exp(a) * (maxage - minage) ) 68 | else: 69 | f_min = 1 / b * exp(a + b * minage) 70 | f_max = 1 / b * exp(a + b * maxage) 71 | cons_of_integ = exp(f_min) 72 | 73 | if b>0: 74 | LY = -1 * cons_of_integ / b * ( exp1(f_max) - exp1(f_min) ) 75 | else: 76 | LY = -1 * cons_of_integ / b * ( exp1(complex(f_max)) - exp1(complex(f_min)) ).real 77 | 78 | if isnan(LY) and gompertz_survival(a, b, minage, minage+0.01)<0.00001: # everyone dies instantly 79 | return 0.0 80 | else: 81 | return LY 82 | 83 | def gompertz_survival(a, b, minage, age): 84 | return exp( 1 / b * ( exp(a + b*minage) - exp(a + b*age) ) ) 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /code/ado/mle_gomp_est.ado: -------------------------------------------------------------------------------- 1 | * Subcommand of simulated_delta 2 | 3 | program define mle_gomp_est , rclass 4 | syntax , age(varname) mort(varname) n(varname) [if] [ vce(passthru) ] 5 | 6 | * Estimate the MLE 7 | glm `mort' `age' [fw = `n'] `if', link(log) family(binomial) from(_cons=-10 `age'=0.1) `vce' 8 | 9 | * Extract Variance Covariance Matrix and Save 10 | matrix V = e(V) 11 | * return scalar var_slope = V[1,1] 12 | * return scalar var_int = V[2,2] 13 | * return scalar covar_int_slope = V[2,1] 14 | 15 | * Perform Cholesky Decomposition on Covariance Matrix and Save Relevant Coefficients (No need for A_slope_2 since A is upper triangular) 16 | * Could potentially get additional speed savings by using _cholesky(V), which returns modifies the matrix in place 17 | matrix A = cholesky(V) 18 | return scalar A_slope_1 = A[1,1] 19 | return scalar A_int_1 = A[2,1] 20 | return scalar A_int_2 = A[2,2] 21 | 22 | * Save point estimates 23 | return scalar gomp_int = _b[_cons] 24 | return scalar gomp_slope = _b[`mort':`age'] 25 | 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /code/ado/scalarout.ado: -------------------------------------------------------------------------------- 1 | program define scalarout 2 | 3 | syntax using, num(real) id(string) [fmt(string) replace] 4 | 5 | * Replace or Append? 
6 | if ("`replace'"=="replace") local overwrite replace 7 | else local overwrite append 8 | 9 | * Open file 10 | tempname file 11 | file open `file' `using', write text `overwrite' 12 | 13 | * Write CSV line 14 | if (substr("`fmt'",-1,1)=="c") file write `file' `""`id'",""' `fmt' (`num') `"""' _n 15 | else file write `file' `""`id'","' `fmt' (`num') _n 16 | 17 | * Close file 18 | file close `file' 19 | 20 | end 21 | -------------------------------------------------------------------------------- /code/ado/scheme-leap.scheme: -------------------------------------------------------------------------------- 1 | * LEAP STATA GRAPHING SCHEME 2 | 3 | #include s2color 4 | 5 | * anglestyle vertical_tick horizontal 6 | color background white 7 | linestyle legend none 8 | 9 | 10 | color p1 navy 11 | color p2 maroon 12 | color p3 forest_green 13 | color p4 dkorange 14 | color p5 teal 15 | color p6 cranberry 16 | color p7 lavender 17 | color p8 khaki 18 | color p9 sienna 19 | color p10 emidblue 20 | color p11 emerald 21 | color p12 brown 22 | color p13 erose 23 | color p14 gold 24 | color p15 bluishgray 25 | 26 | 27 | 28 | gsize axis_title_gap tiny 29 | margin legend zero 30 | margin graph "2 3 3 3" 31 | -------------------------------------------------------------------------------- /code/ado/simulated_delta.ado: -------------------------------------------------------------------------------- 1 | program define simulated_delta 2 | 3 | /*** Draw new Gompertz parameters from the estimated multivariate normal distribution 4 | with point estimates and covariance from the MLE. 5 | ***/ 6 | 7 | version 13.1 8 | syntax varlist(min=1), REPs(integer) [seed(string) keep(string)] 9 | local group `varlist' 10 | 11 | isid `group' 12 | expand `reps' 13 | bys `group': gen sample_num = _n 14 | order `group' sample_num 15 | 16 | set seed `seed' 17 | tempvar z1 z2 18 | gen `z1' = rnormal() 19 | gen `z2' = rnormal() 20 | 21 | replace gomp_slope = A_slope_1*`z1' + gomp_slope 22 | replace gomp_int = A_int_1*`z1' + A_int_2*`z2' + gomp_int 23 | 24 | keep `group' `keep' sample_num gomp_int gomp_slope 25 | 26 | end 27 | -------------------------------------------------------------------------------- /code/ado/wsample.ado: -------------------------------------------------------------------------------- 1 | program define wsample 2 | 3 | version 13.1 4 | syntax newvarname, wt(varname) 5 | 6 | qui gen byte `varlist'=. 7 | mata: draw_wsample("`wt'", "`varlist'") 8 | 9 | end 10 | 11 | version 13.1 12 | set matastrict on 13 | 14 | mata: 15 | 16 | void draw_wsample(string scalar wt, string scalar gen) { 17 | // Inputs: 18 | // - a variable containing sampling weights, 19 | // - an empty variable to store number of times each record is sampled. 20 | // Requires: 21 | // - the weights to be non-missing 22 | // Outputs: 23 | // - number of times each record is sampled, using random sampling with 24 | // replacement. each record's probability of being sampled is equal to 25 | // its relative sample weight. 
number of draws = number of records = _N 26 | 27 | 28 | // Get column number of weight and to-generate vars 29 | real scalar wtcol 30 | wtcol = st_varindex(wt) 31 | 32 | real scalar gencol 33 | gencol = st_varindex(gen) 34 | 35 | // Compute total weight 36 | stata("sum " + wt + ", meanonly") 37 | 38 | real scalar totwt 39 | totwt = st_numscalar("r(sum)") 40 | 41 | // Generate random draws, uniform from 0 to totwt 42 | real draws 43 | draws = runiform(st_nobs(), 1) * totwt 44 | _sort(draws, 1) 45 | 46 | // Store number of times each unit has been drawn 47 | real scalar timesdrawn 48 | 49 | real scalar runningdraws 50 | runningdraws=1 51 | 52 | real scalar runningweight 53 | runningweight=0 54 | 55 | for (obs=1; obs<=st_nobs(); obs++) { 56 | 57 | // Update "running weight" and "times drawn" 58 | runningweight = runningweight + _st_data(obs,wtcol) 59 | timesdrawn=0 60 | 61 | // Determine number of times this unit was drawn 62 | while (runningdraws<=st_nobs()) { 63 | if (draws[runningdraws,1] <= runningweight) { 64 | timesdrawn++ 65 | runningdraws++ 66 | } 67 | else break 68 | } 69 | 70 | // Store number of draws 71 | _st_store(obs, gencol, timesdrawn) 72 | 73 | } 74 | 75 | } 76 | 77 | end 78 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_coords_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/county1990_coords_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_database_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/county1990_database_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_maptile.ado: -------------------------------------------------------------------------------- 1 | *! 22mar2015, Michael Stepner, stepner@mit.edu 2 | 3 | program define _maptile_county1990 4 | syntax , [ geofolder(string) /// 5 | mergedatabase /// 6 | map spmapvar(varname) var(varname) binvar(varname) clopt(string) legopt(string) min(string) clbreaks(string) max(string) mapcolors(string) ndfcolor(string) /// 7 | savegraph(string) replace resolution(string) map_restriction(string) spopt(string) /// 8 | /* Geography-specific options */ /// 9 | stateoutline(string) conus /// 10 | ] 11 | 12 | if ("`mergedatabase'"!="") { 13 | novarabbrev merge 1:m county using `"`geofolder'/county1990_database_clean"', nogen update replace 14 | exit 15 | } 16 | 17 | if ("`map'"!="") { 18 | 19 | if ("`conus'"=="conus") { 20 | * Hide AK and HI from stateoutline 21 | local polygon_select select(drop if inlist(_ID,27,8)) 22 | 23 | * Hide AK and HI from main map 24 | if ("`map_restriction'"=="") local map_restriction if !inlist(floor(county/1000),2,15) 25 | else local map_restriction `map_restriction' & !inlist(floor(county/1000),2,15) 26 | } 27 | 28 | if ("`stateoutline'"!="") { 29 | cap confirm file `"`geofolder'/state_coords_clean.dta"' 30 | if (_rc==0) local polygon polygon(data(`"`geofolder'/state_coords_clean"') ocolor(black) osize(`stateoutline' ...) 
`polygon_select') 31 | else if (_rc==601) { 32 | di as error `"stateoutline() requires the {it:state} geography to be installed"' 33 | di as error `"--> state_coords_clean.dta must be present in the geofolder"' 34 | exit 198 35 | } 36 | else { 37 | error _rc 38 | exit _rc 39 | } 40 | } 41 | 42 | spmap `spmapvar' using `"`geofolder'/county1990_coords_clean"' `map_restriction', id(id) /// 43 | `clopt' /// 44 | `legopt' /// 45 | legend(pos(5) size(*1.8)) /// 46 | fcolor(`mapcolors') ndfcolor(`ndfcolor') /// 47 | oc(black ...) ndo(black) /// 48 | os(vvthin ...) nds(vvthin) /// 49 | `polygon' /// 50 | `spopt' 51 | 52 | * Save graph 53 | if (`"`savegraph'"'!="") __savegraph_maptile, savegraph(`savegraph') resolution(`resolution') `replace' 54 | 55 | } 56 | 57 | end 58 | 59 | * Save map to file 60 | cap program drop __savegraph_maptile 61 | program define __savegraph_maptile 62 | 63 | syntax, savegraph(string) resolution(string) [replace] 64 | 65 | * check file extension using a regular expression 66 | if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0) 67 | 68 | * deal with different filetypes appropriately 69 | if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace' 70 | else if inlist(`"`graphextension'"',".ps",".eps") graph export `"`savegraph'"', mag(`=round(100*`resolution')') `replace' 71 | else if (`"`graphextension'"'==".png") graph export `"`savegraph'"', width(`=round(3200*`resolution')') `replace' 72 | else if (`"`graphextension'"'==".tif") graph export `"`savegraph'"', width(`=round(1600*`resolution')') `replace' 73 | else graph export `"`savegraph'"', `replace' 74 | 75 | end 76 | 77 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_maptile.smcl: -------------------------------------------------------------------------------- 1 | {smcl} 2 | 3 | {pstd}{bf:maptile {hline 2} Geography: county1990}{p_end} 4 | 5 | 6 | {marker description}{...} 7 | {title:Description} 8 | 9 | {pstd}{bf:county1990} generates a map of United States counties using 1990 boundaries.{p_end} 10 | 11 | {pstd}It displays Alaska and Hawaii rescaled and moved to the bottom left of the map, below the continental US, for ease of viewing.{p_end} 12 | 13 | 14 | {marker geographic-id-variable}{...} 15 | {title:Geographic ID variable} 16 | 17 | {pstd}{bf:county} {hline 2} FIPS county codes{p_end} 18 | 19 | 20 | {marker geography-specific-options}{...} 21 | {title:Geography-Specific Options} 22 | 23 | {synoptset 32 tabbed}{...} 24 | {synopt:{opth stateoutline(linewidthstyle)}}overlays the map with a (potentially thicker) line on state boundaries{p_end} 25 | {synopt:{bf:conus}}hides Alaska and Hawaii, on the map {bf:and} the state outline{p_end} 26 | 27 | 28 | {marker author}{...} 29 | {title:Author} 30 | 31 | {pstd}Michael Stepner{break} stepner@mit.edu{p_end} -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_coords_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/cz_coords_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_database_clean.dta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/cz_database_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_maptile.ado: -------------------------------------------------------------------------------- 1 | *! 22mar2015, Michael Stepner, stepner@mit.edu 2 | 3 | program define _maptile_cz 4 | syntax , [ geofolder(string) /// 5 | mergedatabase /// 6 | map spmapvar(varname) var(varname) binvar(varname) clopt(string) legopt(string) min(string) clbreaks(string) max(string) mapcolors(string) ndfcolor(string) /// 7 | savegraph(string) replace resolution(string) map_restriction(string) spopt(string) /// 8 | /* Geography-specific options */ /// 9 | stateoutline(string) conus /// 10 | ] 11 | 12 | if ("`mergedatabase'"!="") { 13 | novarabbrev merge 1:m cz using `"`geofolder'/cz_database_clean"', nogen 14 | exit 15 | } 16 | 17 | if ("`map'"!="") { 18 | 19 | if ("`conus'"=="conus") { 20 | * Hide AK and HI from stateoutline 21 | local polygon_select select(drop if inlist(_ID,27,8)) 22 | 23 | * Hide AK and HI from main map 24 | if ("`map_restriction'"=="") local map_restriction if !inlist(floor(cz/100),341,347,356) 25 | else local map_restriction `map_restriction' & !inlist(floor(cz/100),341,347,356) 26 | } 27 | 28 | if ("`stateoutline'"!="") { 29 | cap confirm file `"`geofolder'/state_coords_clean.dta"' 30 | if (_rc==0) local polygon polygon(data(`"`geofolder'/state_coords_clean"') ocolor(black) osize(`stateoutline' ...) `polygon_select') 31 | else if (_rc==601) { 32 | di as error `"stateoutline() requires the {it:state} geography to be installed"' 33 | di as error `"--> state_coords_clean.dta must be present in the geofolder"' 34 | exit 198 35 | } 36 | else { 37 | error _rc 38 | exit _rc 39 | } 40 | } 41 | 42 | spmap `spmapvar' using `"`geofolder'/cz_coords_clean"' `map_restriction', id(id) /// 43 | `clopt' /// 44 | `legopt' /// 45 | legend(pos(5) size(*1.8)) /// 46 | fcolor(`mapcolors') ndfcolor(`ndfcolor') /// 47 | oc(black ...) ndo(black) /// 48 | os(vvthin ...) 
nds(vvthin) /// 49 | `polygon' /// 50 | `spopt' 51 | 52 | * Save graph 53 | if (`"`savegraph'"'!="") __savegraph_maptile, savegraph(`savegraph') resolution(`resolution') `replace' 54 | 55 | } 56 | 57 | end 58 | 59 | * Save map to file 60 | cap program drop __savegraph_maptile 61 | program define __savegraph_maptile 62 | 63 | syntax, savegraph(string) resolution(string) [replace] 64 | 65 | * check file extension using a regular expression 66 | if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0) 67 | 68 | * deal with different filetypes appropriately 69 | if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace' 70 | else if inlist(`"`graphextension'"',".ps",".eps") graph export `"`savegraph'"', mag(`=round(100*`resolution')') `replace' 71 | else if (`"`graphextension'"'==".png") graph export `"`savegraph'"', width(`=round(3200*`resolution')') `replace' 72 | else if (`"`graphextension'"'==".tif") graph export `"`savegraph'"', width(`=round(1600*`resolution')') `replace' 73 | else graph export `"`savegraph'"', `replace' 74 | 75 | end 76 | 77 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_maptile.smcl: -------------------------------------------------------------------------------- 1 | {smcl} 2 | 3 | {pstd}{bf:maptile {hline 2} Geography: cz}{p_end} 4 | 5 | 6 | {marker description}{...} 7 | {title:Description} 8 | 9 | {pstd}{bf:cz} generates a map of United States 1990 Commuting Zones.{p_end} 10 | 11 | {pstd}It displays Alaska and Hawaii rescaled and moved to the bottom left of the map, below the continental US, for ease of viewing.{p_end} 12 | 13 | 14 | {marker geographic-id-variable}{...} 15 | {title:Geographic ID variable} 16 | 17 | {pstd}{bf:cz} {hline 2} 1990 Commuting Zone codes{p_end} 18 | 19 | 20 | {marker geography-specific-options}{...} 21 | {title:Geography-Specific Options} 22 | 23 | {synoptset 32 tabbed}{...} 24 | {synopt:{opth stateoutline(linewidthstyle)}}overlays the map with a (potentially thicker) line on state boundaries{p_end} 25 | {synopt:{bf:conus}}hides Alaska and Hawaii, on the map {bf:and} the state outline{p_end} 26 | 27 | 28 | {marker author}{...} 29 | {title:Author} 30 | 31 | {pstd}Michael Stepner{break} stepner@mit.edu{p_end} -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_coords_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/state_coords_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_database_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/state_database_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_maptile.ado: -------------------------------------------------------------------------------- 1 | *! 
6sep2014, Michael Stepner, stepner@mit.edu 2 | 3 | program define _maptile_state 4 | syntax , [ geofolder(string) /// 5 | mergedatabase /// 6 | map spmapvar(varname) var(varname) binvar(varname) clopt(string) legopt(string) min(string) clbreaks(string) max(string) mapcolors(string) ndfcolor(string) /// 7 | savegraph(string) replace resolution(string) map_restriction(string) spopt(string) /// 8 | /* Geography-specific options */ /// 9 | geoid(varname) /// 10 | ] 11 | 12 | if ("`mergedatabase'"!="") { 13 | if ("`geoid'"=="") local geoid state 14 | 15 | if inlist("`geoid'","state","statefips","statename") novarabbrev merge 1:1 `geoid' using `"`geofolder'/state_database_clean"', nogen keepusing(`geoid' _polygonid) 16 | else { 17 | di as error "with geography(state), geoid() must be 'state', 'statefips', 'statename', or blank" 18 | exit 198 19 | } 20 | exit 21 | } 22 | 23 | if ("`map'"!="") { 24 | 25 | * Avoid having a "No Data" legend item just because DC is missing (it's not visible on the map, nor is it a state) 26 | sum `spmapvar' if _polygonid==6, meanonly 27 | if (r(N)==0) { 28 | * add map restriction 29 | if (`"`map_restriction'"'!="") local map_restriction `map_restriction' & _polygonid!=6 30 | else local map_restriction if _polygonid!=6 31 | } 32 | 33 | spmap `spmapvar' using `"`geofolder'/state_coords_clean"' `map_restriction', id(_polygonid) /// 34 | `clopt' /// 35 | `legopt' /// 36 | legend(pos(5) size(*1.8)) /// 37 | fcolor(`mapcolors') ndfcolor(`ndfcolor') /// 38 | oc(black ...) ndo(black) /// 39 | os(vthin ...) nds(vthin) /// 40 | `spopt' 41 | 42 | * Save graph 43 | if (`"`savegraph'"'!="") __savegraph_maptile, savegraph(`savegraph') resolution(`resolution') `replace' 44 | 45 | } 46 | 47 | end 48 | 49 | * Save map to file 50 | cap program drop __savegraph_maptile 51 | program define __savegraph_maptile 52 | 53 | syntax, savegraph(string) resolution(string) [replace] 54 | 55 | * check file extension using a regular expression 56 | if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0) 57 | 58 | * deal with different filetypes appropriately 59 | if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace' 60 | else if inlist(`"`graphextension'"',".ps",".eps") graph export `"`savegraph'"', mag(`=round(100*`resolution')') `replace' 61 | else if (`"`graphextension'"'==".png") graph export `"`savegraph'"', width(`=round(3200*`resolution')') `replace' 62 | else if (`"`graphextension'"'==".tif") graph export `"`savegraph'"', width(`=round(1600*`resolution')') `replace' 63 | else graph export `"`savegraph'"', `replace' 64 | 65 | end 66 | 67 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_maptile.smcl: -------------------------------------------------------------------------------- 1 | {smcl} 2 | 3 | {pstd}{bf:maptile {hline 2} Geography: state}{p_end} 4 | 5 | 6 | {marker description}{...} 7 | {title:Description} 8 | 9 | {pstd}{bf:state} generates a map of U.S. 
states.{p_end} 10 | 11 | {pstd}It displays Alaska and Hawaii rescaled and moved to the bottom left of the map, below the continental US, for ease of viewing.{p_end} 12 | 13 | 14 | {marker geographic-id-variables}{...} 15 | {title:Geographic ID variables} 16 | 17 | {synoptset 13 tabbed}{...} 18 | {synopt:{bf:state}}2-letter state abbreviations{p_end} 19 | {synopt:{bf:statefips}}2-digit state FIPS codes{p_end} 20 | {synopt:{bf:statename}}unabbreviated state names{p_end} 21 | 22 | 23 | {marker geography-specific-options}{...} 24 | {title:Geography-Specific Options} 25 | 26 | {synoptset tabbed}{...} 27 | {synopt:{opth geoid(varname)}}specifies the geographic ID variable to use; default is {bf:geoid(state)}{p_end} 28 | 29 | 30 | {marker author}{...} 31 | {title:Author} 32 | 33 | {pstd}Michael Stepner{break} stepner@mit.edu{p_end} -------------------------------------------------------------------------------- /code/ado_ssc/_gwtmean.ado: -------------------------------------------------------------------------------- 1 | /* _gwtmean.ado 6-5-2001 David Kantor, Institute for Policy Studies Johns Hopkins University This is an egen weighted mean function, based on c:\stata\ado\_\_gmean.ado Actually, it is just a very minor tweaking of _gmean. In fact this could be considered an enhancement to _gmean; just rename this file to _gmean and adjust the program define statement. This is used with egen as follows: egen newvar = wtmean(exp), by(byvar) weight(wgt_expr) This allows a weight option. egen does not allow a weight in the usual manner ([weight= wgtvar]), so we us an option. If the weight option is omitted, then the action and results are identical to egen mean (_gmean.ado). (If the weight expression is a non-zero constant, then, too, the results are identical to egen mean.) The weight macro captures the weight. If it is non-empty, then it is changed to include parentheses and the * operator. The inclusion of parentheses actually allows an expression -- not just a variable. */ *! version 1.0.0 6-5-2001 program define _gwtmean version 3.0 local varlist "req new max(1)" local exp "req nopre" local if "opt" local in "opt" local options "by(string) weight(string)" parse "`*'" tempvar touse if "`weight'" ~= "" { local weight "* (`weight')" } quietly { gen byte `touse'=1 `if' `in' sort `touse' `by' by `touse' `by': replace `varlist' = /* */ sum((`exp')`weight')/sum(((`exp')!=.)`weight') if `touse'==1 by `touse' `by': replace `varlist' = `varlist'[_N] } end -------------------------------------------------------------------------------- /code/ado_ssc/maptile_geohelp.ado: -------------------------------------------------------------------------------- 1 | *! version 1.02 1nov2015 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicence (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? 
Jeff Atwood gives good reasons: 11 | * http://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | 14 | program define maptile_geohelp 15 | version 11 16 | 17 | set more off 18 | 19 | syntax name(name=geography id="geoname") [, geofolder(string)] 20 | 21 | * Set default directory 22 | if (`"`geofolder'"'=="") local geofolder `c(sysdir_personal)'maptile_geographies 23 | 24 | * Check that the geography is installed 25 | cap confirm file `"`geofolder'/`geography'_maptile.ado"' 26 | if (_rc!=0) { 27 | di as error "geography(`geography') specified, but it is not installed." 28 | 29 | if ("`geofolder'"=="`c(sysdir_personal)'maptile_geographies") di as text `"To see a list of installed geographies run: {stata maptile_geolist}"' 30 | else { 31 | di as text `"To see a list of installed geographies run:"' 32 | di as text `" {stata maptile_geolist, geofolder(`geofolder')}"' 33 | di as text "" 34 | } 35 | exit 198 36 | } 37 | 38 | * Check that it has a help file 39 | cap confirm file `"`geofolder'/`geography'_maptile.smcl"' 40 | if (_rc!=0) { 41 | di as error `"{bf:`geography'} geography exists, but it does not have a help file. There is no file:"' 42 | di as text `" `geofolder'/`geography'_maptile.smcl"' 43 | exit 198 44 | } 45 | 46 | * Display the help file 47 | view `"`geofolder'/`geography'_maptile.smcl"' 48 | 49 | end 50 | -------------------------------------------------------------------------------- /code/ado_ssc/maptile_geolist.ado: -------------------------------------------------------------------------------- 1 | *! version 1.02 1nov2015 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicence (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? Jeff Atwood gives good reasons: 11 | * http://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | 14 | program define maptile_geolist 15 | version 11 16 | 17 | set more off 18 | 19 | syntax [, geofolder(string)] 20 | 21 | * Set default directory 22 | if (`"`geofolder'"'=="") local geofolder `c(sysdir_personal)'maptile_geographies 23 | 24 | * Check that the specified directory exists (based on confirmdir.ado code by Dan Blanchette) 25 | local current_dir `"`c(pwd)'"' 26 | quietly capture cd `"`geofolder'"' 27 | if _rc!=0 { 28 | di as error `"unable to load directory `geofolder'"' 29 | exit 198 30 | } 31 | quietly cd `"`current_dir'"' 32 | 33 | 34 | * Store all relevant files in local 35 | local geos : dir `"`geofolder'"' files "*_maptile.ado" 36 | 37 | * Output geo_names 38 | if (`"`geos'"'=="") di as text "no geography templates found" 39 | else { 40 | di `: subinstr local geos "_maptile.ado" " ", all' 41 | } 42 | 43 | end 44 | -------------------------------------------------------------------------------- /code/ado_ssc/maptile_install.ado: -------------------------------------------------------------------------------- 1 | *! version 1.02 1nov2015 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicence (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? 
Jeff Atwood gives good reasons: 11 | * http://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | 14 | program define maptile_install 15 | version 11 16 | 17 | syntax using/, [replace] 18 | 19 | * Ensure that the directories exist 20 | cap mkdir "`c(sysdir_personal)'" 21 | cap mkdir "`c(sysdir_personal)'maptile_geographies" 22 | 23 | * Copy/download the specified geography 24 | qui copy `"`using'"' `"`c(sysdir_personal)'maptile_geographies/temp.zip"', replace 25 | 26 | * Change to the target directory 27 | local cwd `c(pwd)' 28 | di as text "" 29 | cd `"`c(sysdir_personal)'maptile_geographies"' 30 | 31 | * Extract the geography 32 | unzipfile temp.zip, `replace' 33 | erase temp.zip 34 | 35 | * Change back to original directory 36 | qui cd `"`cwd'"' 37 | 38 | * Suggest reading help file 39 | di as text "" 40 | di as text "To see the help file of the geography template, run:" 41 | di as text " {cmd:maptile_geohelp} {it:geoname}" 42 | 43 | end 44 | -------------------------------------------------------------------------------- /code/ado_ssc/save12.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0.0 9oct2016 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicense (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? Jeff Atwood gives good reasons: 11 | * https://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | program define save12 14 | version 12 15 | 16 | syntax [anything], [noLabel replace all] 17 | 18 | if c(stata_version)>=12 & c(stata_version)<13 { 19 | save `anything', `label' `replace' `all' 20 | } 21 | else if c(stata_version)>=13 & c(stata_version)<14 { 22 | saveold `anything', `label' `replace' `all' 23 | } 24 | else if c(stata_version)>=14 { 25 | saveold `anything', `label' `replace' `all' version(12) 26 | } 27 | else { 28 | di as error "Must have Stata version 12 or higher to save in Stata 12 format." 29 | error 499 30 | } 31 | 32 | end 33 | -------------------------------------------------------------------------------- /code/ado_ssc/save12.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.0.0 9oct2016}{...} 3 | {viewerjumpto "Syntax" "save12##syntax"}{...} 4 | {viewerjumpto "Description" "save12##description"}{...} 5 | {viewerjumpto "Options" "save12##options"}{...} 6 | {viewerjumpto "Author" "save12##author"}{...} 7 | {title:Title} 8 | 9 | {p2colset 5 15 17 2}{...} 10 | {p2col :{hi:save12} {hline 2}}Save Stata 12 dataset{p_end} 11 | {p2colreset}{...} 12 | 13 | 14 | {marker syntax}{...} 15 | {title:Syntax} 16 | 17 | {phang} 18 | Save data in memory to file in Stata 12 format 19 | 20 | {p 8 16 2} 21 | {cmd:save12} 22 | {it:{help filename}} 23 | [{cmd:,} {it:options}] 24 | 25 | 26 | {synoptset 17}{...} 27 | {synopthdr :options} 28 | {synoptline} 29 | {synopt :{opt nol:abel}}omit value labels from the saved dataset{p_end} 30 | {synopt :{opt replace}}overwrite existing dataset{p_end} 31 | {synopt :{opt all}}save {cmd:e(sample)} with the dataset; programmer's 32 | option{p_end} 33 | {synoptline} 34 | 35 | 36 | {marker description}{...} 37 | {title:Description} 38 | 39 | {pstd} 40 | {cmd:save12} provides a consistent command to save datasets in Stata 12 format, 41 | which facilitates collaboration between computers using different versions 42 | of Stata. Stata 12 and later versions of Stata can produce datasets in Stata 12 43 | format, but not using the same command. {cmd:save12} assures that the same code 44 | shared across multiple computers will save datasets in Stata 12 format, 45 | regardless of the host computer's version of Stata. 46 | 47 | {pstd} 48 | {opt save12} stores the dataset currently in memory on disk in Stata 12 format 49 | under the name {it:{help filename}}. If {it:filename} is not specified, the 50 | name under which the data were last known to Stata ({cmd:c(filename)}) is used. 51 | If {it:filename} is specified without an extension, {cmd:.dta} is used. If your 52 | {it:filename} contains embedded spaces, remember to enclose it in double 53 | quotes. 54 | 55 | {pstd} 56 | {opt save12} has been tested to work in Stata 12, 13 and 14. It will continue to 57 | work correctly in future versions of Stata so long as they maintain the same 58 | {bf:{help saveold}} syntax as Stata 14, with an option of {opt version(12)}. 59 | Stata 11 can read datasets in Stata 12 format, but cannot write datasets in 60 | Stata 12 format. 61 | 62 | 63 | {marker options}{...} 64 | {title:Options} 65 | 66 | {phang} 67 | {opt nolabel} omits value labels from the saved dataset. 68 | The associations between variables and value-label names, however, 69 | are saved along with the dataset label and the variable labels. 70 | 71 | {phang} 72 | {opt replace} permits {opt save12} to overwrite an existing dataset. 73 | 74 | {phang} 75 | {opt all} is for use by programmers. If specified, {cmd:e(sample)} will 76 | be saved with the dataset. You could run a regression; {cmd:save12 mydata, all}; 77 | {cmd:drop _all}; {cmd:use mydata}; and {cmd:predict yhat if e(sample)}. 78 | 79 | 80 | {marker author}{...} 81 | {title:Author} 82 | 83 | {pstd}Michael Stepner{p_end} 84 | {pstd}stepner@mit.edu{p_end} 85 | -------------------------------------------------------------------------------- /code/ado_ssc/save13.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0.0 9oct2016 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicense (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 
6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? Jeff Atwood gives good reasons: 11 | * https://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | program define save13 14 | version 13 15 | 16 | syntax [anything], [noLabel replace all] 17 | 18 | if c(stata_version)>=13 & c(stata_version)<14 { 19 | save `anything', `label' `replace' `all' 20 | } 21 | else if c(stata_version)>=14 { 22 | saveold `anything', `label' `replace' `all' version(13) 23 | } 24 | else { 25 | di as error "Must have Stata version 13 or higher to save in Stata 13 format." 26 | error 499 27 | } 28 | 29 | end 30 | -------------------------------------------------------------------------------- /code/ado_ssc/save13.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! version 1.0.0 9oct2016}{...} 3 | {viewerjumpto "Syntax" "save13##syntax"}{...} 4 | {viewerjumpto "Description" "save13##description"}{...} 5 | {viewerjumpto "Options" "save13##options"}{...} 6 | {viewerjumpto "Author" "save13##author"}{...} 7 | {title:Title} 8 | 9 | {p2colset 5 15 17 2}{...} 10 | {p2col :{hi:save13} {hline 2}}Save Stata 13 dataset{p_end} 11 | {p2colreset}{...} 12 | 13 | 14 | {marker syntax}{...} 15 | {title:Syntax} 16 | 17 | {phang} 18 | Save data in memory to file in Stata 13 format 19 | 20 | {p 8 16 2} 21 | {cmd:save13} 22 | {it:{help filename}} 23 | [{cmd:,} {it:options}] 24 | 25 | 26 | {synoptset 17}{...} 27 | {synopthdr :options} 28 | {synoptline} 29 | {synopt :{opt nol:abel}}omit value labels from the saved dataset{p_end} 30 | {synopt :{opt replace}}overwrite existing dataset{p_end} 31 | {synopt :{opt all}}save {cmd:e(sample)} with the dataset; programmer's 32 | option{p_end} 33 | {synoptline} 34 | 35 | 36 | {marker description}{...} 37 | {title:Description} 38 | 39 | {pstd} 40 | {cmd:save13} provides a consistent command to save datasets in Stata 13 format, 41 | which facilitates collaboration between computers using different versions 42 | of Stata. Stata 13 and later versions of Stata can produce datasets in Stata 13 43 | format, but not using the same command. {cmd:save13} assures that the same code 44 | shared across multiple computers will save datasets in Stata 13 format, 45 | regardless of the host computer's version of Stata. 46 | 47 | {pstd} 48 | {opt save13} stores the dataset currently in memory on disk in Stata 13 format 49 | under the name {it:{help filename}}. If {it:filename} is not specified, the 50 | name under which the data were last known to Stata ({cmd:c(filename)}) is used. 51 | If {it:filename} is specified without an extension, {cmd:.dta} is used. If your 52 | {it:filename} contains embedded spaces, remember to enclose it in double 53 | quotes. 54 | 55 | {pstd} 56 | {opt save13} has been tested to work in Stata 13 and Stata 14. It will continue to 57 | work correctly in future versions of Stata so long as they maintain the same 58 | {bf:{help saveold}} syntax as Stata 14, with an option of {opt version(13)}. 59 | 60 | 61 | {marker options}{...} 62 | {title:Options} 63 | 64 | {phang} 65 | {opt nolabel} omits value labels from the saved dataset. 66 | The associations between variables and value-label names, however, 67 | are saved along with the dataset label and the variable labels. 68 | 69 | {phang} 70 | {opt replace} permits {opt save13} to overwrite an existing dataset. 71 | 72 | {phang} 73 | {opt all} is for use by programmers. 
If specified, {cmd:e(sample)} will 74 | be saved with the dataset. You could run a regression; {cmd:save13 mydata, all}; 75 | {cmd:drop _all}; {cmd:use mydata}; and {cmd:predict yhat if e(sample)}. 76 | 77 | 78 | {marker author}{...} 79 | {title:Author} 80 | 81 | {pstd}Michael Stepner{p_end} 82 | {pstd}stepner@mit.edu{p_end} 83 | -------------------------------------------------------------------------------- /code/ado_ssc/spmap.hlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_ssc/spmap.hlp -------------------------------------------------------------------------------- /code/ado_ssc/spmap_psl.ado: -------------------------------------------------------------------------------- 1 | *! -spmap_psl-: Auxiliary program for -spmap- 2 | *! Version 1.3.0 - 13 March 2017 3 | *! Version 1.2.0 - 14 March 2008 4 | *! Version 1.1.0 - 7 May 2007 5 | *! Version 1.0.0 - 7 December 2006 6 | *! Author: Maurizio Pisati 7 | *! Department of Sociology and Social Research 8 | *! University of Milano Bicocca (Italy) 9 | *! maurizio.pisati@unimib.it 10 | 11 | 12 | 13 | 14 | * ---------------------------------------------------------------------------- 15 | * 1. Define program 16 | * ---------------------------------------------------------------------------- 17 | 18 | program spmap_psl, sclass 19 | version 9.2 20 | 21 | 22 | 23 | 24 | * ---------------------------------------------------------------------------- 25 | * 2. Define syntax 26 | * ---------------------------------------------------------------------------- 27 | 28 | syntax, List(string asis) Max(numlist min=1 max=1 >0 integer) /// 29 | Option(string asis) [Default(string)] 30 | 31 | 32 | 33 | 34 | * ---------------------------------------------------------------------------- 35 | * 3. Parse list 36 | * ---------------------------------------------------------------------------- 37 | 38 | local FIRST : word 1 of `list' 39 | if (inlist("`FIRST'","=","..","...")) { 40 | di as err `"{p}Option `option' does not accept symbol "`FIRST'" "' /// 41 | "as its first argument{p_end}" 42 | exit 198 43 | } 44 | local NL : word count `list' 45 | forval i = 1/`NL' { 46 | local ITEM : word `i' of `list' 47 | if ("`ITEM'" == ".") { 48 | if ("`default'" != "") { 49 | local EL "`default'" 50 | local PL `"`PL'"`EL'" "' 51 | } 52 | else { 53 | di as err `"{p}Option `option' does not accept symbol ".", "' /// 54 | `"since no default value exists for this "' /// 55 | `"option{p_end}"' 56 | exit 198 57 | } 58 | } 59 | else if ("`ITEM'" == "=") { 60 | local EL `"`: word `=`i'-1' of `PL''"' 61 | local PL `"`PL'"`EL'" "' 62 | } 63 | else if ("`ITEM'" == ".." | "`ITEM'" == "...") { 64 | local EL `"`: word `=`i'-1' of `PL''"' 65 | forval j = `i'/`max' { 66 | local PL `"`PL'"`EL'" "' 67 | } 68 | continue, break 69 | } 70 | else { 71 | local PL `"`PL'"`ITEM'" "' 72 | } 73 | } 74 | local NL : word count `PL' 75 | if (`NL' < `max') { 76 | if ("`default'" != "") { 77 | forval i = `=`NL'+1'/`max' { 78 | local PL `"`PL'"`default'" "' 79 | } 80 | } 81 | else { 82 | di as err "{p}Option `option' requires exactly `max' arguments{p_end}" 83 | exit 198 84 | } 85 | } 86 | sreturn local pl `"`PL'"' 87 | 88 | 89 | 90 | 91 | * ---------------------------------------------------------------------------- 92 | * 4. 
End program 93 | * ---------------------------------------------------------------------------- 94 | 95 | end 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /code/covariates/Health behavior maps.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "$root/scratch/BRFSS maps" 12 | cap mkdir "$root/scratch/BRFSS maps/data" 13 | 14 | 15 | *********************************** 16 | *** CZ Maps of Health Behaviors *** 17 | *********************************** 18 | 19 | * Load data 20 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") 21 | use "${root}/data/derived/final_covariates/cz_full_covariates.dta", clear 22 | 23 | *** Create maps 24 | foreach healthvar in "cur_smoke" "bmi_obese" "exercise_any" { 25 | 26 | if ("`healthvar'"=="exercise_any") local revcolor revcolor 27 | 28 | replace `healthvar'_q1 = 100 * `healthvar'_q1 29 | 30 | local title : var label `healthvar'_q1 31 | 32 | maptile `healthvar'_q1, geo(cz) `revcolor' nquantiles(10) legd(1) /// 33 | geofolder("$root/code/ado_maptile_geo") /// 34 | twopt( /// 35 | title("`title'") /// 36 | subtitle("by Commuting Zone") /// 37 | legend(size(*0.8)) /// 38 | ) /// 39 | savegraph("$root/scratch/BRFSS maps/CZ map - `title'.png") replace 40 | project, creates("$root/scratch/BRFSS maps/CZ map - `title'.png") preserve 41 | 42 | export delim cz `healthvar'_q1 using "$root/scratch/BRFSS maps/data/CZ map - `title'.csv", /// 43 | replace // output data for formatted map 44 | project, creates("$root/scratch/BRFSS maps/data/CZ map - `title'.csv") preserve 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /code/covariates/clean_educ.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** Education, year 2000 18 | ***/ 19 | 20 | ******* 21 | 22 | *** Load Census Data 23 | project, original("${cov_raw}/census_education_2000_county.csv") 24 | insheet using "${cov_raw}/census_education_2000_county.csv", clear names 25 | ren geoid2 cty 26 | ren (hc01_vc31 hc01_vc32) (cs_educ_hs cs_educ_ba) // age 25+ with high school degree, bachelor's degree 27 | keep cty cs_educ_hs cs_educ_ba 28 | recode cty (12086=12025) // Miami-Dade name/FIPS change 29 | lab var cs_educ_hs "Percent age 25+ with high school degree" 30 | lab var cs_educ_ba "Percent age 25+ with bachelor's degree" 31 | 32 | //merge counties to CZs 33 | project, original("${cov_raw}/cty_covariates.dta") preserve 34 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz cty_pop2000) keep(match master) 35 | assert floor(cty/1000)==2 | 
cty==15005 if _merge!=3 // unmerged are either Alaska counties or Kalawao, Hawaii 36 | drop _merge 37 | drop if cz==. 38 | preserve 39 | keep cty cs_educ* 40 | save13 "${cov_clean}/cty_cs_educ.dta", replace 41 | project, creates("${cov_clean}/cty_cs_educ.dta") 42 | 43 | * Collapse to CZ level 44 | restore 45 | collapse (mean) cs_educ_hs cs_educ_ba [w=cty_pop2000], by(cz) 46 | save13 "${cov_clean}/cz_cs_educ.dta", replace 47 | project, creates("${cov_clean}/cz_cs_educ.dta") 48 | -------------------------------------------------------------------------------- /code/covariates/clean_hospquality.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** Hospital Compare Data 18 | 19 | 30-day hospital mortality rates for heart attacks, pneumonia, heart failure. 20 | Averaged over years 2002-2013. 21 | 22 | ***/ 23 | 24 | ******* 25 | 26 | 27 | *** Create SSA-cty FIPS crosswalk 28 | project, original("${cov_raw}/ssa_cty_cw.dta") 29 | use "${cov_raw}/ssa_cty_cw.dta", clear 30 | *drop double-coding of miami-dade 31 | drop if fips=="12086" 32 | isid ssa 33 | tempfile ssa_cty 34 | save `ssa_cty' 35 | 36 | 37 | *** Load data 38 | project, original("${cov_raw}/Adj_mortality_2002to2013_byCounty.dta") 39 | use "${cov_raw}/Adj_mortality_2002to2013_byCounty.dta", clear 40 | drop service 41 | drop if substr(cnty,-3,3)=="999" //state-level observations 42 | ren cnty ssa 43 | 44 | //merge to county FIPS identifiers 45 | merge m:1 ssa using `ssa_cty', keepus(fips) keep(match master) nogen 46 | drop if mi(fips) 47 | ren fips cty 48 | destring cty, replace 49 | 50 | * Recode Counties 51 | * Miami-Dade name/FIPS change, Broomfield split off from Boulder, Washabaugh Merged into Jackson, South Boston Merged into Halifax, Nansemond merged into Suffolk 52 | recode cty (12086=12025) (8014 = 8013) (46131=46071) (51780=51083) (51695=51800) 53 | 54 | * Collapse to County level 55 | foreach cause in ami chf pn { 56 | bys cty: egen temp = wtmean(adjmortmeas_`cause'all30day), weight(ndmortmeas_`cause'all) 57 | drop adjmortmeas_`cause'all30day 58 | ren temp adjmortmeas_`cause'all30day 59 | } 60 | collapse (mean) adj* (rawsum) nd*, by(cty) 61 | 62 | 63 | drop if cty>=57000 //Drop Territories 64 | project, original("${cov_raw}/cty_covariates.dta") preserve 65 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz cty_pop2000 cz_pop2000) 66 | assert floor(cty/1000)==2 if _merge!=3 // unmatched counties are Alaskan 67 | drop _merge 68 | drop if cz==. 
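* [Editorial note] The weighted collapses above and the standardized index built below rely on
* the -wtmean()- egen function from _gwtmean.ado (included earlier under code/ado_ssc/), which
* computes sum(weight*x)/sum(weight) over nonmissing x within each by-group. A minimal,
* commented-out sketch with made-up numbers (not part of this do-file):
/*
clear
input byte grp float rate long cases
1 0.10 100
1 0.20 300
2 0.05 200
end
egen wtrate = wtmean(rate), by(grp) weight(cases)
list, sepby(grp)
* grp 1: (0.10*100 + 0.20*300)/400 = 0.175 ; grp 2: 0.05
*/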
69 | 70 | preserve 71 | 72 | *** County level 73 | keep adj* cty cty_pop2000 74 | 75 | * Create index for acute care (standardize & combine measures) 76 | foreach v of varlist adjmortmeas_amiall30day adjmortmeas_chfall30day adjmortmeas_pnall30day { 77 | qui sum `v' [w=cty_pop2000] 78 | gen `v'_std = (`v'- `r(mean)')/`r(sd)' 79 | } 80 | gen mort_30day_hosp_z = (adjmortmeas_amiall30day_std + adjmortmeas_chfall30day_std + adjmortmeas_pnall30day_std)/3 81 | 82 | * Output cty dataset 83 | save13 "${cov_clean}/cty_hospitalcompare_30day.dta", replace 84 | project, creates("${cov_clean}/cty_hospitalcompare_30day.dta") 85 | 86 | 87 | *** CZ level 88 | restore 89 | 90 | * Collapse mortality rates to CZ level 91 | foreach cause in ami chf pn { 92 | bys cz: egen temp = wtmean(adjmortmeas_`cause'all30day), weight(ndmortmeas_`cause'all) 93 | drop adjmortmeas_`cause'all30day 94 | ren temp adjmortmeas_`cause'all30day 95 | } 96 | collapse (mean) adj* cz_pop2000, by(cz) 97 | 98 | * Create index for acute care (standardize & combine measures) 99 | foreach v of varlist adjmortmeas_amiall30day adjmortmeas_chfall30day adjmortmeas_pnall30day { 100 | qui sum `v' [w=cz_pop2000] 101 | gen `v'_std = (`v'- `r(mean)')/`r(sd)' 102 | } 103 | gen mort_30day_hosp_z = (adjmortmeas_amiall30day_std + adjmortmeas_chfall30day_std + adjmortmeas_pnall30day_std)/3 104 | drop cz_pop2000 105 | 106 | * Output cz dataset 107 | save13 "${cov_clean}/cz_hospitalcompare_30day.dta", replace 108 | project, creates("${cov_clean}/cz_hospitalcompare_30day.dta") 109 | -------------------------------------------------------------------------------- /code/covariates/clean_pop_labforce_change1980to2000.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** County-level change in population and labor force, 1980-2000 18 | ***/ 19 | 20 | ******* 21 | 22 | * Load Census data 23 | project, original("${cov_raw}/nhgis0007_ts_nominal_county.csv") 24 | import delimited using "${cov_raw}/nhgis0007_ts_nominal_county.csv", clear varnames(1) rowrange(3) 25 | destring *, replace 26 | 27 | //generate FIPS identifier, and keep vars from 1980 and 2000 28 | gen cty = statefp*1000 + countyfp 29 | gen statefips = statefp 30 | ren (a00aa1980 a00aa2000) (pop1980 pop2000) // total population, 1980 & 2000 31 | ren (b84aa1980 b84aa2000) (lf1980 lf2000) // population 16+ in the labor force, 1980 & 2000 32 | drop if mi(pop1980) & mi(pop2000) & mi(lf1980) & mi(lf2000) 33 | keep cty statefips pop* lf* 34 | 35 | * Output at county level 36 | drop if cty==. 
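* [Editorial note] The block below copies the year-2000 totals filed under Miami-Dade's new FIPS
* code (12086) onto its old code (12025) using explicit subscripts, which read from the adjacent
* observation after sorting. A commented-out toy illustration with hypothetical values (not part
* of this do-file):
/*
clear
input long cty float pop2000
12025 .
12086 1000
end
sort cty
replace pop2000 = pop2000[_n+1] if cty==12025   // copy value from the next row
drop if cty==12086
list   // 12025 now carries the value that was stored under 12086
*/
* The percent changes computed below are simple growth rates; for instance, hypothetical values
* pop1980 = 200,000 and pop2000 = 250,000 give (250,000 - 200,000)/200,000 = 0.25.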
37 | 38 | //recode 1 county that changed FIPS (but not boundaries) over 1980-2000 39 | gen flag = 1 if inlist(cty,12025,12086) // Miami-Dade 40 | sort flag cty 41 | replace pop2000 = pop2000[_n+1] if cty==12025 42 | replace lf2000 = lf2000[_n+1] if cty==12025 43 | drop if cty==12086 44 | drop flag 45 | 46 | //calculate percent change in population and labor force 47 | gen pop_d_2000_1980 = (pop2000 - pop1980)/pop1980 48 | gen lf_d_2000_1980 = (lf2000 - lf1980)/lf1980 49 | 50 | //drop values for counties that changed boundaries over 1980-2000 51 | replace pop_d_2000_1980 = . if inlist(cty, 4012, 4027, 35006, 35061, 51780, /// 52 | 51083, 30113, 30031, 30067) 53 | replace lf_d_2000_1980 = . if inlist(cty, 4012, 4027, 35006, 35061, 51780, /// 54 | 51083, 30113, 30031, 30067) 55 | //drop Alaska (many county changes) 56 | replace pop_d_2000_1980 = . if statefips==2 57 | replace lf_d_2000_1980 = . if statefips==2 58 | //(excludes population ~1m with Alaska) 59 | 60 | project, original("${cov_raw}/cty_covariates.dta") preserve 61 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz) keep(match master) 62 | //recode CZs of 2 unmatched counties 63 | assert statefips==2 | cty==15005 | inlist(cty,51780,30113) if _merge!=3 64 | drop _merge 65 | replace cz = 2200 if cty==51780 66 | replace cz = 34402 if cty==30113 67 | drop if cz==. //Kalawao County, Hawaii (population ~90), unmatched counties in Alaska 68 | 69 | preserve 70 | keep if pop_d_2000_1980!=. | lf_d_2000_1980!=. 71 | keep cty statefips pop_d_2000_1980 lf_d_2000_1980 72 | order cty statefips pop_d_2000_1980 lf_d_2000_1980 73 | save13 "${cov_clean}/cty_cs_popchange_labforce.dta", replace 74 | project, creates("${cov_clean}/cty_cs_popchange_labforce.dta") 75 | 76 | * Output at CZ level 77 | restore 78 | //recode populations of CZs with county changes (38100, 38300) 79 | expand 2 if cty==4027, gen(flag) //Yuma County 80 | replace cz = 38300 if flag==1 //La Paz CZ 81 | // La Paz CZ gets a small part of the pre-split Yuma County 82 | replace pop1980 = 13844/(106895+13844) * pop1980 if flag==1 83 | replace lf1980 = 13844/(106895+13844)* lf1980 if flag==1 84 | replace pop2000 = . if flag==1 85 | replace lf2000 = . 
if flag==1 86 | 87 | // Yuma CZ gets most of the pre-split Yuma County 88 | replace pop1980 = 106895/(106895+13844) * pop1980 if flag==0 & cty==4027 89 | replace lf1980 = 106895/(106895+13844) * lf1980 if flag==0 & cty==4027 90 | 91 | //collapse, and calculate percent change in population and labor force, 1980-2000 92 | collapse (sum) pop1980 pop2000 lf1980 lf2000, by(cz) 93 | 94 | gen pop_d_2000_1980 = (pop2000 - pop1980)/pop1980 95 | gen lf_d_2000_1980 = (lf2000 - lf1980)/lf1980 96 | 97 | keep cz pop_d_2000_1980 lf_d_2000_1980 98 | save13 "${cov_clean}/cz_cs_popchange_labforce.dta", replace 99 | project, creates("${cov_clean}/cz_cs_popchange_labforce.dta") 100 | -------------------------------------------------------------------------------- /code/covariates/clean_racefractions.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** fraction Black, fraction Hispanic; year 2000 18 | ***/ 19 | 20 | ******* 21 | 22 | *** Load Census Data 23 | project, original("${cov_raw}/Census cty population by race/DEC_00_SF1_P008_with_ann.csv") 24 | import delim "${cov_raw}/Census cty population by race/DEC_00_SF1_P008_with_ann.csv", clear rowr(3) varn(1) 25 | destring *, replace 26 | ren (geoid2 vd01 vd04 vd10) (cty pop_total pop_black pop_hispanic) 27 | keep cty pop_* 28 | 29 | // merge to CZs 30 | recode cty (12086=12025) // Miami-Dade fips change 31 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz) keep(match master) 32 | assert floor(cty/1000)==2 | cty==15005 if _merge!=3 // unmerged are Alaska counties or Kalawao, Hawaii 33 | drop _merge 34 | drop if cz==. 
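* [Editorial note] Below, race fractions are formed at the county level, while CZ-level fractions
* are obtained by first summing the population counts within each CZ and then dividing, i.e. a
* person-weighted aggregate rather than an unweighted mean of county shares. Illustrative
* (made-up) numbers: county A with 50 Black residents of 100 total (50%) and county B with 90 of
* 900 total (10%) give a CZ share of 140/1,000 = 14%, not the 30% an unweighted average of the
* two county shares would imply.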
35 | preserve 36 | 37 | // output at county level 38 | gen cs_frac_black = pop_black/pop_total 39 | gen cs_frac_hisp = pop_hispanic/pop_total 40 | keep cty cs_frac* 41 | save13 "${cov_clean}/cty_cs_popbyrace.dta", replace 42 | project, creates("${cov_clean}/cty_cs_popbyrace.dta") 43 | 44 | // output at CZ level 45 | restore 46 | collapse (sum) pop*, by(cz) 47 | gen cs_frac_black = pop_black/pop_total 48 | gen cs_frac_hisp = pop_hispanic/pop_total 49 | keep cz cs_frac* 50 | save13 "${cov_clean}/cz_cs_popbyrace.dta", replace 51 | project, creates("${cov_clean}/cz_cs_popbyrace.dta") 52 | -------------------------------------------------------------------------------- /code/covariates/clean_uninsurance.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** Percent uninsured, year 2010 18 | ***/ 19 | 20 | ******* 21 | 22 | 23 | *** Load 2010 Small Area Health Insurance Estimates 24 | project, original("${cov_raw}/sahie2010.csv") 25 | import delimited "${cov_raw}/sahie2010.csv", delimiter(comma) varnames(4) rowrange(4) clear 26 | destring, replace 27 | 28 | * Keep county-level estimates by all ages/races/genders/incomes 29 | * (Note that data is only for people below age 65, since Medicare starts at 65) 30 | keep if geocat==50 & agecat==0 & racecat==0 & sexcat==0 & iprcat==0 31 | keep stcou nic nui 32 | destring nic nui, replace force 33 | replace stcou = subinstr(stcou,"=","",.) 34 | replace stcou = subinstr(stcou,`"""',"",.) 35 | destring stcou, replace 36 | ren stcou cty 37 | recode cty (12086=12025) (8014 = 8013) // Miami-Dade name/FIPS change, Broomfield split off from Boulder 38 | collapse (sum) nic nui, by(cty) 39 | //merge counties to CZs 40 | 41 | project, original("${cov_raw}/cty_covariates.dta") preserve 42 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz) keep(match master) 43 | assert floor(cty/1000)==2 | cty==15005 /// 44 | | inlist(cty,51560) if _merge!=3 // unmerged are Alaska counties, Kalawao, Hawaii, or Clifton Forge, VA 45 | drop _merge 46 | drop if cz==. 
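* [Editorial note] The uninsurance rate below is nui/(nic + nui) scaled to percent, where nui and
* nic are the SAHIE counts of uninsured and insured people under age 65. With illustrative
* (made-up) counts of nui = 15,000 and nui + nic = 100,000, puninsured2010 = 15. The CZ figures
* further down are computed the same way after summing nui and nic within each CZ.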
47 | preserve 48 | gen puninsured2010 = 100 * nui / (nic + nui) 49 | lab var puninsured2010 "Percent without Health Insurance in 2010" 50 | keep cty puninsured2010 51 | save13 "${cov_clean}/cty_cs_puninsured.dta", replace 52 | project, creates("${cov_clean}/cty_cs_puninsured.dta") 53 | 54 | * Collapse to CZ level 55 | restore 56 | collapse (sum) nui nic, by(cz) 57 | gen puninsured2010 = 100 * nui / (nic + nui) 58 | lab var puninsured2010 "Percent without Health Insurance in 2010" 59 | keep cz puninsured2010 60 | save13 "${cov_clean}/cz_cs_puninsured.dta", replace 61 | project, creates("${cov_clean}/cz_cs_puninsured.dta") 62 | -------------------------------------------------------------------------------- /code/covariates/cz_names.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global raw "$root/data/raw" 12 | global derived "$root/data/derived" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived" 16 | cap mkdir "$root/data/derived/final_covariates" 17 | 18 | ************ 19 | 20 | * Load Census 2000 Data 21 | project, original("${raw}/Covariate Data/SUB-EST00INT.csv") 22 | import delimited "${raw}/Covariate Data/SUB-EST00INT.csv", clear 23 | gen cty = state*1000 + county 24 | 25 | * Convert 2000-2014 Inter/Postcensal counties to 1999 counties 26 | * see https://www.census.gov/geo/reference/county-changes.html 27 | drop if cty == 15005 // Kalawao, Hawaii (pop ~90) 28 | recode cty (12086 = 12025) // Miami-Dade FIPS change 29 | recode cty (8014 = 8013) // Broomfield County created from Boulder, CO 30 | recode cty (2282 = 2231) (2275 2195 = 2280) (2230 2105 = 2231) /// 31 | (2198 = 2201) (2068 = 2290) // County changes in Alaska 32 | 33 | project, original("${raw}/Covariate Data/cty_covariates.dta") preserve 34 | merge m:1 cty using "${raw}/Covariate Data/cty_covariates.dta", keepusing(cty county_name cz cz_name statename state_id stateabbrv) 35 | collapse (sum) popestimate2000 , by(name cz sumlev statename cz_name) 36 | ren cz_name czname_old 37 | keep if !mi(cz) 38 | 39 | * String Parsing 40 | replace name = substr(name,1,strlen(name)-6) if lower(substr(name,-5,.)) == "(pt.)" 41 | replace name = substr(name,1,strlen(name)-12) if lower(substr(name,-11,.)) == "and borough" 42 | replace name = substr(name,1,strlen(name)-10) if lower(substr(name,-9,.)) == "(balance)" 43 | replace name = substr(name,1,strlen(name)-12) if lower(substr(name,-11,.)) == "government" 44 | 45 | * Subset to cities and towns, Hawaiian Counties, townships with no large cities, and counties with no subdivisions 46 | gen toDrop = 0 47 | replace toDrop = 1 if sumlev <= 50 48 | replace toDrop = 1 if substr(name,-6,.) == "County" 49 | replace toDrop = 1 if substr(name,-6,.) 
== "Parish" 50 | replace toDrop = 1 if lower(substr(name,1,7)) == "balance" 51 | replace toDrop = 0 if sumlev == 50 & statename == "Hawaii" //Note that the Hawaiian counties appear to have no subdivisions in the data 52 | bys cz: replace toDrop = 0 if _N == 1 53 | drop if toDrop 54 | gen city_pop = popestimate2000 if lower(substr(name,-8,.)) != "township" 55 | egen max_city_pop = max(city_pop), by(cz) 56 | replace toDrop = 1 if max_city_pop > 60e3 & lower(substr(name,-8,.)) == "township" 57 | drop if toDrop 58 | drop toDrop 59 | 60 | * Keep the largest city 61 | gsort cz -popestimate2000 62 | by cz: keep if _n == 1 63 | 64 | * More String Parsing 65 | gen suffix = word(name,-1) 66 | gen czname = substr(name,1,length(name) - length(suffix) - 1) 67 | replace czname = name if substr(name,1,8) == "Township" 68 | replace czname = substr(czname,1,strpos(czname,"-")-1) if strpos(czname,"-") & suffix != "city" 69 | replace czname = substr(czname,1,strpos(czname,"/")-1) if strpos(czname,"/") 70 | replace czname = substr(czname,1,length(czname) - length(word(czname,-1)) - 1) if lower(word(czname,-1)) == "charter" 71 | replace czname = "Boise" if czname == "Boise City" 72 | replace czname = "Galesburg" if czname == "Galesburg City" 73 | replace czname = "New York City" if czname == "New York" 74 | replace czname = "Washington DC" if statename == "District Of Columbia" 75 | 76 | * Save 77 | order cz czname statename 78 | keep cz czname statename czname_old 79 | save13 "${derived}/final_covariates/cz_names_updated.dta", replace 80 | project, creates("${derived}/final_covariates/cz_names_updated.dta") 81 | 82 | -------------------------------------------------------------------------------- /code/covariates/list_of_covariates.do: -------------------------------------------------------------------------------- 1 | ******* 2 | *** List of Covariates 3 | ******* 4 | 5 | ** Variable categories 6 | 7 | * Health Behaviors 8 | local behavior_vars cur_smoke_q bmi_obese_q exercise_any_q 9 | 10 | * Health Care 11 | local insurance_vars puninsured2010 reimb_penroll_adj10 12 | local acute_vars mort_30day_hosp_z adjmortmeas_amiall30day adjmortmeas_chfall30day adjmortmeas_pnall30day // Acute Care 13 | local preventive_vars med_prev_qual_z primcarevis_10 diab_hemotest_10 diab_eyeexam_10 diab_lipids_10 mammogram_10 amb_disch_per1000_10 // Preventive Care 14 | 15 | * Environmental Factors 16 | local segregation_vars cs00_seg_inc cs00_seg_inc_pov25 cs00_seg_inc_aff75 cs_race_theil_2000 // Segregation 17 | 18 | * Inequality and Social Cohesion 19 | local distribution_vars gini99 poor_share inc_share_1perc frac_middleclass // Income Distribution 20 | local social_vars scap_ski90pcm rel_tot // Social Cohesion 21 | local race_vars cs_frac_black cs_frac_hisp // Race & Ethnicity 22 | 23 | * Labor Market Conditions 24 | local labor_vars unemp_rate pop_d_2000_1980 lf_d_2000_1980 cs_labforce cs_elf_ind_man 25 | 26 | * Other Factors 27 | local migration_vars cs_born_foreign mig_inflow mig_outflow // Migration 28 | local localgeo_vars pop_density frac_traveltime_lt15 // Local Geography 29 | local affluance_vars hhinc00 median_house_value // Affluence 30 | local k12_vars ccd_exp_tot ccd_pup_tch_ratio score_r dropout_r // K-12 Education 31 | local college_vars cs_educ_ba tuition gradrate_r // College Education 32 | local socioecon_vars e_rank_b cs_fam_wkidsinglemom crime_total // Socioeconomics 33 | local tax_vars subcty_exp_pc taxrate tax_st_diff_top20 // Local Taxation 34 | 35 | ** All variables 36 | global covars_other 
`migration_vars' `localgeo_vars' `affluance_vars' /// 37 | `k12_vars' `college_vars' `socioecon_vars' `tax_vars' 38 | 39 | global covars_all `behavior_vars' `insurance_vars' `acute_vars' `preventive_vars' /// 40 | `segregation_vars' `distribution_vars' `social_vars' `race_vars' `labor_vars' /// 41 | ${covars_other} 42 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Check correlations in CZ LE trends.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/CZ trend correlations" 15 | 16 | /*** Compute population-weighted correlations in CZ LE trends: 17 | - Between unadjusted and race-adjusted trends 18 | - Between trends for men and for women 19 | ***/ 20 | 21 | *** Check correlation between unadjusted and race-adjusted trends 22 | 23 | * Load data 24 | project, original("${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta") 25 | use "${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta", clear 26 | 27 | * Merge on CZ populations 28 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") preserve 29 | merge m:1 cz using "${root}/data/derived/final_covariates/cz_full_covariates.dta", /// 30 | assert(2 3) keep(3) nogen /// 31 | keepusing(pop2000) 32 | 33 | * Compute population-weighted correlations by gender 34 | bys gnd: corr le_agg_b_year le_raceadj_b_year [w=pop2000] 35 | 36 | * Compute population-weighted correlations by gender and income quartile 37 | isid cz gnd hh_inc_q 38 | statsby corr_raceadj_unadj = r(rho), by(gnd hh_inc_q) clear: corr le_raceadj_b_year le_agg_b_year [w=pop2000] 39 | 40 | sum corr_raceadj_unadj 41 | scalarout using "${root}/scratch/CZ trend correlations/Correlation between CZ LE trends raceadj and unadj.csv", /// 42 | replace /// 43 | id("Min corr b/w race-unadj and race-adj CZ trends in a Gender x Quartile") /// 44 | num(`=r(min)') fmt(%9.3f) 45 | project, creates("${root}/scratch/CZ trend correlations/Correlation between CZ LE trends raceadj and unadj.csv") 46 | 47 | 48 | *** Check correlation between genders 49 | 50 | * Load data 51 | project, original("${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta") 52 | use "${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta", clear 53 | drop le_agg_b_cons le_raceadj_b_cons 54 | rename *_b_year *_b_year_ 55 | 56 | reshape wide le_agg_b_year_ le_raceadj_b_year_, i(cz hh_inc_q) j(gnd) string 57 | 58 | * Merge on CZ populations 59 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") preserve 60 | merge m:1 cz using "${root}/data/derived/final_covariates/cz_full_covariates.dta", /// 61 | assert(2 3) keep(3) nogen /// 62 | keepusing(pop2000) 63 | 64 | * Compute correlations across genders 65 | corr le_agg_b_year_F le_agg_b_year_M [w=pop2000] 66 | corr le_raceadj_b_year_F le_raceadj_b_year_M [w=pop2000] 67 | 68 | * Compute correlations across genders by Income Quartile 69 | bys hh_inc_q: corr le_agg_b_year_F le_agg_b_year_M [w=pop2000] 70 | bys hh_inc_q: corr le_raceadj_b_year_F le_raceadj_b_year_M [w=pop2000] 71 | 
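The do-file above obtains by-group population-weighted correlations by running -correlate- with weights (analytic weights by default) under -statsby-, which collects r(rho) for each group. A minimal self-contained sketch of that pattern, using Stata's bundled auto dataset rather than the project data (purely illustrative, not part of the replication code):

sysuse auto, clear
* weighted correlation of price and mpg, separately for domestic and foreign cars
statsby rho = r(rho), by(foreign) clear: corr price mpg [w=weight]
list foreign rho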
-------------------------------------------------------------------------------- /code/lifeexpectancy/Compute Years of Social Security Eligibility.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/Medicare and Social Security Eligibility" 15 | 16 | /*** Compute the expected number of years lived over age 65 for 40 year-olds 17 | by gender and income percentile, then report the p100-p1 gap. 18 | ***/ 19 | 20 | ******** 21 | 22 | project, original("${root}/code/ado/generate_le_with_raceadj.ado") 23 | 24 | 25 | * Calculate national Gompertz parameters by Gender x Income Percentile x Race 26 | tempfile racegomp 27 | generate_le_with_raceadj, returngompertz by(gnd pctile) /// 28 | original /// 29 | gompparameters("$derived/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta") /// 30 | raceshares("$derived/raceshares/national_racesharesBY_age_gnd_hhincpctile.dta") /// 31 | saving(`racegomp') 32 | 33 | use `racegomp', clear 34 | keep if pctile == 1 | pctile == 100 35 | 36 | 37 | * Add age dimension, for ages 40-120 38 | isid gnd pctile 39 | expand 81 40 | gen int age=. 41 | bys gnd pctile: replace age = _n + 39 42 | 43 | * Merge in CDC mortality rates 44 | project, original("${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta") preserve 45 | merge m:1 gnd age using "${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta", /// 46 | keepusing(cdc_mort) /// 47 | assert(1 2 3) keep(1 3) // ages<40 are _merge==2, age 120 is _merge==1 48 | assert age==120 if _merge==1 49 | drop _merge 50 | 51 | * Generate survival curves 52 | isid gnd pctile age 53 | foreach r in "agg" "w" "b" "a" "h" { 54 | 55 | gen surv_`r' = 1 if age==40 56 | bys gnd pctile (age): replace surv_`r' = surv_`r'[_n-1] * /// 57 | (1 - exp(gomp_int_`r' + gomp_slope_`r' * age[_n-1]) ) if inrange(age,41,90) // mortality rates are mortality between [x,x+1) 58 | replace surv_`r' = surv_`r'[_n-1] * (1 - cdc_mort[_n-1]) if age>90 59 | 60 | } 61 | 62 | * Compute expected number of years lived above 65 (for a person alive at 40) 63 | keep if age>=65 64 | foreach var of varlist surv_* { 65 | replace `var' = `var'/2 if age==65 // trapezoidal approximation for the integral 66 | } 67 | collapse (sum) surv_*, by(gnd pctile) 68 | rename surv* ly65* 69 | 70 | * Race-adjust expected years above 65 71 | project, original("${root}/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta") preserve 72 | merge m:1 gnd using "${root}/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta", /// 73 | keepusing(raceshare_*) /// 74 | assert(match) nogen 75 | 76 | gen ly65_raceadj = raceshare_black * ly65_b /// 77 | + raceshare_asian * ly65_a /// 78 | + raceshare_hispanic * ly65_h /// 79 | + raceshare_other * ly65_w 80 | drop raceshare* 81 | 82 | * Reshape wide on percentile 83 | rename ly65* ly65*_p 84 | ds ly65* 85 | reshape wide `r(varlist)', i(gnd) j(pctile) 86 | 87 | * Generate p100-p1 gap 88 | gen ly65_raceadj_gap = ly65_raceadj_p100 - ly65_raceadj_p1 89 | 90 | * Output 91 | assert 
gnd==cond(_n==1,"F","M") 92 | 93 | scalarout using "${root}/scratch/Medicare and Social Security Eligibility/Years Lived Above 65.csv", replace /// 94 | id("Expected Years Lived Above 65: p100 minus p1 (Women)") /// 95 | num(`=ly65_raceadj_gap[1]') fmt(%9.1f) 96 | 97 | scalarout using "${root}/scratch/Medicare and Social Security Eligibility/Years Lived Above 65.csv", /// 98 | id("Expected Years Lived Above 65: p100 minus p1 (Men)") /// 99 | num(`=ly65_raceadj_gap[2]') fmt(%9.1f) 100 | project, creates("${root}/scratch/Medicare and Social Security Eligibility/Years Lived Above 65.csv") 101 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Estimate NCHS State LEs pooling income groups.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/Mortality Rates/NCHS state pooled income" 15 | cap mkdir "${root}/data/derived/Gompertz Parameters/NCHS state pooled income" 16 | cap mkdir "${root}/data/derived/le_estimates/NCHS state pooled income" 17 | 18 | 19 | *********************** 20 | *** Mortality rates *** 21 | *********************** 22 | 23 | * Load NCHS State mortality data by Gender x Age, pooling 1999-2014 24 | project, original("$root/data/raw/NCHS Underlying Cause of Death/Underlying Cause of Death, 1999-2014, by state, gender, and age.txt") 25 | import delimited "$root/data/raw/NCHS Underlying Cause of Death/Underlying Cause of Death, 1999-2014, by state, gender, and age.txt", clear 26 | drop if mi(gendercode) 27 | 28 | assert mi(notes) 29 | drop notes 30 | 31 | keep if inrange(singleyearagescode,40,76) 32 | destring population, replace 33 | 34 | * Check that there are 51 states x 2 genders x 37 ages 35 | assert _N == 51*2*37 36 | 37 | * Keep required vars and use our standard var names and formats 38 | keep statecode singleyearagescode gendercode population deaths 39 | ren (statecode singleyearagescode gendercode population) /// 40 | (st age_at_d gnd count ) 41 | 42 | gen double mortrate = deaths/count 43 | drop deaths 44 | 45 | * Output 46 | save13 "${root}/data/derived/Mortality Rates/NCHS state pooled income/st_NCHSmortratesBY_gnd_age.dta", replace 47 | project, creates("${root}/data/derived/Mortality Rates/NCHS state pooled income/st_NCHSmortratesBY_gnd_age.dta") preserve 48 | 49 | 50 | *************************** 51 | *** Gompertz parameters *** 52 | *************************** 53 | 54 | * Calculate Gompertz parameters from mortality rates 55 | project, original("${root}/code/ado/estimate_gompertz2.ado") preserve 56 | project, original("${root}/code/ado/mle_gomp_est.ado") preserve 57 | project, original("${root}/code/ado/fastregby_gompMLE_julia.ado") preserve 58 | project, original("${root}/code/ado/estimate_gompertz.jl") preserve 59 | 60 | estimate_gompertz2 st gnd, age(age_at_d) mort(mortrate) n(count) /// 61 | type(mle) 62 | 63 | * Output 64 | save13 "${derived}/Gompertz Parameters/NCHS state pooled income/st_NCHSgompBY_gnd.dta", replace 65 | project, creates("${derived}/Gompertz Parameters/NCHS state pooled income/st_NCHSgompBY_gnd.dta") preserve 66 | 67 | 68 
| *********************** 69 | *** Life expectancy *** 70 | *********************** 71 | 72 | generate_le_with_raceadj, by(st gnd) /// 73 | gompparameters("${derived}/Gompertz Parameters/NCHS state pooled income/st_NCHSgompBY_gnd.dta") /// 74 | raceshares("${derived}/raceshares/st_racesharesBY_agebin_gnd.dta") /// 75 | saving("${derived}/le_estimates/NCHS state pooled income/st_NCHSleBY_gnd.dta") 76 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Explore national trends at fixed income levels.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/National LE Trends at fixed income levels or ranks" 15 | 16 | 17 | 18 | ******************** 19 | 20 | 21 | *** Estimate National LE Trend by Gender x Income Quartile, controlling for income levels 22 | 23 | * Load mean household income level data 24 | project, original("${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 25 | use "${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 26 | isid gnd pctile age_at_d yod 27 | drop mortrate count 28 | 29 | * Keep only incomes at age 40 30 | keep if age_at_d - lag == 40 // income measured at 40 31 | drop age_at_d lag 32 | 33 | rename yod year 34 | isid gnd pctile year 35 | rename hh_inc hh_inc40 36 | 37 | * Merge in annual national LE 38 | project, original("$derived/le_estimates/national_leBY_year_gnd_hhincpctile.dta") preserve 39 | merge 1:1 gnd pctile year using "$derived/le_estimates/national_leBY_year_gnd_hhincpctile.dta", /// 40 | assert(3) nogen 41 | 42 | * Estimate trends by Gender x Income Quartile; controlling for income level at age 40 in each percentile in each year 43 | g quartile = ceil(pctile/25) 44 | statsby _b _se, by(gnd quartile) clear : reg le_raceadj year hh_inc40, vce(cl year) 45 | keep gnd quartile _b_year _se_year 46 | 47 | * Compute upper & lower 95% confidence interval 48 | gen ci_h = _b_year + 1.96*_se_year 49 | gen ci_l = _b_year - 1.96*_se_year 50 | gen ci_str = "(" + string(ci_l,"%03.2f") + ", " + string(ci_h,"%03.2f") + ")" 51 | keep gnd quartile _b_year ci_str 52 | 53 | * Reshape wide on quartile 54 | rename (_b_year ci_str) (le_raceadj_tr_q CI_le_raceadj_tr_q) 55 | reshape wide le_raceadj_tr_q CI_le_raceadj_tr_q, i(gnd) j(quartile) 56 | 57 | * Output 58 | gsort - gnd 59 | export delim using "${root}/scratch/National LE Trends at fixed income levels or ranks/National LE trend by Quartile Controlling for Income.csv", replace 60 | project, creates("${root}/scratch/National LE Trends at fixed income levels or ranks/National LE trend by Quartile Controlling for Income.csv") preserve 61 | 62 | -------------------------------------------------------------------------------- /code/lifeexpectancy/LE ranked lists of states.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if 
("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/State LE ranked lists by income quartile" 15 | 16 | 17 | /*** Create ranked lists of State LE levels and State LE trends, 18 | by income quartile. 19 | ***/ 20 | 21 | ************** 22 | *** Levels *** 23 | ************** 24 | 25 | * Load state LE levels data 26 | project, original("$derived/le_estimates/st_leBY_gnd_hhincquartile.dta") 27 | use st gnd hh_inc_q le_raceadj using "$derived/le_estimates/st_leBY_gnd_hhincquartile.dta", clear 28 | 29 | * Take unweighted average of men and women 30 | isid st gnd hh_inc_q 31 | collapse (mean) le_raceadj, by(st hh_inc_q) 32 | 33 | * Merge on state names 34 | rename st statefips 35 | project, original("${root}/data/raw/Covariate Data/state_database.dta") preserve 36 | merge m:1 statefips using "${root}/data/raw/Covariate Data/state_database.dta", /// 37 | keepusing(state*) /// 38 | assert(3) nogen 39 | 40 | * Reshape wide on income quartile 41 | rename le_raceadj le_raceadj_q 42 | reshape wide le_raceadj_q, i(statefips) j(hh_inc_q) 43 | order state* 44 | 45 | * Ranked lists 46 | foreach q in 1 4 { 47 | sort le_raceadj_q`q' 48 | export delim state* le_raceadj_q`q' /// 49 | using "$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q'.csv", /// 50 | replace 51 | project, creates("$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q'.csv") preserve 52 | } 53 | 54 | sort le_raceadj_q1 55 | maptile le_raceadj_q1, geo(state) geoid(statefips) cutv(`=le_raceadj_q1[10]+epsfloat()') rev /// 56 | geofolder("$root/code/ado_maptile_geo") /// 57 | savegraph("$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q' - geographic belt among 10 lowest states.png") replace 58 | project, creates("$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q' - geographic belt among 10 lowest states.png") preserve 59 | 60 | ************** 61 | *** Trends *** 62 | ************** 63 | 64 | * Load state LE trends data 65 | project, original("$derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta") 66 | use st gnd hh_inc_q le_raceadj_b_year using "$derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta", clear 67 | rename le_raceadj_b_year le_raceadj_tr 68 | 69 | * Take unweighted average of men and women 70 | isid st gnd hh_inc_q 71 | collapse (mean) le_raceadj_tr, by(st hh_inc_q) 72 | 73 | * Merge on state names 74 | rename st statefips 75 | project, original("${root}/data/raw/Covariate Data/state_database.dta") preserve 76 | merge m:1 statefips using "${root}/data/raw/Covariate Data/state_database.dta", /// 77 | keepusing(statename) /// 78 | assert(3) nogen 79 | 80 | * Reshape wide on income quartile 81 | rename le_raceadj_tr le_raceadj_tr_q 82 | reshape wide le_raceadj_tr_q, i(statefips) j(hh_inc_q) 83 | order state* 84 | 85 | * Ranked lists 86 | sort le_raceadj_tr_q1 87 | export delim state* le_raceadj_tr_q1 /// 88 | using "$root/scratch/State LE ranked lists by income quartile/State ranked LE trends Q1.csv", /// 89 | replace 90 | project, creates("$root/scratch/State LE ranked lists by income quartile/State ranked LE trends Q1.csv") preserve 91 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Plot international comparison of LE at age 40.do: 
-------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | if (c(os)=="Windows") global img wmf 14 | else global img png 15 | 16 | * Create required folders 17 | cap mkdir "${root}/scratch/International comparison of LE at 40" 18 | cap mkdir "${root}/scratch/International comparison of LE at 40/data" 19 | 20 | 21 | **************** 22 | 23 | * Load World Health Organization data on LE at 40 24 | project, original("${root}/data/raw/WHO/International_LEat40_in2013.dta") 25 | use "${root}/data/raw/WHO/International_LEat40_in2013.dta", clear 26 | 27 | * Append our national LE estimates by Gender x Income Percentile, for select percentiles 28 | project, original("$derived/le_estimates/national_leBY_gnd_hhincpctile.dta") preserve 29 | append using "$derived/le_estimates/national_leBY_gnd_hhincpctile.dta", /// 30 | keep(le_raceadj gnd pctile) gen(_append) 31 | keep if _append==0 | inlist(pctile,1,25,50,100) 32 | replace country = "United States - P" + string(pctile) if _append==1 33 | drop _append 34 | 35 | * Manually select specific countries to highlight 36 | gen byte highlight=0 37 | replace highlight=1 if gnd=="M" & (inlist(country,"Lesotho","Zambia","Sudan","India","Pakistan") | inlist(country,"Iraq","Libya","China","San Marino","United Kingdom","Canada")) 38 | replace highlight=1 if gnd=="F" & (inlist(country,"Sierra Leone","Zambia","Sudan","India","Pakistan") | inlist(country,"Iraq","Libya","China","United Kingdom","Canada","Japan")) 39 | replace highlight=. 
if !mi(pctile) 40 | 41 | * Generate ranks by gender, where rank==1 means lowest LE 42 | bys gnd (le_raceadj): gen le_raceadj_rank = _n 43 | order gnd country pctile highlight le_raceadj le_raceadj_rank 44 | 45 | 46 | * Plot bar graphs of LE at age 40 47 | 48 | foreach gender in "Men" "Women" { 49 | 50 | local g=cond("`gender'"=="Men","M","F") 51 | 52 | * Generate figure 53 | twoway (spike le_raceadj le_raceadj_rank if highlight==0, hor lc(gs10)) /// 54 | (spike le_raceadj le_raceadj_rank if highlight==1, hor lc(navy)) /// 55 | (spike le_raceadj le_raceadj_rank if !mi(pctile), hor lc(maroon)) /// 56 | (scatter le_raceadj_rank le_raceadj if highlight==1, m(none) mlabel(country) mlabc(black)) /// 57 | (scatter le_raceadj_rank le_raceadj if !mi(pctile), m(none) mlabel(country) mlabc(maroon)) /// 58 | if gnd=="`g'", /// 59 | graphregion(fcolor(white)) legend(off) /// 60 | ytitle("") ylab(none) /// 61 | ysize(7) xsize(8.25) fxsize(66.667) /// Create a canvas that is height 7 x width 5.5, without cutting off labels 62 | xtitle("Expected Age at Death for 40 Year Old `gender'") xlabel(60(5)90) /// 63 | title("") 64 | graph export "${root}/scratch/International comparison of LE at 40/International comparison of LE at 40 - `gender'.${img}", replace 65 | project, creates("${root}/scratch/International comparison of LE at 40/International comparison of LE at 40 - `gender'.${img}") preserve 66 | 67 | * Export data underlying figure 68 | export delim if gnd=="`g'" /// 69 | using "${root}/scratch/International comparison of LE at 40/data/International comparison of LE at 40 - `gender'.csv", /// 70 | replace 71 | project, creates("${root}/scratch/International comparison of LE at 40/data/International comparison of LE at 40 - `gender'.csv") preserve 72 | 73 | } 74 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Plot signal by local population level.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/Signal to Noise" 15 | 16 | /*** For each CZ or County life expectancy reported in online tables, plot 17 | fraction of variance that is signal against population size. 18 | ***/ 19 | 20 | 21 | **************** 22 | *** Programs *** 23 | **************** 24 | 25 | cap program drop plot_signalfrac_by_pop 26 | program define plot_signalfrac_by_pop 27 | 28 | /*** Plot fraction of variance that is signal against population size, 29 | separately by Gender x Income Quantile. 
30 | ***/ 31 | 32 | syntax name using/, geo(name) nq(integer) label(string) [ year(name) incq(name) plotq(string) xlab(passthru) xline(passthru) xsize(passthru) ] 33 | local var `namelist' 34 | if ("`incq'"=="") local incq hh_inc_q 35 | if ("`plotq'"=="") local plotq 1 4 36 | 37 | * Load point estimates and bootstrap SDs 38 | project, original(`"`using'"') 39 | use `"`using'"', clear 40 | isid `geo' gnd `incq' `year' 41 | keep `geo' gnd `incq' `year' `var' sd_`var' 42 | rename sd* sdnoise* 43 | 44 | * Merge in CZ populations 45 | if ("`geo'"=="cz") { 46 | project, original("${derived}/final_covariates/cz_pop.dta") preserve 47 | merge m:1 `geo' using "${derived}/final_covariates/cz_pop.dta", /// 48 | assert(2 3) keep(3) nogen 49 | } 50 | else if ("`geo'"=="cty") { 51 | project, original("${derived}/final_covariates/cty_full_covariates.dta") preserve 52 | merge m:1 `geo' using "${derived}/final_covariates/cty_full_covariates.dta", /// 53 | assert(2 3) keep(3) nogen /// 54 | keepusing(cty_pop2000) 55 | rename cty_pop2000 pop2000 56 | } 57 | 58 | * Generate total SD, in each ventile of CZ pop 59 | fastxtile pop_quantile = pop2000, nq(`nq') 60 | egen sdtotal_`var' = sd(`var'), by(gnd `incq' `year' pop_quantile) 61 | 62 | * Decompose variance into signal and noise 63 | gen varnoise_`var' = sdnoise_`var'^2 64 | gen vartotal_`var' = sdtotal_`var'^2 65 | gen varsignal_`var' = vartotal_`var' - varnoise_`var' 66 | gen fracsignal = varsignal_`var' / vartotal_`var' 67 | 68 | * Plot fraction of variance that is signal for Q1 and Q4, by Population 69 | graph drop _all 70 | foreach gender in "Male" "Female" { 71 | 72 | local g = substr("`gender'",1,1) 73 | 74 | foreach q in `plotq' { 75 | 76 | if (`q'==1) local color navy 77 | else local color maroon 78 | 79 | binscatter fracsignal pop2000 if gnd=="`g'" & `incq'==`q' & pop_quantile<`nq', /// 80 | xq(pop_quantile) line(none) color(`color') /// 81 | ytitle("Signal Variance / Total Variance") /// 82 | xtitle("Population") /// 83 | ylab(0(0.1)1, gmin gmax) /// 84 | `xlab' `xline' /// 85 | name(`g'q`q') subtitle("`gender' Q`q'") 86 | local plots `plots' `g'q`q' 87 | 88 | } 89 | 90 | } 91 | 92 | graph combine `plots', rows(2) iscale(*0.8) ysize(7) `xsize' /// 93 | title("Signal Fraction: `label'", size(medium)) /// 94 | note("Note: Quantile with highest population omitted for scale.", size(vsmall)) 95 | graph export "${root}/scratch/Signal to Noise/`label' - Signal Fraction by `geo' pop.png", replace 96 | graph drop `plots' 97 | project, creates("${root}/scratch/Signal to Noise/`label' - Signal Fraction by `geo' pop.png") 98 | 99 | end 100 | 101 | 102 | ********************** 103 | *** Generate Plots *** 104 | ********************** 105 | 106 | *** CZ LE Levels, by Gender x Income Quartile 107 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cz_SEleBY_gnd_hhincquartile.dta", /// 108 | geo(cz) label(CZ LE Levels) /// 109 | nq(20) /// 110 | xlab(25e3 "25k" 250e3 "250k" 500e3 "500k" 750e3 "750k" 1e6 "1M" 1.25e6 "1.25M" 1.5e6 "1.5M", grid) 111 | 112 | *** CZ LE Trends, by Gender x Income Quartile 113 | plot_signalfrac_by_pop le_raceadj_b_year using "$derived/le_trends_stderr/cz_SEletrendsBY_gnd_hhincquartile.dta", /// 114 | geo(cz) label(CZ LE Trends) /// 115 | nq(6) /// 116 | xlab( 500e3 "500k" 1e6 "1M" 1.5e6 "1.5M" 2e6 "2M" 2.5e6 "2.5M" 3e6 "3M", grid) 117 | 118 | *** CZ LE Levels, by Gender x Income Quartile x Year 119 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cz_SEleBY_gnd_hhincquartile_year.dta", /// 120 | 
year(year) /// 121 | geo(cz) label(CZ LE Levels by Year) /// 122 | nq(6) /// 123 | xlab( 500e3 "500k" 1e6 "1M" 1.5e6 "1.5M" 2e6 "2M" 2.5e6 "2.5M" 3e6 "3M", grid) 124 | 125 | *** CZ LE Levels, by Gender x Income Ventile 126 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cz_SEleBY_gnd_hhincventile.dta", /// 127 | incq(hh_inc_v) plotq(1 5 10 15 20) xsize(10) /// 128 | geo(cz) label(CZ LE Levels by Ventile) /// 129 | nq(10) /// 130 | xlab(250e3 "250k" 500e3 "500k" 1e6 "1M" 1.5e6 "1.5M" 2e6 "2M", grid) 131 | 132 | *** County LE Levels, by Gender x Income Quartile 133 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cty_SEleBY_gnd_hhincquartile.dta", /// 134 | geo(cty) label(County LE Levels) /// 135 | nq(20) /// 136 | xlab(25e3 "25k" 100e3 "100k" 200e3 "200k" 300e3 "300k" 400e3 "400k" 500e3 "500k", grid) 137 | 138 | 139 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Sensitivity analyses of local LEs.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/Sensitivity analyses of local LE" 15 | 16 | /*** Check the correlation between various versions of the 17 | CZ x Gender x Income Quartile estimates and our baseline 18 | race-adjusted estimates. 19 | ***/ 20 | 21 | 22 | ***************** 23 | *** Load data *** 24 | ***************** 25 | 26 | * Load various life expectancies 27 | project, original("$derived/le_estimates/cz_leBY_gnd_hhincquartile.dta") 28 | use "$derived/le_estimates/cz_leBY_gnd_hhincquartile.dta", clear 29 | keep cz gnd hh_inc_q le_agg le_raceadj 30 | rename le_raceadj le_Baseline 31 | rename le_agg le_NoRaceAdj 32 | order cz gnd hh_inc_q le_Baseline 33 | 34 | project, original("$derived/le_estimates/Expected life years until 77/cz_leBY_gnd_hhincquartile_GompertzTo77.dta") preserve 35 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/Expected life years until 77/cz_leBY_gnd_hhincquartile_GompertzTo77.dta", /// 36 | assert(3) nogen keepusing(le_raceadj) 37 | rename le_raceadj le_LYuntil77 38 | 39 | project, original("$derived/le_estimates/Gompertz extrapolation to 100/cz_leBY_gnd_hhincquartile_GompertzTo100.dta") preserve 40 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/Gompertz extrapolation to 100/cz_leBY_gnd_hhincquartile_GompertzTo100.dta", /// 41 | assert(3) nogen keepusing(le_raceadj) 42 | rename le_raceadj le_GompertzTo100 43 | 44 | project, original("$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLS.dta") preserve 45 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLS.dta", /// 46 | assert(3) nogen keepusing(le_raceadj) 47 | rename le_raceadj le_OLS 48 | 49 | project, original("$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLSIncomeControl.dta") preserve 50 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLSIncomeControl.dta", /// 51 | assert(1 3) nogen keepusing(le_raceadj) // many CZs don't have income controlled LEs due to 
missing a percentile of data 52 | rename le_raceadj le_IncomeControl 53 | 54 | project, original("$derived/le_estimates/Cost of Living Adjusted/cz_leBY_gnd_hhincquartile_COLIadjusted.dta") preserve 55 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/Cost of Living Adjusted/cz_leBY_gnd_hhincquartile_COLIadjusted.dta", /// 56 | assert(3) nogen keepusing(le_raceadj) 57 | rename le_raceadj le_COLI 58 | 59 | * Load population counts 60 | project, original("${derived}/final_covariates/cz_pop.dta") preserve 61 | merge m:1 cz using "${derived}/final_covariates/cz_pop.dta", nogen assert(2 3) keep(3) 62 | 63 | * Check structure 64 | assert !mi(cz,gnd,hh_inc_q,pop2000) & inrange(hh_inc_q,1,4) 65 | 66 | ************************************ 67 | *** Count number of observations *** 68 | ************************************ 69 | 70 | preserve 71 | 72 | * Collapse to counts of CZs 73 | collapse (count) le*, by(gnd hh_inc_q) 74 | 75 | * Ensure that counts are constant across gender and income quartile 76 | foreach var of varlist le* { 77 | assert `var'==`var'[_n-1] if _n>2 78 | } 79 | 80 | * Collapse dataset of counts to gender 81 | collapse (mean) le*, by(gnd) 82 | 83 | * Output 84 | rename le* czN_le* 85 | export delim using "${root}/scratch/Sensitivity analyses of local LE/Counts of CZs in each sensitivity correlation.csv", replace 86 | project, creates("${root}/scratch/Sensitivity analyses of local LE/Counts of CZs in each sensitivity correlation.csv") 87 | 88 | restore 89 | 90 | **************************** 91 | *** Compute correlations *** 92 | **************************** 93 | 94 | * Store correlations in matrices 95 | tempname corrM corrF tempcorr 96 | forvalues q=1/4 { 97 | 98 | foreach g in "M" "F" { 99 | 100 | corr le_Baseline le_NoRaceAdj le_LYuntil77 le_GompertzTo100 le_OLS le_IncomeControl le_COLI /// 101 | if gnd=="`g'" & hh_inc_q==`q' [w=pop2000] 102 | 103 | matrix `tempcorr'=r(C) 104 | matrix `corr`g''=nullmat(`corr`g'') \ (`tempcorr'[1,2..7]) 105 | } 106 | 107 | } 108 | 109 | * Store correlations in dataset 110 | clear 111 | 112 | svmat `corrM', names(col) 113 | rename le* M_le* 114 | 115 | svmat `corrF', names(col) 116 | rename le* F_le* 117 | 118 | gen hh_inc_q=_n 119 | order hh_inc_q 120 | 121 | * Reformat correlations to 3 decimals 122 | foreach var of varlist M_le* F_le* { 123 | gen rho_`var'=string(`var',"%9.3f") 124 | } 125 | 126 | * Export tables of correlations 127 | export delim hh_inc_q rho_M* /// 128 | using "${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Male.csv", /// 129 | replace 130 | 131 | export delim hh_inc_q rho_F* /// 132 | using "${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Female.csv", /// 133 | replace 134 | 135 | project, creates("${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Male.csv") 136 | project, creates("${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Female.csv") 137 | -------------------------------------------------------------------------------- /code/lifeexpectancy/State Level Inequality Correlations.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do 
`"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/State Gini Correlations" 12 | 13 | * Erase output numbers 14 | cap erase "${root}/scratch/State Gini Correlations/State income inequality correlations.csv" 15 | 16 | * Set convenient globals 17 | global derived "${root}/data/derived" 18 | global raw "$root/data/raw" 19 | 20 | **************** 21 | 22 | * Import State Gini Index Data 23 | project, relies_on("${raw}/State Gini Index/ACS_06_EST_B19083.txt") 24 | project, original("${raw}/State Gini Index/ACS_06_EST_B19083_with_ann.csv") 25 | import delimited "${raw}/State Gini Index/ACS_06_EST_B19083_with_ann.csv", varnames(2) rowrange(2) clear 26 | ren (id2 geography estimateginiindex) (state_id statename gini2006) 27 | keep state_id statename gini2006 28 | keep if inrange(state_id,1,56) 29 | tempfile state_gini 30 | save `state_gini' 31 | 32 | project, original("${derived}/final_covariates/cty_full_covariates.dta") 33 | use "${derived}/final_covariates/cty_full_covariates.dta", clear 34 | collapse (sum) st_pop2000 = cty_pop2000, by(state_id) 35 | merge 1:1 state_id using `state_gini', assert(3) nogen 36 | ren state_id st 37 | 38 | project, original("${derived}/le_estimates/st_leBY_gnd_hhincquartile.dta") preserve 39 | merge 1:m st using "${derived}/le_estimates/st_leBY_gnd_hhincquartile.dta", nogen assert(match) keepusing(gnd hh_inc_q le_raceadj) 40 | ren le_raceadj le_raceadj_q 41 | reshape wide le_raceadj_q, i(st gnd) j(hh_inc_q) 42 | 43 | project, original("${derived}/le_estimates/st_leBY_gnd.dta") preserve 44 | merge 1:1 st gnd using "${derived}/le_estimates/st_leBY_gnd.dta", nogen assert(match) keepusing(le_raceadj) 45 | collapse (mean) le_raceadj*, by(st st_pop2000 statename gini2006) 46 | 47 | * Output correlations between Gini and LE 48 | corr_reg le_raceadj gini2006 [w=st_pop2000], vce(robust) 49 | 50 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 51 | id("Corr of state Gini coefficient with LE pooling all income quartiles: coef") /// 52 | num(`=_b[vb]') fmt(%9.2f) 53 | test vb=0 54 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 55 | id("Corr of state Gini coefficient with LE pooling all income quartiles: pval") /// 56 | num(`=r(p)') fmt(%9.2f) 57 | 58 | 59 | corr_reg le_raceadj_q1 gini2006 [w=st_pop2000], vce(robust) 60 | 61 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 62 | id("Corr of state Gini coefficient with LE pooling all income quartiles: coef") /// 63 | num(`=_b[vb]') fmt(%9.2f) 64 | test vb=0 65 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 66 | id("Corr of state Gini coefficient with LE pooling all income quartiles: pval") /// 67 | num(`=r(p)') fmt(%9.2f) 68 | 69 | project, creates("${root}/scratch/State Gini Correlations/State income inequality correlations.csv") 70 | 71 | -------------------------------------------------------------------------------- /code/lifeexpectancy/compute_LE_trends.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do 
"${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/le_trends" 15 | cap mkdir "${root}/data/derived/le_trends/bootstrap" 16 | 17 | 18 | /*** Compute life expectancy trends estimates. 19 | ***/ 20 | 21 | 22 | **************** 23 | *** Programs *** 24 | **************** 25 | 26 | cap program drop compute_LE_trends 27 | program define compute_LE_trends 28 | 29 | syntax using/, by(namelist) saving(string) 30 | 31 | project, uses(`"`using'"') 32 | 33 | * Calculate trends 34 | foreach var in le_raceadj le_agg { 35 | 36 | use `"`using'"', clear 37 | 38 | isid `by' year 39 | fastregby `var' year, by(`by') clear 40 | rename _b* `var'_b* 41 | 42 | tempfile trend_`var' 43 | qui save `trend_`var'' 44 | } 45 | qui merge 1:1 `by' using `trend_le_raceadj', assert(3) nogen 46 | 47 | * Output 48 | label data "Annual trend in unadjusted and race-adjusted LE" 49 | sort `by' 50 | save13 `"`saving'"', replace 51 | project, creates(`"`saving'"') 52 | 53 | end 54 | 55 | 56 | *************************************** 57 | *** Compute Trends: Point Estimates *** 58 | *************************************** 59 | 60 | * CZ Trends by Gender x Income Quartile 61 | compute_LE_trends using "$derived/le_estimates/cz_leBY_year_gnd_hhincquartile.dta", /// 62 | by(cz gnd hh_inc_q) /// 63 | saving("${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta") 64 | 65 | 66 | * State Trends by Gender x Income Quartile 67 | compute_LE_trends using "$derived/le_estimates/st_leBY_year_gnd_hhincquartile.dta", /// 68 | by(st gnd hh_inc_q) /// 69 | saving("${derived}/le_trends/st_letrendsBY_gnd_hhincquartile.dta") 70 | 71 | 72 | ************************************ 73 | *** Compute Trends: Bootstrapped *** 74 | ************************************ 75 | 76 | * CZ Trends by Gender x Income Quartile 77 | compute_LE_trends using "$derived/le_estimates/bootstrap/bootstrap_cz_leBY_gnd_hhincquartile_year.dta", /// 78 | by(cz gnd hh_inc_q sample_num) /// 79 | saving("${derived}/le_trends/bootstrap/bootstrap_cz_letrendsBY_gnd_hhincquartile.dta") 80 | 81 | 82 | * State Trends by Gender x Income Quartile 83 | compute_LE_trends using "$derived/le_estimates/bootstrap/bootstrap_st_leBY_gnd_hhincquartile_year.dta", /// 84 | by(st gnd hh_inc_q sample_num) /// 85 | saving("${derived}/le_trends/bootstrap/bootstrap_st_letrendsBY_gnd_hhincquartile.dta") 86 | -------------------------------------------------------------------------------- /code/lifeexpectancy/le_trends_maps.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/LE maps" 15 | cap mkdir "$root/scratch/LE maps/data" 16 | 17 | /*** Generate maps of Life Expectancy trends: 18 | 19 | - by State x Income Quartile 20 | ***/ 21 | 22 | 23 | ******************** 24 | *** State Trends *** 25 | ******************** 26 | 27 | * Load data 28 | project, original("${root}/data/derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta") 29 | use 
"${root}/data/derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta", clear 30 | rename st statefips 31 | 32 | * Map Q1 33 | foreach gender in "Male" "Female" { 34 | 35 | local g=substr("`gender'",1,1) 36 | 37 | maptile le_raceadj_b_year if gnd=="`g'" & hh_inc_q==1, geo(state) geoid(statefips) rev nq(10) /// 38 | geofolder("$root/code/ado_maptile_geo") /// 39 | twopt( /// 40 | title("Annual Changes in Race-Adjusted Expected Age at Death") /// 41 | subtitle("`gender's, Bottom Quartile") /// 42 | legend(size(*0.8)) /// 43 | ) /// 44 | savegraph("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.png") replace 45 | 46 | export delim statefips le_raceadj_b_year /// 47 | using "$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.csv" /// 48 | if gnd == "`g'" & hh_inc_q==1, replace 49 | 50 | project, creates("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.png") preserve 51 | project, creates("$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.csv") preserve 52 | 53 | } 54 | 55 | 56 | * Calculate Q4-Q1 difference in trends 57 | isid statefips gnd hh_inc_q 58 | keep statefips gnd hh_inc_q le_raceadj_b_year 59 | rename le_raceadj_b_year le_raceadj_b_year_q 60 | reshape wide le_raceadj_b_year_q, i(statefips gnd) j(hh_inc_q) 61 | gen le_raceadj_b_year_diffq4q1 = le_raceadj_b_year_q4 - le_raceadj_b_year_q1 62 | 63 | * Map Q4-Q1 difference 64 | foreach gender in "Male" "Female" { 65 | 66 | local g=substr("`gender'",1,1) 67 | 68 | maptile le_raceadj_b_year_diffq4q1 if gnd=="`g'", geo(state) geoid(statefips) nq(10) /// 69 | geofolder("$root/code/ado_maptile_geo") /// 70 | twopt( /// 71 | title("Annual Changes in Race-Adjusted Expected Age at Death") /// 72 | subtitle("`gender's, Difference Between Top and Bottom Quartiles") /// 73 | legend(size(*0.8)) /// 74 | ) /// 75 | savegraph("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.png") replace 76 | 77 | export delim statefips le_raceadj_b_year_diffq4q1 /// 78 | using "$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.csv" /// 79 | if gnd == "`g'", replace 80 | 81 | project, creates("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.png") preserve 82 | project, creates("$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.csv") preserve 83 | 84 | } 85 | 86 | -------------------------------------------------------------------------------- /code/mortality/Calculate aggregate deaths.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/Deaths and population counts in IRS and other samples" 12 | 13 | /*** Calculate the aggregate number of deaths and mortality rate in our sample, 14 | pooling all ages 40-76 and all years. Separately for men and women. 
15 | ***/ 16 | 17 | ************* 18 | 19 | 20 | * Load mortality rates 21 | project, uses("${root}/data/derived/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta") 22 | use "${root}/data/derived/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 23 | keep if age_at_d >= 40 24 | 25 | * Collapse to aggregate deaths and population counts by Gender 26 | gen long deaths=mortrate*count 27 | 28 | collapse (rawsum) count deaths, by(gnd) 29 | recast long count deaths 30 | format %12.0gc count deaths 31 | 32 | * Generate mortality per 100,000 33 | gen mortper100K = deaths/count*100000 34 | format %9.1f mortper100K 35 | 36 | * Output numbers 37 | assert gnd=="F" in 1 38 | 39 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 40 | replace /// 41 | id("Aggregate deaths in sample 40-76: Men") /// 42 | num(`=deaths[2]') fmt(%12.0gc) 43 | 44 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 45 | id("Aggregate mortality per 100,000 in sample 40-76: Men") /// 46 | num(`=mortper100K[2]') fmt(%9.1f) 47 | 48 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 49 | id("Aggregate deaths in sample 40-76: Women") /// 50 | num(`=deaths[1]') fmt(%12.0gc) 51 | 52 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 53 | id("Aggregate mortality per 100,000 in sample 40-76: Women") /// 54 | num(`=mortper100K[1]') fmt(%9.1f) 55 | 56 | project, creates("${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv") 57 | -------------------------------------------------------------------------------- /code/mortality/Construct multi-source death and population counts.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global raw "${root}/data/raw" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/Mortality Rates" 15 | cap mkdir "${root}/data/derived/Mortality Rates/NCHS-SSA-IRS Deaths and Populations" 16 | 17 | 18 | /*** Load and combine counts of deaths and populations 19 | by Gender x Age x Year from numerous sources: 20 | - CDC NCHS 21 | - SSA 22 | - IRS 23 | 24 | Covering years 2001-2014 and ages 40-76 (when available from each source). 
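	The merged output holds, for each Gender x Age x Year cell, population and death
	counts prefixed by source: dm1_* (SSA), irs_* (IRS, with positive- and zero-income
	subtotals), and cdc_* (NCHS).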
25 | ***/ 26 | 27 | ******* 28 | *** SSA DM1 count of deaths and population 29 | ******* 30 | 31 | project, original("${raw}/Mortality Collapses/ssa_dm1_tab_mskd.dta") 32 | use "${raw}/Mortality Collapses/ssa_dm1_tab_mskd.dta", clear 33 | 34 | keep if gnd!="U" 35 | assert inlist(gnd,"M","F") 36 | 37 | reshape wide count, i(gnd year age) j(died) 38 | ren (count0 count1) (dm1_pop dm1_dead) 39 | replace dm1_pop = dm1_pop + dm1_dead 40 | 41 | keep if inrange(age,40,76) & year>=2001 42 | 43 | tempfile ssa 44 | save `ssa' 45 | 46 | ******* 47 | *** IRS count of deaths and population (including zero incomes) 48 | ******* 49 | 50 | project, original("${root}/code/ado/gen_mortrates2.ado") 51 | 52 | project, original("${raw}/Mortality Collapses/mort_ad_v6_nat_byageyr_new_mskd.dta") 53 | use "${raw}/Mortality Collapses/mort_ad_v6_nat_byageyr_new_mskd.dta", clear 54 | 55 | * Clean up raw data 56 | compress 57 | drop if tax_yr<1999 58 | assert inlist(gnd,"M","F") 59 | 60 | assert pctile!=0 61 | replace pctile=0 if pctile==-1 // in IRS collapse, zero incomes are coded as -1 62 | replace pctile=-1 if pctile==. // in IRS collapse, negative incomes are coded as . 63 | 64 | ren deadby_* deadby_*_Mean 65 | drop agebin_mort // we already have exact age 66 | 67 | * Calculate mortality rates 68 | gen_mortrates2 gnd pctile, age(age) year(tax_yr) n(count) 69 | 70 | * Partition into population and deaths by Positive and Zero earnings 71 | rename age_at_d age 72 | rename yod year 73 | rename count irs_pop 74 | 75 | gen irs_dead = chop(mortrate * irs_pop, 10e-6) 76 | 77 | isid gnd age year pctile 78 | replace pctile=1 if pctile>=1 79 | label define poszeroneg 1 "posinc_" 0 "zeroinc_" -1 "neginc_" 80 | label values pctile poszeroneg 81 | 82 | collapse (sum) irs_pop irs_dead, by(gnd age year pctile) 83 | compress 84 | 85 | decode pctile, gen(pos_zero_neg) 86 | drop pctile 87 | reshape wide irs_@pop irs_@dead, i(gnd age year) j(pos_zero_neg) string 88 | 89 | * Calculate total population and deaths 90 | gen long irs_pop = irs_posinc_pop + irs_zeroinc_pop + irs_neginc_pop 91 | gen long irs_dead = irs_posinc_dead + irs_zeroinc_dead + irs_neginc_dead 92 | drop irs_neginc* 93 | 94 | * Output 95 | keep if inrange(age,40,76) & year>=2001 96 | tempfile irs 97 | save `irs' 98 | 99 | 100 | ******* 101 | *** CDC NCHS count of deaths and population 102 | ******* 103 | 104 | project, original("${raw}/CDC-Census Counts/Underlying Cause of Death, 1999-2014.txt") 105 | import delimited "${raw}/CDC-Census Counts/Underlying Cause of Death, 1999-2014.txt", clear 106 | 107 | drop if !mi(notes) 108 | keep singleyearagescode gendercode deaths population year 109 | ren (singleyearagescode gendercode deaths population) /// 110 | (age gnd cdc_dead cdc_pop) 111 | 112 | destring age, replace force 113 | keep if inrange(age,40,76) & year>=2001 114 | destring cdc_pop, replace 115 | 116 | tempfile cdc 117 | save `cdc' 118 | 119 | ******* 120 | *** Merge all death and population data together 121 | ******* 122 | 123 | use `ssa', clear 124 | merge 1:1 gnd age year using `irs', assert(1 3) nogen 125 | merge 1:1 gnd age year using `cdc', assert(3) nogen 126 | 127 | compress 128 | 129 | save13 "${root}/data/derived/Mortality Rates/NCHS-SSA-IRS Deaths and Populations/death_and_pop_counts_multisource.dta", replace 130 | project, creates("${root}/data/derived/Mortality Rates/NCHS-SSA-IRS Deaths and Populations/death_and_pop_counts_multisource.dta") 131 | -------------------------------------------------------------------------------- 
/code/mortality/Decompose mortality into medical v external.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/Decompose mortality into medical v external" 12 | 13 | ************ 14 | 15 | * Load data 16 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") 17 | use "${root}/data/derived/final_covariates/cz_full_covariates.dta", clear 18 | keep cz *_mort_coll* pop2000 19 | 20 | rename ext* external* 21 | rename med* medical* 22 | 23 | * Run regressions 24 | clear matrix 25 | foreach c in 0 1 { 26 | foreach m in "external" "medical" { 27 | 28 | reg `m'_mort_coll`c' total_mort_coll`c' [w=pop2000], robust 29 | matrix reg_decompose = nullmat(reg_decompose), /// 30 | (_b[total_mort_coll`c'] \ _se[total_mort_coll`c'] \ e(N)) 31 | 32 | local cols `cols' `m'_coll`c' 33 | } 34 | } 35 | 36 | * Store regression results in dataset 37 | matrix colnames reg_decompose = `cols' 38 | clear 39 | svmat reg_decompose, names(col) 40 | 41 | gen result="" 42 | replace result = "coef" in 1 43 | replace result = "se" in 2 44 | replace result = "N" in 3 45 | order result 46 | 47 | * Output 48 | export delim using "${root}/scratch/Decompose mortality into medical v external/Regressions decomposing mortality rates into medical vs external causes.csv", /// 49 | replace 50 | project, creates("${root}/scratch/Decompose mortality into medical v external/Regressions decomposing mortality rates into medical vs external causes.csv") 51 | -------------------------------------------------------------------------------- /code/mortality/Lag invariance.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | if (c(os)=="Windows") global img wmf 12 | else global img png 13 | 14 | * Create required folders 15 | cap mkdir "${root}/scratch/Lag invariance" 16 | cap mkdir "${root}/scratch/Lag invariance/data" 17 | 18 | /*** Examines the "lag invariance" property of mortality rates, and shows 19 | that this is because of the high degree of serial correlation of income. 
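	Here "lag invariance" means that the mortality-income percentile profile is nearly
	unchanged whether income rank is measured 2, 5, or 10 years before the mortality
	year; the figures below show this for 50-54 year olds in 2014.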
20 | ***/ 21 | 22 | ************************************************** 23 | *** Income Lag Invariance - Mortality Profiles *** 24 | ************************************************** 25 | 26 | * Load mortality data with all income lags 27 | project, original("${root}/data/derived/Mortality Rates/With all income lags/mskd_national_mortratesBY_gnd_hhincpctile_age_year_WithAllLags.dta") 28 | use "${root}/data/derived/Mortality Rates/With all income lags/mskd_national_mortratesBY_gnd_hhincpctile_age_year_WithAllLags.dta", clear 29 | 30 | * Compute unweighted average mortality rate of 50-54 year olds in 2014 31 | replace mortrate = mortrate * 100000 32 | keep if inrange(age_at_d,50,54) & yod==2014 33 | 34 | isid gnd pctile age_at_d lag 35 | collapse (mean) mortrate, by(gnd pctile lag) 36 | recast float mortrate, force 37 | 38 | * Plot mortality-income profile at different income lags 39 | foreach gender in "Male" "Female" { 40 | 41 | local g=substr("`gender'",1,1) 42 | 43 | * Plot fig 44 | twoway (scatter mortrate pctile if lag==2, ms(o)) /// 45 | (scatter mortrate pctile if lag==5, ms(t)) /// 46 | (scatter mortrate pctile if lag==10, ms(s)) /// 47 | if gnd=="`g'", /// 48 | ylab(0(500)1500, gmin gmax) /// 49 | xtitle("Household Income Percentile") /// 50 | ytitle("Deaths per 100,000") title("") /// 51 | legend( /// 52 | ring(0) pos(2) c(1) order(3 2 1) bmargin(large) /// 53 | label(1 "2 year lag") label(2 "5 year lag") label(3 "10 year lag") /// 54 | ) 55 | graph export "${root}/scratch/Lag invariance/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.${img}", replace 56 | project, creates("${root}/scratch/Lag invariance/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.${img}") preserve 57 | 58 | * Export data underlying fig 59 | export delim if gnd=="`g'" & inlist(lag,2,5,10) /// 60 | using "${root}/scratch/Lag invariance/data/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.csv", replace 61 | project, creates("${root}/scratch/Lag invariance/data/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.csv") preserve 62 | 63 | } 64 | 65 | ************************************ 66 | *** Serial Correlation of Income *** 67 | ************************************ 68 | 69 | * Load correlations 70 | foreach g in "M" "F" { 71 | project, original("${root}/data/raw/IRS correlations between income lags/corr_`g'.xlsx") 72 | import excel using "${root}/data/raw/IRS correlations between income lags/corr_`g'.xlsx", clear /// 73 | firstrow 74 | rename A corrvar 75 | keep corrvar l0_pctile 76 | 77 | drop if corrvar=="mort" 78 | gen byte lag = real( regexs(regexm(corrvar,"^l([0-9]+)_pctile$")) ) 79 | drop corrvar 80 | 81 | rename l0_pctile incomecorr_`g' 82 | 83 | tempfile income_corr_`g' 84 | save `income_corr_`g'' 85 | } 86 | 87 | * Combine male and female serial income correlations 88 | use `income_corr_M' 89 | merge 1:1 lag using `income_corr_F', assert(3) nogen 90 | order lag 91 | 92 | twoway (connect incomecorr_M lag, m(t)) /// 93 | (connect incomecorr_F lag, m(o)) /// 94 | if lag<=10, /// 95 | xtitle("Lag (x)") /// 96 | ytitle("Correlation Between Rank in Year t and t - x") /// 97 | ylabel(0 "0" .2 "0.2" .4 "0.4" .6 "0.6" .8 "0.8" 1 "1") /// 98 | legend( /// 99 | ring(0) pos(4) c(1) bmargin(medium) /// 100 | label(1 "Men") label(2 "Women") /// 101 | ) 102 | graph export "${root}/scratch/Lag invariance/Serial correlation of income.${img}", replace 103 | project, 
creates("${root}/scratch/Lag invariance/Serial correlation of income.${img}") preserve 104 | 105 | * Export data underlying fig 106 | export delim if lag<=10 /// 107 | using "${root}/scratch/Lag invariance/data/Serial correlation of income.csv", replace 108 | project, creates("${root}/scratch/Lag invariance/data/Serial correlation of income.csv") 109 | 110 | 111 | -------------------------------------------------------------------------------- /code/mortality/Output income means by national income quantile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/National income means by quantile" 15 | cap mkdir "${root}/scratch/Income means at national percentiles" 16 | 17 | * Erase output numbers 18 | cap erase "${root}/scratch/Income means at national percentiles/Income means at national p5 and p95 by gender.csv" 19 | 20 | /*** Output mean income by national income percentile, ventile, and quartile, 21 | by gender, pooling across years and ages. 22 | ***/ 23 | 24 | **************************************** 25 | *** Mean income by income percentile *** 26 | **************************************** 27 | 28 | * Load national mortality rates 29 | project, original("${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 30 | use if age_at_d>=40 using "${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 31 | 32 | * Pool all years and ages (years 2001-2014, ages 40-76) 33 | collapse (mean) hh_inc (rawsum) count [w=count], by(gnd pctile) 34 | 35 | * Export data 36 | save13 "${root}/scratch/National income means by quantile/National income means by percentile.dta", /// 37 | replace 38 | project, creates("${root}/scratch/National income means by quantile/National income means by percentile.dta") preserve 39 | 40 | *** Paper numbers 41 | foreach g in "M" "F" { 42 | foreach p in 5 95 { 43 | 44 | sum hh_inc if gnd=="`g'" & pctile==`p' 45 | assert r(N)==1 46 | scalarout using "${root}/scratch/Income means at national percentiles/Income means at national p5 and p95 by gender.csv", /// 47 | id("National mean income at p`p' pooling ages and years: `g'") /// 48 | num(`=r(mean)') fmt(%12.0fc) 49 | } 50 | } 51 | project, creates("${root}/scratch/Income means at national percentiles/Income means at national p5 and p95 by gender.csv") 52 | 53 | 54 | ************************************** 55 | *** Mean income by income quartile *** 56 | ************************************** 57 | 58 | * Load national mortality rates 59 | project, original("${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 60 | use if age_at_d>=40 using "${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 61 | 62 | * Collapse to quartiles, pooling all years and ages (years 2001-2014, ages 40-76) 63 | gen quartile=ceil(pctile/25) 64 | collapse (mean) hh_inc (rawsum) count [w=count], by(gnd quartile) 65 | 66 | * Export data 67 | save13 "${root}/scratch/National income means by quantile/National income means by quartile.dta", /// 68 | replace 69 | 
project, creates("${root}/scratch/National income means by quantile/National income means by quartile.dta") 70 | 71 | 72 | ************************************* 73 | *** Mean income by income ventile *** 74 | ************************************* 75 | 76 | * Load national mortality rates 77 | project, original("${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 78 | use if age_at_d>=40 using "${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 79 | 80 | * Collapse to ventiles, pooling all years and ages (years 2001-2014, ages 40-76) 81 | gen ventile=ceil(pctile/5) 82 | collapse (mean) hh_inc (rawsum) count [w=count], by(gnd ventile) 83 | 84 | * Export data 85 | save13 "${root}/scratch/National income means by quantile/National income means by ventile.dta", /// 86 | replace 87 | project, creates("${root}/scratch/National income means by quantile/National income means by ventile.dta") 88 | 89 | -------------------------------------------------------------------------------- /code/mortality/Summary stats on number of obs and income.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/Summary stats on number of observations age and income" 12 | 13 | * Erase output numbers 14 | cap erase "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv" 15 | 16 | /*** Calculate the total number of observations, mean and median household income 17 | for working individuals, and the mean age of individuals in our sample. 
18 | ***/ 19 | 20 | ************* 21 | 22 | 23 | * Load mortality rates 24 | project, original("${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 25 | use "${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 26 | keep if age_at_d >= 40 27 | 28 | sum hh_inc [w=count] if age_at_d <= 63, detail 29 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 30 | id("Working Individuals Mean Household Income") /// 31 | num(`=r(mean)') fmt(%9.0fc) 32 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 33 | id("Working Individuals Median Household Income") /// 34 | num(`=r(p50)') fmt(%9.0fc) 35 | 36 | sum age_at_d [w=count] 37 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 38 | id("Working Individuals Mean Age") /// 39 | num(`=r(mean)') fmt(%9.1f) 40 | 41 | sum count 42 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 43 | id("Total Number of Observations") /// 44 | num(`=r(sum)') fmt(%14.0fc) 45 | 46 | ***************************************** 47 | *** Project creates: reported numbers *** 48 | ***************************************** 49 | 50 | project, creates("${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv") 51 | 52 | -------------------------------------------------------------------------------- /code/mortality/construct_cdc_mortrates.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global raw "${root}/data/raw" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/Mortality Rates" 15 | cap mkdir "${root}/data/derived/Mortality Rates/CDC-SSA Life Table" 16 | 17 | 18 | /*** Generate national mortality rates by Gender x Age, for ages 0-119, from: 19 | 20 | - CDC Life Tables for Ages 0-99, taking a simple average of years 2001-2011 21 | - CDC does not report mortality rates past age 99. 
22 | 23 | - SSA Life Tables for Ages 100-119, using data from 2000 life table 24 | 25 | ***/ 26 | 27 | ******************** 28 | 29 | 30 | *** Reference docs 31 | project, relies_on("${raw}/CDC Life Tables/source.txt") 32 | project, relies_on("${raw}/CDC Life Tables/download_CDC_life_tables.do") 33 | project, relies_on("${raw}/SSA Life Tables/source.txt") 34 | 35 | *** Load CDC mortality rates from 0-99 36 | 37 | * Load data 38 | project, original("${raw}/CDC Life Tables/CDC_LifeTables.dta") 39 | use "${raw}/CDC Life Tables/CDC_LifeTables.dta", clear 40 | tab year 41 | 42 | * Average over years 43 | isid gnd age year 44 | collapse (mean) mortrate, by(gnd age) 45 | 46 | 47 | *** Append SSA mortality rates at 100+ 48 | project, original("${raw}/SSA Life Tables/SSA_LifeTables_2000.dta") preserve 49 | append using "${raw}/SSA Life Tables/SSA_LifeTables_2000.dta", gen(_append) 50 | drop if _append==1 & age<100 51 | drop _append 52 | 53 | *** Output 54 | sort gnd age 55 | bys gnd: assert _N==120 & age[1]==0 & age[120]==119 56 | rename mortrate cdc_mort 57 | save13 "${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta", replace 58 | project, creates("${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta") 59 | -------------------------------------------------------------------------------- /code/nlms/Explore mortality profiles implied by race shifters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | if (c(os)=="Windows") global img wmf 12 | else global img png 13 | 14 | 15 | /*** Plot NLMS Gompertz mortality-age profiles implied by estimated race shifters. 16 | - Separate series for each race. 17 | - Separate panel for each gender. 18 | ***/ 19 | 20 | ************************ 21 | *** Generate Figures *** 22 | ************************ 23 | 24 | project, original("${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta") 25 | use "${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta" 26 | 27 | foreach g in "Male" "Female" { 28 | 29 | streg i.csdiv i.incq i.racegrp if gnd==`"`=substr("`g'",1,1)'"', dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) coeflegend 30 | 31 | * Note: plotting a different income quartile or census plot would just shift and pivot all the lines, 32 | * without changing their relative positions. 
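	* A reference note on what the -function- plots below trace out (assuming analysis time
	* is measured from age 40, as noted in nlms_generate_shifters.do): under
	* -streg, dist(gompertz) nohr- the log hazard is log h(age) = _b[_t:_cons] +
	* _b[gamma:_cons]*(age - 40) for the reference group, and each race adds its estimated
	* shifters to both the intercept (_b[_t:#.racegrp]) and the slope (_b[gamma:#.racegrp]).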
33 | 34 | twoway (function y = _b[_t:_cons] + _b[gamma:_cons] * (x - 40), range(40 90)) /// 35 | (function y = _b[_t:_cons] + _b[_t:2.racegrp] + (_b[gamma:_cons] + _b[gamma:2.racegrp]) * (x - 40), range(40 90)) /// 36 | (function y = _b[_t:_cons] + _b[_t:3.racegrp] + (_b[gamma:_cons] + _b[gamma:3.racegrp]) * (x - 40), range(40 90)) /// 37 | (function y = _b[_t:_cons] + _b[_t:4.racegrp] + (_b[gamma:_cons] + _b[gamma:4.racegrp]) * (x - 40), range(40 90)), /// 38 | title(`g') xtitle("Age") ytitle("Log Mortality Rate") /// 39 | xlab(40(10)90) /// 40 | legend(ring(0) pos(4) c(1) order(2 1 3 4)) legend(lab(1 White) lab(2 Black) lab(3 Hispanic) lab(4 Asian)) /// 41 | graphregion(fcolor(white)) 42 | graph export "${root}/scratch/NLMS mortality profiles by race/NLMS_raceshifter_mortality_profiles_`g'.${img}", replace 43 | project, creates("${root}/scratch/NLMS mortality profiles by race/NLMS_raceshifter_mortality_profiles_`g'.${img}") preserve 44 | 45 | } 46 | -------------------------------------------------------------------------------- /code/nlms/nlms_generate_shifters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set working directory 11 | cd "$root/data/derived/NLMS" 12 | 13 | * Create required folders 14 | cap mkdir "$root/data/derived/NLMS/raceshifters" 15 | 16 | /*** Estimate the differences in Gompertz intercept and slope by race, 17 | relative to the non-Black non-Hispanic group. 18 | 19 | Generate the estimates for three different by-groups: 20 | - Sex 21 | - Sex x Income Quartile 22 | - Sex x Census Region 23 | ***/ 24 | 25 | **************** 26 | *** Programs *** 27 | **************** 28 | 29 | program define nlms_estimate_raceshifters 30 | 31 | /*** Given an NLMS panel, estimate the Gompertz parameters for each race 32 | while controlling for Income Quartile and Census Division. 33 | 34 | Return the race shifters, which are the differences in Gompertz 35 | parameters relative to the non-Black non-Hispanic group.
36 | 37 | Has options for 3 by-groups: 38 | - by Sex 39 | - by Sex x Income Quartile 40 | - by Sex x Census Region 41 | 42 | ***/ 43 | 44 | syntax using/, [ bootstrap replace stderr save_bysex(string) save_bysexincq(string) save_bysexcsregion(string) ] 45 | 46 | ***************** 47 | *** Load data *** 48 | ***************** 49 | 50 | * Load NLMS sample 51 | use `"`using'"', clear 52 | 53 | * If bootstrapping: assign the number of draws for each observation, 54 | * using weights to determine probability of draw 55 | if ("`bootstrap'"!="") { 56 | 57 | wsample num_draws, wt(wt) 58 | keep if num_draws>0 // for speed, drop unnecessary obs 59 | 60 | streset [fw=num_draws], noshow 61 | drop wt startage endage inddea // for speed, drop unnecessary vars 62 | 63 | } 64 | 65 | ****************************** 66 | *** Estimate race shifters *** 67 | ****************************** 68 | 69 | *** Specify parameters to save 70 | local save_est /// 71 | diff_gomp_int_black = _b[_t:2.racegrp] /// 72 | diff_gomp_int_hisp = _b[_t:3.racegrp] /// 73 | diff_gomp_int_asian = _b[_t:4.racegrp] /// 74 | diff_gomp_slope_black = _b[gamma:2.racegrp] /// 75 | diff_gomp_slope_hisp = _b[gamma:3.racegrp] /// 76 | diff_gomp_slope_asian = _b[gamma:4.racegrp] 77 | 78 | if ("`stderr'"=="stderr") local save_est `save_est' /// 79 | se_int_black = _se[_t:2.racegrp] /// 80 | se_int_hisp = _se[_t:3.racegrp] /// 81 | se_int_asian = _se[_t:4.racegrp] /// 82 | se_slope_black = _se[gamma:2.racegrp] /// 83 | se_slope_hisp = _se[gamma:3.racegrp] /// 84 | se_slope_asian = _se[gamma:4.racegrp] /// 85 | 86 | *** Run regressions 87 | 88 | * by Sex 89 | if ("`save_bysex'"!="") statsby `save_est', saving(`save_bysex', `replace') /// 90 | by(gnd): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 91 | 92 | * by Sex x Income Quartile 93 | if ("`save_bysexincq'"!="") statsby `save_est', saving(`save_bysexincq', `replace') /// 94 | by(gnd incq): streg i.csdiv i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.racegrp) 95 | 96 | * by Sex x Census Region 97 | if ("`save_bysexcsregion'"!="") statsby `save_est', saving(`save_bysexcsregion', `replace') /// 98 | by(gnd csregion): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 99 | 100 | *************************** 101 | *** save13 and add note *** 102 | *************************** 103 | 104 | * If not bootstrapping, reopen each saved file and save13 it, with a note 105 | if ("`bootstrap'"=="") { 106 | 107 | foreach file in "`save_bysex'" "`save_bysexincq'" "`save_bysexcsregion'" { 108 | if ("`file'"=="") continue // skip by-group if wasn't specified 109 | 110 | use "`file'", clear 111 | label data "NOTE: Gompertz intercepts in this data correspond to intercepts at age 40." 
112 | save13 `"`file'"', replace 113 | } 114 | 115 | } 116 | 117 | end 118 | 119 | 120 | *************************************** 121 | *** Generate Gompertz race shifters *** 122 | *************************************** 123 | 124 | project, uses("nlms_v5_s11_sampleA.dta") 125 | 126 | nlms_estimate_raceshifters using "nlms_v5_s11_sampleA.dta", stderr /// 127 | save_bysex("raceshifters/raceshifters_v5A_BYsex.dta") /// 128 | save_bysexincq("raceshifters/raceshifters_v5A_BYsex_incq.dta") /// 129 | save_bysexcsregion("raceshifters/raceshifters_v5A_BYsex_csregion.dta") /// 130 | replace 131 | 132 | project, creates("raceshifters/raceshifters_v5A_BYsex.dta") 133 | project, creates("raceshifters/raceshifters_v5A_BYsex_incq.dta") 134 | project, creates("raceshifters/raceshifters_v5A_BYsex_csregion.dta") 135 | 136 | -------------------------------------------------------------------------------- /code/nlms/nlms_loaddata.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global v4 "${root}/data/raw/NLMS" 12 | global v5 "${root}/data/raw/NLMS v5" 13 | 14 | * Create necessary folders 15 | cap mkdir "$root/data/derived/NLMS" 16 | 17 | /*** Load raw NLMS data into Stata format 18 | ***/ 19 | 20 | ************************************ 21 | *** Load NLMS PUMS v5, Sample 11 *** 22 | ************************************ 23 | 24 | *** Brief summary of PUMS Release 5 data, derived from docs 25 | 26 | /* 27 | The NLMS PUMS v5 combines CPS and Census data with death certificates, over the period 1973-2011. 28 | We use the NLMS public-use files. 29 | 30 | File 11 contains a subset of the 39 NLMS cohorts that can be followed prospectively for 11 years. 31 | In lieu of identifying the CPS year and starting point of mortality follow-up for each file, 32 | all of the records in File 11 have been assigned an imaginary starting point conceptually 33 | identified as April 1, 1990. These records are then tracked forward for 11 years to observe 34 | whether person in the file has died. 35 | 36 | 37 | Weights for Release 5 of File 11 are normalized to the U.S. non-institutionalized population on April 1, 1990. 38 | [There is an inconsistency on this point in the documentation we received from the NLMS, 39 | which we clarified with Norman Johnson at the Census Bureau.] 
40 | */ 41 | 42 | 43 | *** Documentation 44 | 45 | project, relies_on("${v5}/docs/Reference Manual Version 5.pdf") 46 | project, relies_on("${v5}/docs/Read_me.pdf") 47 | 48 | 49 | *** Load data into Stata 50 | 51 | * Load raw data 52 | project, original("${v5}/read_pubfile5.dct") 53 | project, original("${v5}/11.dat") 54 | project, original("${v4}/state_codes_nlms_fips.dta") // note: crosswalk is same in v4 and v5 55 | 56 | infile using "${v5}/read_pubfile5.dct", using("${v5}/11.dat") clear 57 | 58 | * Merge on FIPS codes 59 | merge m:1 stater using "${v4}/state_codes_nlms_fips.dta", keepusing(statefips) nogen assert(3) 60 | 61 | * Output 62 | compress 63 | isid record 64 | sort record 65 | save13 "$root/data/derived/NLMS/nlms_v5_s11_raw.dta", replace 66 | project, creates("$root/data/derived/NLMS/nlms_v5_s11_raw.dta") 67 | 68 | 69 | -------------------------------------------------------------------------------- /code/nlms/nlms_mortality_profiles_BYrace.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | if (c(os)=="Windows") global img wmf 12 | else global img png 13 | 14 | * Create required folders 15 | cap mkdir "${root}/scratch/NLMS mortality profiles by race" 16 | cap mkdir "${root}/scratch/NLMS mortality profiles by race/data" 17 | 18 | /*** Plot NLMS log mortality-agebin profiles from binned raw data, 19 | drawing OLS fit lines through them illustrating Gompertz relationship. 20 | 21 | - Separate series for each race. 22 | - Separate panel for each gender. 
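	Under the Gompertz assumption the log mortality rate is approximately linear in age,
	so a straight OLS fit through the binned log rates should track the points closely.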
23 | ***/ 24 | 25 | 26 | ******************** 27 | *** Create panel *** 28 | ******************** 29 | 30 | * Load data 31 | project, original("${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta") 32 | use "${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta", clear 33 | stset, clear 34 | 35 | * Create panel structure 36 | expand 11 // creates 11 duplicate obs, because we observe up to 11 full years 37 | bys record: replace age = age + (_n-1) // increment age for 11 follow-up obs 38 | keep if inrange(age,startage,endage) // keep observations in risk period (income lagged 2 years and before death) 39 | 40 | * Replace age with average age in year of observation 41 | /* Someone who dies in a given year dies on average 6 months into the year */ 42 | replace age=age+0.5 43 | 44 | * Generate indicator for year of death 45 | gen byte dead=0 46 | isid record age 47 | bys record (age): replace dead=1 if _n==_N & inddea==1 48 | 49 | rename inddea r_dead 50 | label var r_dead "Death Ever Observed indicator" 51 | label var dead "Year of Death indicator" 52 | 53 | * Generate age bins, coding agebin as the center of the bin 54 | assert inrange(age,40,72) 55 | g agebin = floor(age/5)*5 + 2 56 | replace agebin = 71 if agebin==72 // center of last agebin (ages 70-72) 57 | assert inrange(agebin,40,71) 58 | 59 | 60 | ************************ 61 | *** Generate Figures *** 62 | ************************ 63 | 64 | *** Collapse to mortality rates 65 | 66 | * Define mortality rate in each Age Bin x Sex x Race x Income Quartile group G as 67 | * number of individual life-years in group G that are dead by a+1 68 | * divided by the number of individual life-years in group G 69 | collapse (mean) dead [w=wt], by(agebin gnd racegrp incq) 70 | 71 | * Income-adjust by taking unweighted average across income bins 72 | collapse (mean) dead, by(agebin gnd racegrp) 73 | 74 | 75 | *** Create binscatters 76 | * Exclude agebin 71 because it's a 3-year bin instead of a 5-year bin 77 | 78 | g l_mortrate = log(dead) 79 | 80 | foreach g in "M" "F" { 81 | 82 | * Generate scatter 83 | binscatter l_mortrate agebin if gnd=="`g'" & agebin<70, by(racegrp) /// 84 | mcolor(navy maroon) xscale(range(40 70)) /// 85 | xtit("Age Bin in Years") ytit("Log Mortality Rate") tit("") /// 86 | ylabel(-7(1)-3) yscale(range(-7 -3)) legend(ring(0) pos(4) c(1) order(2 1 3 4) bmargin(medium) /// 87 | label(1 "White") label(2 "Black") label(3 "Hispanic") label(4 "Asian")) msymbol(circle triangle square diamond) /// 88 | xlab(42 "40-44" 47 "45-49" 52 "50-54" 57 "55-59" 62 "60-64" 67 "65-69"/* 71 "70-72"*/) 89 | graph export "${root}/scratch/NLMS mortality profiles by race/NLMS_raw_mortality_profiles_wOLSfitline_`g'.${img}", replace 90 | project, creates("${root}/scratch/NLMS mortality profiles by race/NLMS_raw_mortality_profiles_wOLSfitline_`g'.${img}") preserve 91 | 92 | * Export underlying data 93 | export delim gnd racegrp agebin l_mortrate if gnd=="`g'" & agebin<70 /// 94 | using "${root}/scratch/NLMS mortality profiles by race/data/NLMS_raw_mortality_profiles_wOLSfitline_`g'.csv", replace 95 | project, creates("${root}/scratch/NLMS mortality profiles by race/data/NLMS_raw_mortality_profiles_wOLSfitline_`g'.csv") preserve 96 | 97 | 98 | } 99 | -------------------------------------------------------------------------------- /code/raceshares/Explore CZ race share maps.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root 
`r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | global img png 13 | 14 | * Create necessary folders 15 | cap mkdir "${root}/scratch/Race share maps" 16 | 17 | /*** Plot CZ maps of race shares, by: 18 | 1. Sex 19 | 2. Sex x Income Quartile 20 | ***/ 21 | 22 | project, original("$root/code/ado/compute_raceshares.ado") 23 | 24 | ******************************** 25 | *** WITHOUT income quartiles *** 26 | ******************************** 27 | 28 | * Load data 29 | project, original("$derived/raceshares/cz_racesharesBY_agebin_gnd.dta") 30 | use "$derived/raceshares/cz_racesharesBY_agebin_gnd.dta", clear 31 | 32 | * Aggregate to CZ-level, and compute raceshares 33 | collapse (sum) pop_*, by(cz) 34 | compute_raceshares, by(cz) 35 | 36 | * Convert race shares to percentages 37 | foreach var of varlist raceshare_* { 38 | replace `var'=`var'*100 39 | } 40 | 41 | * Maps 42 | foreach r in "asian" "black" "hispanic" { 43 | maptile raceshare_`r', geo(cz) legd(1) cutv(5(5)40) /// 44 | geofolder("$root/code/ado_maptile_geo") /// 45 | twopt(title("Race Shares, all Q, `r' %") subtitle("last bin is >=40%")) /// 46 | savegraph("${root}/scratch/Race share maps/raceshares_ALLQ_`r'.${img}") replace 47 | project, creates("${root}/scratch/Race share maps/raceshares_ALLQ_`r'.${img}") preserve 48 | } 49 | maptile raceshare_other, geo(cz) legd(1) cutv(60(5)95) /// 50 | geofolder("$root/code/ado_maptile_geo") /// 51 | twopt(title("Race Shares, all Q, white %") subtitle("first bin is <=60%")) /// 52 | savegraph("${root}/scratch/Race share maps/raceshares_ALLQ_white.${img}") replace 53 | project, creates("${root}/scratch/Race share maps/raceshares_ALLQ_white.${img}") preserve 54 | 55 | ***************************** 56 | *** WITH income quartiles *** 57 | ***************************** 58 | 59 | * Load data 60 | project, original("$derived/raceshares/cz_racesharesBY_agebin_gnd_hhincquartile.dta") 61 | use "$derived/raceshares/cz_racesharesBY_agebin_gnd_hhincquartile.dta", clear 62 | 63 | * Aggregate to CZ x Income Quartile level, and compute raceshares 64 | collapse (sum) pop_*, by(cz hh_inc_q) 65 | compute_raceshares, by(cz hh_inc_q) 66 | 67 | * Convert race shares to percentages 68 | foreach var of varlist raceshare_* { 69 | replace `var'=`var'*100 70 | } 71 | 72 | * Maps 73 | foreach q in 1 4 { 74 | foreach r in "asian" "black" "hispanic" { 75 | maptile raceshare_`r' if hh_inc_q==`q', geo(cz) legd(1) cutv(5(5)40) /// 76 | geofolder("$root/code/ado_maptile_geo") /// 77 | twopt(title("Race Shares, Q`q', `r' %") subtitle("last bin is >=40%")) /// 78 | savegraph("${root}/scratch/Race share maps/raceshares_Q`q'_`r'.${img}") replace 79 | project, creates("${root}/scratch/Race share maps/raceshares_Q`q'_`r'.${img}") preserve 80 | } 81 | maptile raceshare_other if hh_inc_q==`q', geo(cz) legd(1) cutv(60(5)95) /// 82 | geofolder("$root/code/ado_maptile_geo") /// 83 | twopt(title("Race Shares, Q`q', white %") subtitle("first bin is <=60%")) /// 84 | savegraph("${root}/scratch/Race share maps/raceshares_Q`q'_white.${img}") replace 85 | project, creates("${root}/scratch/Race share maps/raceshares_Q`q'_white.${img}") preserve 86 | } 87 | -------------------------------------------------------------------------------- /code/raceshares/Explore income distribution over 16 income bins in County SF3 data.do: 
-------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "$root/scratch/SF3 income distributions" 12 | 13 | ************************ 14 | 15 | ********************************************************* 16 | *** Explore income distribution across 16 income bins *** 17 | ********************************************************* 18 | 19 | * Load data 20 | project, uses("$root/data/derived/raceshares/cty_racepopBY_workingagebin_hhincbin.dta") 21 | use "$root/data/derived/raceshares/cty_racepopBY_workingagebin_hhincbin.dta", clear 22 | 23 | * Create national income distribution by agebin, collapsing over counties 24 | isid cty hh_inc_bin agebin 25 | collapse (sum) pop_total, by(hh_inc_bin agebin) 26 | 27 | * Display income distribution 28 | bys agebin: tab hh_inc_bin [w=pop_total] 29 | 30 | * Convert from population counts to relative frequencies, by agebin 31 | gen float frac_total=. 32 | foreach a in 35 45 55 { 33 | sum pop_total if agebin==`a', meanonly 34 | replace frac_total=pop_total/`r(sum)' if agebin==`a' 35 | } 36 | 37 | * Reshape agebin wide 38 | drop pop_total 39 | rename frac_total frac_agebin 40 | reshape wide frac_agebin, i(hh_inc_bin) j(agebin) 41 | 42 | * Output 43 | export delim "$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincbin.csv", replace 44 | project, creates("$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincbin.csv") preserve 45 | 46 | 47 | ****************************************************************** 48 | *** Explore manual assignment of 16 income bins into quartiles *** 49 | ****************************************************************** 50 | 51 | * Reshape agebin long 52 | reshape long frac_agebin, i(hh_inc_bin) j(agebin) 53 | 54 | * Manually assign income bins to quartiles, separately by age bin 55 | gen byte hh_inc_q = hh_inc_bin 56 | recode hh_inc_q (1/5 = 1) (6/9 = 2) (10/11 = 3) (12/16 = 4) if agebin==35 57 | recode hh_inc_q (1/6 = 1) (7/10 = 2) (11/12 = 3) (13/16 = 4) if agebin==45 58 | recode hh_inc_q (1/4 = 1) (5/8 = 2) (9/11 = 3) (12/16 = 4) if agebin==55 59 | 60 | * Collapse fractions to quartiles 61 | collapse (sum) frac_agebin, by(hh_inc_q agebin) 62 | 63 | * Reshape agebin wide 64 | reshape wide frac_agebin, i(hh_inc_q) j(agebin) 65 | 66 | * Output 67 | export delim "$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincq.csv", replace 68 | project, creates("$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincq.csv") 69 | -------------------------------------------------------------------------------- /code/raceshares/cty_cz_st_raceshareBY_year_agebin_gnd_hhincquantile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | /*** 11 | Generate County, CZ and State race shares by 12 | (Year x) 5-Year Age Bin x Gender (x Income Quartile). 
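 compute_raceshares.ado is not reproduced in this listing. Conceptually, within each by() cell it converts the four race population counts into shares of the cell total. A minimal sketch of that computation (an assumption about the ado's internals, shown only to fix ideas; it can be written per row because the collapses below leave one row per by() cell):

     egen double pop_cell = rowtotal(pop_black pop_asian pop_hispanic pop_other)
     foreach r in black asian hispanic other {
         gen double raceshare_`r' = pop_`r' / pop_cell
     }
     drop pop_cell

 The raceshare_* variables then sum to one within each cell, and are missing where the cell population is zero (as noted below for some county-level cells).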
13 | ***/ 14 | 15 | **************** 16 | 17 | 18 | project, relies_on("$root/code/ado/compute_raceshares.ado") 19 | 20 | cap program drop produce_local_raceshares 21 | program define produce_local_raceshares 22 | 23 | /*** Loads race population data that has an income quantile dimension, then 24 | computes and saves race shares: 25 | 1. by Year x Geo x Age Bin x Gender x Quantile 26 | 2. by Geo x Age Bin x Gender x Quantile 27 | 3. by Geo x Age Bin x Gender 28 | 29 | Each set of generated race shares can be individually turned on or off. 30 | ***/ 31 | 32 | syntax , geo(name) qvar(name) dta_qstub(string) [ trends levels noinc ] 33 | 34 | * Load data 35 | project, uses("$root/data/derived/raceshares/`geo'_racepopBY_year_agebin_gnd_`dta_qstub'.dta") 36 | use "$root/data/derived/raceshares/`geo'_racepopBY_year_agebin_gnd_`dta_qstub'.dta", clear 37 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 38 | 39 | *** Trends, by year *** 40 | 41 | if "`trends'"=="trends" { 42 | * Compute race shares 43 | compute_raceshares, by(`geo' year agebin gnd `qvar') 44 | 45 | * Save data 46 | save13 "$root/data/derived/raceshares/`geo'_racesharesBY_year_agebin_gnd_`dta_qstub'.dta", replace 47 | project, creates("$root/data/derived/raceshares/`geo'_racesharesBY_year_agebin_gnd_`dta_qstub'.dta") preserve 48 | } 49 | 50 | *** Levels, pooled *** 51 | 52 | * Pool populations over all years 53 | isid year `geo' agebin gnd `qvar' 54 | collapse (sum) pop_*, by(`geo' agebin gnd `qvar') 55 | 56 | if "`levels'"=="levels" { 57 | * Compute race shares 58 | compute_raceshares, by(`geo' agebin gnd `qvar') 59 | 60 | * Save data 61 | save13 "$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd_`dta_qstub'.dta", replace 62 | project, creates("$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd_`dta_qstub'.dta") preserve 63 | } 64 | 65 | *** Levels without income dim, pooled *** 66 | 67 | if "`inc'"=="noinc" { // note: Stata drops the "no" prefix for the local of options beginning with no 68 | * Pool populations over income bins 69 | isid `geo' agebin gnd `qvar' 70 | collapse (sum) pop_*, by(`geo' agebin gnd) 71 | 72 | * Compute race shares 73 | compute_raceshares, by(`geo' agebin gnd) 74 | 75 | * Save data 76 | save13 "$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd.dta", replace 77 | project, creates("$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd.dta") 78 | } 79 | 80 | end 81 | 82 | *************************** 83 | *** Compute race shares *** 84 | *************************** 85 | 86 | * Quartile race shares by County, CZ, State 87 | foreach geo in cty cz st { 88 | produce_local_raceshares, geo(`geo') qvar(hh_inc_q) dta_qstub(hhincquartile) trends levels noinc 89 | } 90 | 91 | * Note: some county-level cells have 0 total population, and race shares will therefore be missing 92 | 93 | * Ventile race shares by CZ 94 | produce_local_raceshares, geo(cz) qvar(hh_inc_v) dta_qstub(hhincventile) levels 95 | -------------------------------------------------------------------------------- /code/raceshares/cty_racepopBY_year_agebin_gnd.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | /*** 11 | 12 | Uses the county-level Intercensal and Postcensal estimate 
tables, which contain 13 | racial populations by County x 5-Year Age Bin x Gender x Year, each July. 14 | 15 | We use these data to generate the county population of each race by 16 | Age Bin x Gender x Year (2001-2014) from the 2000s and 2010s 17 | Intercensal/Postcensal estimates. 18 | 19 | ***/ 20 | 21 | 22 | ******************************************************************** 23 | 24 | cap program drop clean_cty_censal_estimates 25 | program define clean_cty_censal_estimates 26 | 27 | * Generate 5-digit county FIPS code 28 | gen long cty = 1000*state + county 29 | 30 | * Keep only desired age bins 31 | * --> age bins are 5-year age bins, labeled as the bottom age in each age bin 32 | keep if inrange(agegrp,9,16) 33 | replace agegrp=40+(agegrp-9)*5 34 | rename agegrp agebin 35 | 36 | * Keep only required race pops 37 | keep year cty agebin tot_male tot_female h_male h_female nhba_male nhba_female nhaa_male nhaa_female 38 | 39 | * Reshape gender long 40 | reshape long tot_ nhba_ nhaa_ h_, i(year cty agebin) j(gnd) string 41 | assert inlist(gnd,"male","female") 42 | replace gnd=cond(gnd=="male","M","F") 43 | compress gnd 44 | 45 | * Rename pop vars 46 | rename tot_ pop_total 47 | rename nhba_ pop_black 48 | rename nhaa_ pop_asian 49 | rename h_ pop_hispanic 50 | 51 | * Generate 'other' population count 52 | gen long pop_other = pop_total - pop_black - pop_asian - pop_hispanic 53 | drop pop_total 54 | 55 | end 56 | 57 | *************************************** 58 | *** 2000-2010 Intercensal Estimates *** 59 | *************************************** 60 | 61 | project, relies_on("$root/data/raw/Census County Intercensal Estimates 2000-2010/source.txt") // source URL 62 | project, relies_on("$root/data/raw/Census County Intercensal Estimates 2000-2010/Download county intercensal pop data.do") // automated download code 63 | project, relies_on("$root/data/raw/Census County Intercensal Estimates 2000-2010/CO-EST00INT-ALLDATA.pdf") // data dictionary 64 | 65 | * Load intercensal population data 66 | project, original("$root/data/raw/Census County Intercensal Estimates 2000-2010/county_intercensal_2000s_allstates.dta") 67 | use "$root/data/raw/Census County Intercensal Estimates 2000-2010/county_intercensal_2000s_allstates.dta", clear 68 | 69 | * Reshape/rename raw intercensal data 70 | clean_cty_censal_estimates 71 | 72 | * Keep only annual July estimates (drop 2000 and 2010 April Census counts) 73 | * --> see data dictionary for year codes 74 | drop if inlist(year,1,12) 75 | recode year (2=2000) (3=2001) (4=2002) (5=2003) (6=2004) (7=2005) (8=2006) /// 76 | (9=2007) (10=2008) (11=2009) (13=2010) 77 | 78 | * Save 79 | keep if inrange(year,2000,2009) 80 | tempfile cty_intercensal 81 | save `cty_intercensal' 82 | 83 | ************************************** 84 | *** 2010-2014 Postcensal Estimates *** 85 | ************************************** 86 | 87 | project, relies_on("$root/data/raw/Census County Postcensal Estimates 2010-2014/source.txt") // source URL 88 | project, relies_on("$root/data/raw/Census County Postcensal Estimates 2010-2014/CC-EST2014-ALLDATA.pdf") // data dictionary 89 | 90 | * Load postcensal population data 91 | project, original("$root/data/raw/Census County Postcensal Estimates 2010-2014/CC-EST2014-ALLDATA.csv") 92 | insheet using "$root/data/raw/Census County Postcensal Estimates 2010-2014/CC-EST2014-ALLDATA.csv", clear 93 | 94 | * Reshape/rename raw postcensal data 95 | clean_cty_censal_estimates 96 | 97 | * Keep only annual July estimates (drop 2010 April Census counts) 98 
| * --> see data dictionary for year codes 99 | keep if inrange(year,3,7) 100 | recode year (3=2010) (4=2011) (5=2012) (6=2013) (7=2014) 101 | 102 | 103 | *********************************** 104 | *** Combine data from 2001-2014 *** 105 | *********************************** 106 | append using `cty_intercensal' 107 | keep if inrange(year,2001,2014) 108 | 109 | * Convert 2000-2014 Inter/Postcensal counties to 1999 counties 110 | * see https://www.census.gov/geo/reference/county-changes.html 111 | isid cty year gnd agebin 112 | drop if cty == 15005 // Kalawao, Hawaii (pop ~90) 113 | recode cty (12086 = 12025) // Miami-Dade FIPS change 114 | recode cty (8014 = 8013) // Broomfield County created from Boulder, CO 115 | recode cty (2282 = 2231) (2275 2195 = 2280) (2230 2105 = 2231) /// 116 | (2198 = 2201) (2068 = 2290) // County changes in Alaska 117 | 118 | collapse (sum) pop*, by(cty year gnd agebin) // combine counties that split in 2000-2014 119 | 120 | /* Note: Bedford City (cty==51515) appears in 2001-2009 but not 2010-2014. See: -tab year-. 121 | 122 | This is because Bedford City was merged into a larger county in 2010-2014. It therefore 123 | appears in 1999 and 2000-2009 but not afterwards. 124 | */ 125 | 126 | 127 | * Output 128 | compress 129 | order year agebin gnd cty 130 | sort year agebin gnd cty 131 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 132 | save13 "$root/data/derived/raceshares/cty_racepopBY_year_agebin_gnd.dta", replace 133 | project, creates("$root/data/derived/raceshares/cty_racepopBY_year_agebin_gnd.dta") 134 | 135 | -------------------------------------------------------------------------------- /code/raceshares/national_Explore race fraction smoothing.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "$root/scratch/Race fraction smoothing figs" 12 | 13 | /*** 1. Visualize the smoothing of racial income distributions using 14 | different lowess bandwidths. 15 | 16 | Separately by: 17 | - Race 18 | - Age 19 | 20 | 2. Verify whether the smoothed percentiles are approximately equal in size. 21 | ***/ 22 | 23 | 24 | **************** 25 | *** Programs *** 26 | **************** 27 | 28 | cap program drop smoothing_graph 29 | program define smoothing_graph 30 | /*** For a given Lowess bandwidth, create a graph of the lowess-smoothed 31 | racial income distribution and the underlying scatterpoints.
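 Example call (illustrative; the batch loops at the bottom of this script pass race(all), which makes the program call itself once per race):

     smoothing_graph, race(black) age(45) income_pctile(hh_inc_pctile) lowess_bw(0.3)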
32 | ***/ 33 | 34 | syntax , race(string) age(integer) income_pctile(varname) lowess_bw(real) [ export ] 35 | 36 | if "`race'"=="all" { 37 | foreach race in black asian hispanic other { 38 | smoothing_graph, race(`race') age(`age') income_pctile(`income_pctile') lowess_bw(`lowess_bw') `export' 39 | } 40 | } 41 | else { 42 | 43 | tw (lowess frac_of_`race' `income_pctile' if age==`age' & gnd=="M" & `income_pctile'>0, bw(`lowess_bw')) /// 44 | (lowess frac_of_`race' `income_pctile' if age==`age' & gnd=="F" & `income_pctile'>0, bw(`lowess_bw')) /// 45 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="M" & `income_pctile'>0, mc(navy) ms(o)) /// 46 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="F" & `income_pctile'>0, mc(maroon) ms(o)) /// 47 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="M" & `income_pctile'<=0, mc(navy) ms(T)) /// 48 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="F" & `income_pctile'<=0, mc(maroon) ms(T)) /// 49 | , /// 50 | legend(lab(1 "Male") lab(2 "Female") order(1 2)) /// 51 | ytitle("Fraction of `race' age `age' population") 52 | if ("`export'"=="export") { 53 | local exportfile ${root}/scratch/Race fraction smoothing figs/racefrac_age`age'`race'_bw0`=subinstr("`lowess_bw'",".","",.)'.png 54 | graph export `"`exportfile'"', replace 55 | project, creates(`"`exportfile'"') preserve 56 | } 57 | 58 | *lowess frac_of_`race' `income_pctile' if age==`age' & `income_pctile'>0, by(gnd) bw(`lowess_bw') 59 | *if ("`export'"=="export") graph export "$root/scratch/lowess_racefrac_age`age'`race'_BY_sex_hhincpctile.png", replace 60 | 61 | } 62 | 63 | end 64 | 65 | ************************************************ 66 | *** Visualize different smoothing bandwidths *** 67 | ************************************************ 68 | 69 | * Load data 70 | project, original("$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta") 71 | use "$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta", clear 72 | drop smoothfrac_* 73 | 74 | * Generate and save lowess figs 75 | forvalues a=40(5)60 { 76 | smoothing_graph, race(all) age(`a') income_pctile(hh_inc_pctile) lowess_bw(0.2) export 77 | smoothing_graph, race(all) age(`a') income_pctile(hh_inc_pctile) lowess_bw(0.3) export 78 | } 79 | 80 | 81 | ********************************************************* 82 | *** Check population size in smoothed percentile bins *** 83 | ********************************************************* 84 | 85 | * Load data 86 | project, original("$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta") 87 | use "$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta", clear 88 | 89 | * Collapse away income dimension 90 | isid age gnd hh_inc_pctile 91 | collapse (sum) pop_*, by(age gnd) 92 | 93 | * Generate an income bin dimension 94 | expand 102 95 | bys age gnd: gen int hh_inc_pctile = _n - 2 96 | replace hh_inc_pctile=-2 if hh_inc_pctile==-1 97 | order age gnd hh_inc_pctile 98 | 99 | * Merge back in racial income distributions 100 | merge 1:1 age gnd hh_inc_pctile using "$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta", /// 101 | assert(3) keepusing(smoothfrac_*) nogen 102 | 103 | * Apply race fractions to adjust population counts 104 | foreach race in black asian hispanic other { 105 | replace pop_`race' = pop_`race' * smoothfrac_of_`race' 106 | } 107 | drop smoothfrac_* 108 | 109 | * Generate 
total population 110 | gen double pop_total = pop_black + pop_asian + pop_hispanic + pop_other 111 | 112 | * Analyze variation in total population across income percentiles 113 | egen max_percentile_pop=max(pop_total) if hh_inc_pctile>0, by(age gnd) 114 | egen min_percentile_pop=min(pop_total) if hh_inc_pctile>0, by(age gnd) 115 | egen mean_percentile_pop=mean(pop_total) if hh_inc_pctile>0, by(age gnd) 116 | 117 | collapse (mean) *_percentile_pop, by(age gnd) 118 | 119 | * Study relative error in population size of income percentiles 120 | gen relerr=(max_percentile_pop-min_percentile_pop)/mean_percentile_pop 121 | sum relerr, d 122 | -------------------------------------------------------------------------------- /code/raceshares/national_Race share extrapolation check - nonparametric age 51 test.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set global aliases 11 | global racedata $root/data/derived/raceshares 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/Age 51 Retirement Age Extrapolation Test" 15 | 16 | /*** Check how well we do at predicting race shares at retirement ages, 17 | by treating 52 as the retirement age instead of 62 and seeing 18 | how well we predict race shares for ages 52-61. 19 | 20 | Explanation: 21 | 22 | We only measure contemporaneous income in the Census, so we 23 | don't know the race shares of 62-year-olds by the income 24 | they had when they were 61. We therefore use the income distribution 25 | at age 61 to impute the income dimension at all retirement ages. 26 | 27 | In practice, we can't test how well this approximation works 28 | because the true race shares for retirees by working income are 29 | unobservable. (If they were observable, we'd be using them.) 30 | 31 | We therefore do a test where we treat 52 as the retirement age, 32 | use the racial income distributions at age 51 to impute income 33 | for ages 52-61, and compare the resulting race shares to the 34 | true race shares.
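 In symbols: letting frac_r(a,p) denote race r's smoothed income fraction at age a and percentile p, the extrapolated ("51test") shares at ages a = 52,...,61 are built from frac_r(51,p), while the true shares are built from frac_r(a,p). At a = 51 the two are identical by construction, which the assert block below checks to within 10^-6.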
35 | 36 | ***/ 37 | 38 | *********************** 39 | 40 | 41 | ******************************************************************* 42 | *** Nonparametric extrapolation check with age 51 extrapolation *** 43 | ******************************************************************* 44 | 45 | * Load race shares based on age 51 extrapolation 46 | project, original("$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta") 47 | use "$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta", clear 48 | rename raceshare_* raceshare_51test_* 49 | 50 | * Merge true race shares, based on observed age 51-61 income distributions 51 | project, original("$racedata/national_racesharesBY_age_gnd_hhincpctile.dta") preserve 52 | merge 1:1 age gnd pctile using "$racedata/national_racesharesBY_age_gnd_hhincpctile.dta", /// 53 | keep(match) assert(2 3) nogen 54 | 55 | * Check that race shares match exactly at age==51, since we're using age 51 race fracs 56 | foreach r in black asian hispanic other { 57 | assert abs(raceshare_51test_`r'-raceshare_`r')<10^-6 if age==51 58 | } 59 | 60 | * Output figures 61 | foreach income_pctile in 1 10 25 50 75 90 99 { 62 | foreach race in black asian hispanic other { 63 | tw (connect raceshare_`race' age) /// 64 | (connect raceshare_51test_`race' age, lpattern(dash)) /// 65 | if pctile == `income_pctile', /// 66 | by(gnd, title(`race' p`income_pctile')) /// 67 | legend(lab(1 "Observed") lab(2 "Extrapolated")) /// 68 | xlab(51(2)61) /// 69 | xtitle(Age) ytitle(Race Share) 70 | graph export "$root/scratch/Age 51 Retirement Age Extrapolation Test/raceshare_51test_p`income_pctile'_`race'.png", replace 71 | project, creates("$root/scratch/Age 51 Retirement Age Extrapolation Test/raceshare_51test_p`income_pctile'_`race'.png") preserve 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /code/raceshares/national_racepopBY_year_age_gnd_incpctile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set global aliases 11 | global racedata $root/data/derived/raceshares 12 | 13 | /*** 14 | 15 | Add an income dimension to the national race population data. 16 | 17 | Uses race fractions derived from the 2000 Census public-use microdata 18 | to assign income bins to the race populations in the 2000-2014 19 | Intercensal/Postcensal estimates. 20 | 21 | We apply the age 61 racial income distribution to the racial population for each 22 | age 62-76. This imputes the income dimension of the last working age we see for 23 | people who are at retirement ages. The procedure is an approximation of the 24 | procedure we perform at the IRS, where we use peoples' income at age 61 at all 25 | later ages. Note that we only observe contemporary income in the Census. 26 | 27 | We verify the accuracy of this approximation using a parametric simulation and a 28 | non-parametric check where we extrapolate forward from age 51. 
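 Worked example (hypothetical numbers, for illustration only): if the smoothed age-61 distribution places 0.8% of the black male population in household income percentile 50, then for each retirement age a = 62,...,76 and each year the imputed cell is

     pop_black(year, a, M, pctile 50) = 0.008 * pop_black(year, a, M),

 i.e. the same age-61 fractions are applied to every older age's race population.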
29 | 30 | ***/ 31 | 32 | ************************ 33 | 34 | cap program drop impute_income_dimension 35 | program define impute_income_dimension 36 | 37 | /*** Loads racial population data by Year x Age x Gender, 38 | then imputes an income dimension using racial income distributions, 39 | which are Age x Gender (same distribution used in all years). 40 | 41 | Uses the income distribution at the last working age for the 42 | income dimension at all subsequent ages. 43 | ***/ 44 | 45 | syntax using/, incomevar(name) [ retirementage(integer 62) ] 46 | 47 | project, uses(`"`using'"') 48 | 49 | * Load race population data 50 | project, uses("$racedata/national_racepopBY_year_age_gnd.dta") 51 | use "$racedata/national_racepopBY_year_age_gnd.dta", clear 52 | 53 | * Generate an income bin dimension 54 | isid year age gnd 55 | expand 102 56 | bys year age gnd: gen int `incomevar' = _n - 2 57 | replace `incomevar'=-2 if `incomevar'==-1 58 | order year age gnd `incomevar' 59 | 60 | * Generate a temporary "age" variable corresponding to the age of the income distribution 61 | * we want merged in. 62 | rename age trueage 63 | assert inrange(trueage,40,76) 64 | recode trueage (`retirementage'/76 = `=`retirementage'-1'), gen(age) 65 | 66 | * Merge in racial income distributions 67 | merge m:1 age gnd `incomevar' using `"`using'"', /// 68 | assert(2 3) keep(3) keepusing(smoothfrac_*) nogen 69 | // note: _merge==2 happens when we set the retirement age < 62, so the income distributions at some working ages are not used. 70 | 71 | drop age 72 | rename trueage age 73 | 74 | * Apply race fractions to adjust counts 75 | foreach race in black asian hispanic other { 76 | replace pop_`race' = pop_`race' * smoothfrac_of_`race' 77 | } 78 | drop smoothfrac_* 79 | 80 | * Output 81 | sort year age gnd `incomevar' 82 | isid year age gnd `incomevar' 83 | compress 84 | 85 | end 86 | 87 | 88 | ************************************* 89 | *** 2000 Census, Household income *** 90 | ************************************* 91 | 92 | impute_income_dimension using "$racedata/national_racefractionsBY_workingage_gnd_hhincpctile.dta", /// 93 | incomevar(hh_inc_pctile) 94 | 95 | save13 "$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta", replace 96 | project, creates("$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta") 97 | 98 | 99 | ************************************** 100 | *** 2000 Census, Individual income *** 101 | ************************************** 102 | 103 | impute_income_dimension using "$racedata/Individual income/national_racefractionsBY_workingage_gnd_INDincpctile.dta", /// 104 | incomevar(ind_inc_pctile) 105 | 106 | save13 "$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta", replace 107 | project, creates("$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta") 108 | 109 | *************************************** 110 | *** 2008-2012 ACS, Household income *** 111 | *************************************** 112 | 113 | impute_income_dimension using "$racedata/2008-2012 ACS income distribution/national_racefractionsBY_workingage_gnd_hhincpctile_ACS.dta", /// 114 | incomevar(hh_inc_pctile) 115 | 116 | save13 "$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta", replace 117 | project, creates("$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta") 118 | 119 | 120 | ******************************************************************************** 121 | *** Age 51 Retirement 
Age Extrapolation Test (2000 Census, Household Income) *** 122 | ******************************************************************************** 123 | 124 | impute_income_dimension using "$racedata/national_racefractionsBY_workingage_gnd_hhincpctile.dta", /// 125 | incomevar(hh_inc_pctile) retirementage(52) 126 | 127 | keep if inrange(age,51,61) 128 | 129 | cap mkdir "$racedata/Age 51 Retirement Age Extrapolation Test/" 130 | save13 "$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta", replace 131 | project, creates("$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta") 132 | -------------------------------------------------------------------------------- /code/raceshares/national_raceshareBY_gnd.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | 11 | ************************ 12 | 13 | project, original("$root/code/ado/compute_raceshares.ado") 14 | 15 | * Load age 40 race population data by Gender from 2000 Census 16 | project, uses("$root/data/derived/raceshares/national_2000age40_racepopBY_gnd.dta") 17 | use "$root/data/derived/raceshares/national_2000age40_racepopBY_gnd.dta", clear 18 | 19 | * Generate race shares 20 | compute_raceshares, by(gnd) 21 | 22 | * Output 23 | isid gnd 24 | save13 "$root/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta", replace 25 | project, creates("$root/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta") 26 | -------------------------------------------------------------------------------- /code/raceshares/national_raceshareBY_year_age_gnd_incpctile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set global aliases 11 | global racedata $root/data/derived/raceshares 12 | 13 | /*** 14 | 15 | Generate national race shares by (Year x) Age x Gender x Income Percentile. 
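 Shares are produced both by year ("trends", for the 2000 Census household-income input) and pooled over years ("levels"). Note that pooling the population counts before computing shares, as done below, weights each year by its population; it is not the same as averaging the annual shares, which would weight all years equally.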
16 | 17 | ***/ 18 | 19 | **************** 20 | *** Programs *** 21 | **************** 22 | 23 | project, original("$root/code/ado/compute_raceshares.ado") 24 | 25 | 26 | ************************************* 27 | *** 2000 Census, Household income *** 28 | ************************************* 29 | 30 | * Load data 31 | project, uses("$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta") 32 | use "$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta", clear 33 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 34 | rename hh_inc_pctile pctile // match IRS collapse varname 35 | 36 | *** Trends, by year *** 37 | 38 | * Compute race shares 39 | compute_raceshares, by(year age gnd pctile) 40 | 41 | * Save data 42 | save13 "$racedata/national_racesharesBY_year_age_gnd_hhincpctile.dta", replace 43 | project, creates("$racedata/national_racesharesBY_year_age_gnd_hhincpctile.dta") preserve 44 | 45 | *** Levels, pooled *** 46 | 47 | * Pool populations over all years 48 | isid year age gnd pctile 49 | collapse (sum) pop_*, by(age gnd pctile) 50 | 51 | * Compute race shares 52 | compute_raceshares, by(age gnd pctile) 53 | 54 | * Save data 55 | save13 "$racedata/national_racesharesBY_age_gnd_hhincpctile.dta", replace 56 | project, creates("$racedata/national_racesharesBY_age_gnd_hhincpctile.dta") 57 | 58 | 59 | ************************************** 60 | *** 2000 Census, Individual income *** 61 | ************************************** 62 | 63 | * Load data 64 | project, uses("$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta") 65 | use "$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta", clear 66 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 67 | 68 | * Pool populations over all years 69 | isid year age gnd ind_inc_pctile 70 | collapse (sum) pop_*, by(age gnd ind_inc_pctile) 71 | 72 | * Compute race shares 73 | compute_raceshares, by(age gnd ind_inc_pctile) 74 | rename ind_inc_pctile indv_earn_pctile // match IRS collapse varname 75 | 76 | * Save data 77 | save13 "$racedata/Individual income/national_racesharesBY_age_gnd_INDincpctile.dta", replace 78 | project, creates("$racedata/Individual income/national_racesharesBY_age_gnd_INDincpctile.dta") 79 | 80 | 81 | *************************************** 82 | *** 2008-2012 ACS, Household income *** 83 | *************************************** 84 | 85 | * Load data 86 | project, uses("$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta") 87 | use "$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta", clear 88 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 89 | 90 | * Pool populations over all years 91 | isid year age gnd hh_inc_pctile 92 | collapse (sum) pop_*, by(age gnd hh_inc_pctile) 93 | 94 | * Compute race shares 95 | compute_raceshares, by(age gnd hh_inc_pctile) 96 | rename hh_inc_pctile pctile // match IRS collapse varname 97 | 98 | * Save data 99 | save13 "$racedata/2008-2012 ACS income distribution/national_racesharesBY_age_gnd_hhincpctile_ACS.dta", replace 100 | project, creates("$racedata/2008-2012 ACS income distribution/national_racesharesBY_age_gnd_hhincpctile_ACS.dta") 101 | 102 | 103 | ******************************************************************************** 104 | *** Age 51 Retirement Age Extrapolation Test (2000 Census, Household Income) *** 105 | ******************************************************************************** 106 | 107 | * Load 
data 108 | project, uses("$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta") 109 | use "$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta", clear 110 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 111 | 112 | * Pool populations over all years 113 | isid year age gnd hh_inc_pctile 114 | collapse (sum) pop_*, by(age gnd hh_inc_pctile) 115 | 116 | * Compute race shares 117 | compute_raceshares, by(age gnd hh_inc_pctile) 118 | rename hh_inc_pctile pctile // match IRS collapse varname 119 | 120 | * Save data 121 | save13 "$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta", replace 122 | project, creates("$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta") 123 | -------------------------------------------------------------------------------- /code/set_bootstrap.do: -------------------------------------------------------------------------------- 1 | global reps 1000 2 | global splitobs 625000 3 | -------------------------------------------------------------------------------- /code/set_environment.do: -------------------------------------------------------------------------------- 1 | version 13.1 2 | clear all 3 | set more off 4 | pause on 5 | 6 | global root "${mortality_root}" 7 | 8 | adopath ++ "$root/code/ado_ssc" 9 | adopath ++ "$root/code/ado" 10 | 11 | * Disable project (since running do-files directly) 12 | cap program drop project 13 | program define project 14 | di "Project is disabled, skipping project command. (To re-enable, run -{stata program drop project}-)" 15 | end 16 | -------------------------------------------------------------------------------- /code/tests/Check completeness of mortality data.do: -------------------------------------------------------------------------------- 1 | 2 | * Set $root 3 | return clear 4 | capture project, doinfo 5 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 6 | else { // running directly 7 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 8 | do "${mortality_root}/code/set_environment.do" 9 | } 10 | 11 | * Set convenient globals 12 | global raw_data "${root}/data/raw" 13 | global derived "${root}/data/derived" 14 | 15 | * Create required folders 16 | cap mkdir "${root}/data/derived/Gompertz Parameters" 17 | cap mkdir "${root}/data/derived/Gompertz Parameters/OLS" 18 | 19 | /*** Test whether all observations are included in mortality rate files 20 | loaded in estimate_irs_gompertz_parameters.do. 21 | ***/ 22 | 23 | 24 | ******************************************************************** 25 | ************** National Life Expectancy Estimates ***************** 26 | ******************************************************************** 27 | 28 | * Load mortality rates 29 | project, original("${derived}/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta") 30 | use "${derived}/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 31 | keep if age_at_d >= 40 32 | 33 | * Assert that the correct number of observations exist per age 34 | forval age = 40/63 { // full years 2001-2014 35 | bys age_at_d: assert _N == 2*100*(2014-2001+1) if age_at_d == `age' 36 | } 37 | 38 | forval age = 64/76 { // years 2001-2013 for age 64, 2001-2012 for age 65, etc. 
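	* Each age a in 64-76 is observed in 76-a+1 calendar years (2001 through
	* 2014-(a-63)); with 2 genders x 100 income percentiles per year, the
	* expected cell count checked below is 2*100*(76-a+1) rows.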
39 | bys age_at_d: assert _N == 2*100*(76-`age'+1) if age_at_d == `age' 40 | } 41 | 42 | 43 | ******************************************************************** 44 | *********** Life Expectancy Estimates by Commuting Zone ************ 45 | ******************************************************************** 46 | 47 | * Load mortality rates 48 | project, original("${derived}/Mortality Rates/cz_mortratesBY_gnd_hhincquartile_age_year.dta") 49 | use "${derived}/Mortality Rates/cz_mortratesBY_gnd_hhincquartile_age_year.dta", clear 50 | keep if age_at_d>=40 51 | assert inrange(hh_inc_q, 1 ,4) 52 | 53 | * Check if correct number of observations exist per CZ with pop >= 25k 54 | merge m:1 cz using "$derived/final_covariates/cz_pop.dta", nogen keep(match master) 55 | keep if pop2000 >= 25000 56 | 57 | * Should be 2*14*(63-40+1) + 2*(13+12+11+10+9+8+7+6+5+4+3+2+1) = 854 observations per CZ x quartile 58 | collapse (count) cz_q_obs = mortrate, by(hh_inc_q cz) 59 | bys cz: assert _N==4 60 | count if cz_q_obs!=854 61 | di in red "Fraction of quartile x CZ groups missing observations : "`r(N)'/_N // 2.5% of CZ x quartiles are missing observations 62 | 63 | * Should be 854*4 = 3,416 observations per CZ 64 | collapse (sum) cz_obs = cz_q_obs, by(cz) 65 | count if cz_obs!=3416 66 | di in red "Fraction of CZs missing observations : "`r(N)'/_N // 9.7% of CZs are missing observations 67 | 68 | 69 | ******************************************************************** 70 | *************** Life Expectancy Estimates by County **************** 71 | ******************************************************************** 72 | 73 | * Load mortality rates 74 | project, original("${derived}/Mortality Rates/cty_mortratesBY_gnd_hhincquartile_age_year.dta") 75 | use "${derived}/Mortality Rates/cty_mortratesBY_gnd_hhincquartile_age_year.dta", clear 76 | keep if age_at_d>=40 77 | assert inrange(hh_inc_q, 1 ,4) 78 | 79 | * Check if correct number of observations exist per county with pop >= 25k 80 | merge m:1 cty using "$derived/final_covariates/cty_full_covariates.dta", nogen keep(match master) keepus(cty_pop2000) 81 | keep if cty_pop2000 >= 25000 82 | 83 | * Should be 2*14*(63-40+1) + 2*(13+12+11+10+9+8+7+6+5+4+3+2+1) = 854 observations per county x quartile 84 | collapse (count) cty_q_obs = mortrate, by(hh_inc_q cty) 85 | bys cty: assert _N==4 86 | count if cty_q_obs!=854 87 | di in red "Fraction of quartile x county groups missing observations : "`r(N)'/_N // 6.5% of county x quartiles are missing observations 88 | 89 | * Should be 854*4 = 3,416 observations per county 90 | collapse (sum) cty_obs = cty_q_obs, by(cty) 91 | count if cty_obs!=3416 92 | di in red "Fraction of counties missing observations : "`r(N)'/_N // 21.4% of counties are missing observations 93 | 94 | 95 | ******************************************************************** 96 | *************** Life Expectancy Estimates by State ***************** 97 | ******************************************************************** 98 | 99 | * Load mortality rates 100 | project, original("${derived}/Mortality Rates/st_mortratesBY_gnd_hhincquartile_age_year.dta") 101 | use "${derived}/Mortality Rates/st_mortratesBY_gnd_hhincquartile_age_year.dta", clear 102 | keep if age_at_d >= 40 103 | assert inrange(hh_inc_q, 1 ,4) 104 | 105 | * Check if correct number of observations exist per state 106 | 107 | * Should be 2*14*(63-40+1) + 2*(13+12+11+10+9+8+7+6+5+4+3+2+1) = 854 observations per state x quartile 108 | collapse (count) st_q_obs = mortrate, by(hh_inc_q st) 109 | bys 
st: assert _N==4 110 | count if st_q_obs!=854 111 | di in red "Fraction of quartile x state groups missing observations : "`r(N)'/_N // 0% of state x quartiles are missing observations 112 | 113 | * Should be 854*4 = 3,416 observations per state 114 | collapse (sum) st_obs = st_q_obs, by(st) 115 | count if st_obs!=3416 116 | di in red "Fraction of states missing observations : "`r(N)'/_N // 0% of states are missing observations 117 | -------------------------------------------------------------------------------- /code/tests/Explore runtimes for estimating race shifters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set working directory 11 | cd "$root/data/derived/NLMS" 12 | 13 | /*** Test runtime of different methods for estimating Gompertz race shifters, 14 | which is especially relevant for bootstrap runtimes. 15 | ***/ 16 | 17 | ******** 18 | 19 | 20 | *** Load data 21 | timer clear 22 | 23 | project, original("nlms_v5_s11_sampleA.dta") 24 | use "nlms_v5_s11_sampleA.dta", clear 25 | 26 | 27 | *** Method 1: Loop and store estimates 28 | timer on 1 29 | foreach g in "M" "F" { 30 | streg i.csdiv i.incq i.racegrp if gnd=="`g'", dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 31 | estimates store est`g' 32 | } 33 | timer off 1 34 | 35 | 36 | *** Method 2: statsby, saving 37 | 38 | * Specify parameters to save 39 | local save_est /// 40 | diff_gomp_int_black = _b[_t:2.racegrp] /// 41 | diff_gomp_int_hisp = _b[_t:3.racegrp] /// 42 | diff_gomp_int_asian = _b[_t:4.racegrp] /// 43 | diff_gomp_slope_black = _b[gamma:2.racegrp] /// 44 | diff_gomp_slope_hisp = _b[gamma:3.racegrp] /// 45 | diff_gomp_slope_asian = _b[gamma:4.racegrp] 46 | 47 | * Run regression 48 | timer on 2 49 | 50 | tempfile save_bysex 51 | statsby `save_est', saving(`save_bysex', `replace') /// 52 | by(gnd): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 53 | 54 | timer off 2 55 | 56 | 57 | *** Method 3: statsby, clear 58 | timer on 3 59 | 60 | statsby `save_est', clear /// 61 | by(gnd): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 62 | 63 | timer off 3 64 | 65 | 66 | *** Display times 67 | timer list 68 | -------------------------------------------------------------------------------- /code/tests/Simulate various methods of deriving LE from Gompertz parameters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Load LE functions 14 | project, original("${root}/code/ado/generate_le_with_raceadj.ado") 15 | include "$root/code/ado/generate_le_with_raceadj.ado" 16 | 17 | * Add ado files to project 18 | project, original("${root}/code/ado/fastregby.ado") 19 | 20 | 21 | /*** Simulates various methods for integrating the survival curve based on the 22 | "discretized Gompertz parameters" and compares
their results to the true LE. 23 | ***/ 24 | 25 | ************* 26 | 27 | 28 | * Load Gompertz parameters for national percentiles 29 | project, original("${derived}/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta") 30 | use "${derived}/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta", clear 31 | isid gnd pctile 32 | keep gnd pctile gomp_int gomp_slope 33 | 34 | * Create an age dimension 35 | expand 76-40+1 36 | bys gnd pctile: gen age_at_d = 40 + _n - 1 37 | 38 | * Compute discrete-time mortality rates at age [x,x+1) 39 | gen mort_discrete = 1 - exp( 1 / gomp_slope * ( exp(gomp_int + gomp_slope*age_at_d) - exp(gomp_int + gomp_slope*(age_at_d+1)) ) ) 40 | 41 | * Estimate discretized Gompertz parameters 42 | gen log_mort_discrete = log(mort_discrete) 43 | fastregby log_mort_discrete age_at_d, by(gnd pctile) clear 44 | ren (_b_age_at_d _b_cons) (gomp_slope_estimated gomp_int_estimated) 45 | order gomp_int_estimated gomp_slope_estimated, last 46 | 47 | * Merge in true Gompertz parameters 48 | merge 1:1 gnd pctile using "${derived}/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta", /// 49 | assert(2 3) keep(3) nogen keepusing(gomp*) 50 | rename (gomp_int gomp_slope) (gomp_int_true gomp_slope_true) 51 | 52 | * Adjust Gompertz intercept to midpoint of integer bins, since "age 40" in estimation is actually age [40,41) 53 | gen gomp_int_adjusted = gomp_int_estimated - 0.5 * gomp_slope_estimated 54 | 55 | 56 | * Compute expected life years at true Gompertz parameters 57 | rename (gomp_int_true gomp_slope_true) (gomp_int gomp_slope) 58 | gen_gomp_lifeyears_cont, startage(40) endage(90) 59 | rename expectedLY_gomp expectedLY_true 60 | rename (gomp_int gomp_slope) (gomp_int_true gomp_slope_true) 61 | 62 | * Compute expected life years at estimated unadjusted Gompertz parameters 63 | rename (gomp_int_estimated gomp_slope_estimated) (gomp_int gomp_slope) 64 | gen_gomp_lifeyears_cont, startage(40) endage(90) 65 | rename expectedLY_gomp expectedLY_est 66 | rename (gomp_int gomp_slope) (gomp_int_estimated gomp_slope_estimated) 67 | 68 | * Compute expected life years at estimated adjusted Gompertz parameters 69 | rename (gomp_int_adjusted gomp_slope_estimated) (gomp_int gomp_slope) 70 | gen_gomp_lifeyears_cont, startage(40) endage(90) 71 | rename expectedLY_gomp expectedLY_adj 72 | rename (gomp_int gomp_slope) (gomp_int_adjusted gomp_slope_estimated) 73 | 74 | * Compute expected life years at estimated Gompertz parameters, DISCRETE method 75 | rename (gomp_int_estimated gomp_slope_estimated) (gomp_int gomp_slope) 76 | mata: gen_gomp_lifeyears_disc(40, 90) 77 | rename expectedLY_gomp expectedLY_estdiscrete 78 | rename (gomp_int gomp_slope) (gomp_int_estimated gomp_slope_estimated) 79 | 80 | 81 | foreach t in est adj estdiscrete { 82 | gen diff_`t' = expectedLY_`t' - expectedLY_true 83 | gen reldiff_`t' = diff_`t' / expectedLY_true 84 | } 85 | 86 | 87 | sum diff* 88 | sum reldiff* 89 | -------------------------------------------------------------------------------- /code/tests/Test Julia Gompertz matches Stata Gompertz.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Check for Julia configuration 11 | project, original("${root}/code/set_julia.do") 12 | 
confirm file "${root}/code/set_julia.do" 13 | 14 | * Create required folders 15 | cap mkdir "${root}/scratch/tests" 16 | cap mkdir "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz" 17 | 18 | * Add ado files to project 19 | project, original("${root}/code/ado/estimate_gompertz2.ado") 20 | project, original("${root}/code/ado/mle_gomp_est.ado") 21 | project, original("${root}/code/ado/fastregby_gompMLE_julia.ado") 22 | project, original("${root}/code/ado/estimate_gompertz.jl") 23 | 24 | /*** Test whether Gompertz estimates generated by Julia 25 | match those generated in Stata. 26 | ***/ 27 | 28 | 29 | ******************************************************************** 30 | ************** National Life Expectancy Estimates ***************** 31 | ******************************************************************** 32 | 33 | ******* 34 | *** National, by Gender x Income Percentile 35 | ******* 36 | 37 | * Load mortality rates 38 | project, original("${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 39 | use "${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 40 | keep if age_at_d >= 40 41 | 42 | * Calculate Gompertz parameters from mortality rates 43 | foreach prog in Stata Julia { 44 | 45 | if ("`prog'"=="Stata") global juliapath "" 46 | else include "${root}/code/set_julia.do" 47 | 48 | foreach vce in oim "" { 49 | 50 | preserve 51 | 52 | estimate_gompertz2 gnd pctile, age(age_at_d) mort(mortrate) n(count) /// 53 | collapsefrom(gnd pctile age_at_d yod) type(mle) vce(`vce') 54 | 55 | save "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - `prog' `vce'.dta", replace 56 | project, creates("${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - `prog' `vce'.dta") 57 | 58 | restore 59 | 60 | } 61 | 62 | } 63 | 64 | 65 | *** Perform comparisons 66 | 67 | cap program drop compare_gompest 68 | program define compare_gompest 69 | 70 | syntax, [vce_julia(name) vce_stata(name) acceptedreldif(string)] 71 | 72 | project, uses("${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Julia `vce_julia'.dta") 73 | project, uses("${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Stata `vce_stata'.dta") 74 | 75 | use "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Julia `vce_julia'.dta", clear 76 | 77 | rename gomp* Jgomp* 78 | rename A* JA* 79 | 80 | merge 1:1 gnd pctile using "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Stata `vce_stata'.dta", /// 81 | assert(3) nogen 82 | 83 | foreach var of varlist gomp* A* { 84 | gen diff_`var' = reldif(J`var', `var') 85 | if ("`acceptedreldif'"!="") assert diff_`var'<`acceptedreldif' 86 | } 87 | sum diff* 88 | drop diff* 89 | 90 | end 91 | 92 | compare_gompest, vce_julia("") vce_stata("") acceptedreldif(1e-5) 93 | compare_gompest, vce_julia("oim") vce_stata("oim") acceptedreldif(1e-5) 94 | -------------------------------------------------------------------------------- /code/tests/Test gen_mortrates.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do 
`"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/tests" 12 | cap mkdir "${root}/scratch/tests/Test gen_mortrates ado" 13 | 14 | * Add ado files to project 15 | project, original("${root}/code/ado/gen_mortrates2.ado") 16 | 17 | 18 | /*** Test whether gen_mortrates2.ado properly handles input data. 19 | ***/ 20 | 21 | 22 | ********************************************************* 23 | *** Generate synthetic output: mortality rate dataset *** 24 | ********************************************************* 25 | 26 | * Construct mortality rate dataset 27 | set obs 37 28 | 29 | gen grp=1 30 | gen byte age_at_d=_n+39 31 | gen lag = 2 + (age_at_d>=64) * (age_at_d-63) 32 | gen int yod = lag + 1999 33 | 34 | gen mortrate=exp(-10 + 0.1 * age_at_d) // Gompertz with int=-10 and slope=0.1 35 | gen count = 100000 in 1 36 | replace count = cond(lag==2, 100000, count[_n-1] * (1-mortrate[_n-1])) in 2/`=_N' 37 | 38 | * Reduce precision of pop counts (which are always integers in true data) 39 | replace count = round(count) 40 | 41 | * Output 42 | save13 "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta", replace 43 | project, creates("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") 44 | 45 | 46 | ************************************************ 47 | *** Generate synthetic input: deadby dataset *** 48 | ************************************************ 49 | 50 | * Construct deadby dataset 51 | clear 52 | set obs 24 53 | 54 | gen grp=1 55 | gen age=_n+37 56 | gen tax_yr=1999 57 | 58 | gen long count=100000 59 | 60 | gen byte deadby_1_Mean = 0 61 | forvalues i=2/18 { 62 | 63 | local survivors count * (1-deadby_`=`i'-1'_Mean) 64 | local mortrate exp(-10 + 0.1 * (age+`i')) 65 | local deathsini `survivors' * `mortrate' 66 | 67 | gen double deadby_`i'_Mean = (count - `survivors' + `deathsini') / count if tax_yr + `i' <= 2014 68 | } 69 | 70 | * Output 71 | save13 "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", replace 72 | project, creates("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 73 | 74 | 75 | ********************* 76 | *** Perform Tests *** 77 | ********************* 78 | 79 | *** Test with perfect input data 80 | 81 | * Generate mortrates 82 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 83 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 84 | 85 | gen_mortrates2 grp, age(age) year(tax_yr) n(count) 86 | 87 | * Reduce precision 88 | recast float mortrate count, force 89 | replace count=round(count) 90 | 91 | * Perform comparison 92 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") preserve 93 | cf _all using "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta" 94 | 95 | 96 | *** Test with a dropped deadby column 97 | 98 | * Generate mortrates 99 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 100 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 101 | 102 | drop deadby_3_Mean 103 | 104 | rcof "gen_mortrates2 grp, age(age) year(tax_yr) n(count)" == 111 105 | 106 | 107 | *** Test with a missing deadby column 108 | 109 | * Generate mortrates 110 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 111 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 112 | 113 | 
replace deadby_3_Mean=. // column exists but all values are missing 114 | 115 | rcof "gen_mortrates2 grp, age(age) year(tax_yr) n(count)" == 9 // expect r(9): assertion is false 116 | 117 | 118 | *** Test with missing LAST deadby column 119 | 120 | * Generate mortrates 121 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 122 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 123 | 124 | replace deadby_15_Mean=. // deadby_15 is the last populated column (tax_yr + 15 = 2014) 125 | 126 | gen_mortrates2 grp, age(age) year(tax_yr) n(count) 127 | 128 | * Reduce precision 129 | recast float mortrate count, force 130 | replace count=round(count) 131 | 132 | * Perform comparison 133 | isid grp age_at_d yod 134 | ds grp age_at_d yod, not 135 | foreach var in `r(varlist)' { 136 | rename `var' gen_`var' 137 | } 138 | 139 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") preserve 140 | merge 1:1 grp age_at_d yod using "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta" 141 | 142 | count if _merge==2 143 | assert r(N)==1 // only the obs that requires deadby_15 (the oldest age at death) should be absent from the generated data 144 | 145 | foreach var of varlist lag mortrate count { 146 | assert gen_`var' == `var' | mi(gen_`var') // wherever values were generated, they match the synthetic truth 147 | } 148 | 149 | 150 | *** Test with missing unused deadby data 151 | 152 | * Generate mortrates 153 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 154 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 155 | 156 | forvalues i=3/18 { 157 | replace deadby_`i'_Mean=. if age!=61 // deadby_3+ is only used for the age-61 cohort (deaths at ages 64+), so removing it elsewhere should leave the output unchanged 158 | } 159 | 160 | gen_mortrates2 grp, age(age) year(tax_yr) n(count) 161 | 162 | * Reduce precision 163 | recast float mortrate count, force 164 | replace count=round(count) 165 | 166 | * Perform comparison 167 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") preserve 168 | cf _all using "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta" 169 | -------------------------------------------------------------------------------- /mortality.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization *** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | project, original("`pdir'/code/ado/scheme-leap.scheme") 13 | set scheme leap 14 | 15 | cap mkdir "`pdir'/scratch" 16 | 17 | *** Load dependencies into project *** 18 | 19 | project, relies_on("`pdir'/code/set_environment.do") 20 | project, relies_on("`pdir'/code/ado/scalarout.ado") 21 | project, relies_on("`pdir'/code/ado_ssc/save13.ado") 22 | 23 | foreach dir in "ado_maptile_geo" "ado_ssc" { 24 | 25 | cd `"`pdir'/code/`dir'"' 26 | local files : dir "`c(pwd)'" files "*" 27 | 28 | foreach file in `files' { 29 | if substr("`file'",1,1)!="." 
project, relies_on("`file'") 30 | } 31 | 32 | cd `"`pdir'"' 33 | 34 | } 35 | 36 | 37 | ****************** 38 | *** Covariates *** 39 | ****************** 40 | 41 | project, do("code/covariates/Health behavior maps.do") 42 | 43 | 44 | ********************* 45 | *** Race shifters *** 46 | ********************* 47 | 48 | * Figures analyzing race shifters 49 | project, do(code/nlms/nlms_mortality_profiles_BYrace.do) 50 | project, do(code/nlms/nlms_analyze_raceshifters_BYincq_BYcsregion.do) 51 | project, do("code/nlms/Explore mortality profiles implied by race shifters.do") 52 | 53 | 54 | ******************* 55 | *** Race shares *** 56 | ******************* 57 | 58 | * National sensitivity checks 59 | project, do("code/raceshares/national_Explore race fraction smoothing.do") 60 | project, do("code/raceshares/national_Race share extrapolation check - nonparametric age 51 test.do") 61 | 62 | 63 | * Local race shares 64 | project, do("code/raceshares/Explore CZ race share maps.do") 65 | 66 | 67 | *********************** 68 | *** Mortality rates *** 69 | *********************** 70 | 71 | project, do("code/mortality/Plot observed mortality and survival profiles.do") 72 | project, do("code/mortality/Sample comparison - NCHS SSA IRS.do") 73 | project, do("code/mortality/Lag invariance.do") 74 | project, do("code/mortality/Decompose mortality into medical v external.do") 75 | 76 | project, do("code/mortality/Summary stats on number of obs and income.do") 77 | project, do("code/mortality/Output income means by national income quantile.do") 78 | 79 | ************************* 80 | *** Life Expectancies *** 81 | ************************* 82 | 83 | project, do("code/lifeexpectancy/Plot signal by local population level.do") 84 | 85 | project, do("code/lifeexpectancy/Generate Signal Standard Deviations.do") 86 | 87 | project, do("code/lifeexpectancy/Check correlations in CZ LE trends.do") 88 | 89 | project, do("code/lifeexpectancy/Plot national LE profiles by percentile.do") 90 | project, do("code/lifeexpectancy/National LE profile sensitivity checks.do") 91 | project, do("code/lifeexpectancy/Plot national LE trends by income.do") 92 | project, do("code/lifeexpectancy/Plot major city LE profiles by ventile.do") 93 | project, do(code/lifeexpectancy/le_levels_maps.do) 94 | project, do(code/lifeexpectancy/le_trends_maps.do) 95 | project, do(code/lifeexpectancy/LE ranked lists of states.do) 96 | project, do(code/lifeexpectancy/le_correlations.do) 97 | project, do("code/lifeexpectancy/State Level Inequality Correlations.do") 98 | project, do("code/lifeexpectancy/Plot international comparison of LE at age 40.do") 99 | project, do("code/lifeexpectancy/List top 10 and bottom 10 CZs.do") 100 | project, do("code/lifeexpectancy/Plot CZ LE trend scatters.do") 101 | 102 | project, do("code/lifeexpectancy/Sensitivity analyses of local LEs.do") 103 | project, do("code/lifeexpectancy/Standard errors vs population size.do") 104 | 105 | project, do("code/lifeexpectancy/Explore national trends at fixed income levels.do") 106 | 107 | project, do("code/lifeexpectancy/Compute Years of Social Security Eligibility.do") 108 | 109 | **************************************** 110 | *** Output numbered figures & tables *** 111 | **************************************** 112 | 113 | project, do(code/assign_fig_tab_numbers.do) 114 | -------------------------------------------------------------------------------- /mortality_datagen.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization 
*** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | cap mkdir "`pdir'/data/derived" 13 | cap mkdir "`pdir'/scratch" 14 | 15 | *** Load dependencies into project *** 16 | 17 | project, relies_on("`pdir'/code/set_environment.do") 18 | project, relies_on("`pdir'/code/ado_ssc/save13.ado") 19 | 20 | ****************** 21 | *** Covariates *** 22 | ****************** 23 | 24 | project, do(code/covariates/cz_names.do) 25 | 26 | project, do(code/covariates/clean_pop_labforce_change1980to2000.do) 27 | project, do(code/covariates/clean_educ.do) 28 | project, do(code/covariates/clean_uninsurance.do) 29 | project, do(code/covariates/clean_racefractions.do) 30 | project, do(code/covariates/clean_dartmouthatlas.do) 31 | project, do(code/covariates/clean_brfss.do) 32 | project, do(code/covariates/clean_hospquality.do) 33 | project, do(code/covariates/clean_causeofdeath.do) 34 | 35 | project, do(code/covariates/combine_all_covariates.do) 36 | 37 | 38 | ********************* 39 | *** Race shifters *** 40 | ********************* 41 | 42 | * Prepare NLMS data 43 | project, do(code/nlms/nlms_loaddata.do) 44 | project, do(code/nlms/nlms_createsample.do) 45 | 46 | * Generate race shifters 47 | project, do(code/nlms/nlms_generate_shifters.do) 48 | 49 | * Bootstrap race shifters 50 | project, do(code/nlms/nlms_bootstrap_shifters.do) 51 | 52 | 53 | ******************* 54 | *** Race shares *** 55 | ******************* 56 | 57 | ** National 58 | project, do(code/raceshares/national_racepopBY_year_age_gnd.do) // load population data 59 | 60 | project, do(code/raceshares/national_racefracBY_workingage_gnd_incpctile.do) // compute income distributions 61 | project, do(code/raceshares/national_racepopBY_year_age_gnd_incpctile.do) // impute income dimension in pop data 62 | 63 | project, do(code/raceshares/national_raceshareBY_year_age_gnd_incpctile.do) // pooled 2001-2014 and by-year race shares 64 | project, do(code/raceshares/national_raceshareBY_gnd.do) // reference race shares 65 | 66 | ** Local 67 | 68 | * Local race populations 69 | project, do(code/raceshares/cty_racepopBY_year_agebin_gnd.do) // county pop without income dim from Inter/Postcensal estimates 70 | 71 | * Local income distributions 72 | project, do(code/raceshares/cty_racepopBY_workingagebin_hhincbin.do) // HH counts with income bins from Census 2000 SF3 tables 73 | project, do("code/raceshares/Explore income distribution over 16 income bins in County SF3 data.do") 74 | project, do("code/raceshares/SF3 incbin weights for national income quantiles.do") 75 | project, do(code/raceshares/cty_cz_st_racefracBY_workingagebin_hhincquantile.do) 76 | project, do(code/raceshares/cty_cz_st_racepopBY_year_agebin_gnd_hhincquantile.do) 77 | 78 | * Local race shares 79 | project, do(code/raceshares/cty_cz_st_raceshareBY_year_agebin_gnd_hhincquantile.do) 80 | 81 | 82 | *********************** 83 | *** Mortality rates *** 84 | *********************** 85 | 86 | project, do(code/mortality/construct_cdc_mortrates.do) 87 | 88 | 89 | ************************* 90 | *** Life Expectancies *** 91 | ************************* 92 | 93 | project, do(code/lifeexpectancy/generate_life_expectancies.do) 94 | project, do(code/lifeexpectancy/bootstrap_le.do) 95 | project, do(code/lifeexpectancy/compute_LE_trends.do) 96 | project, do(code/lifeexpectancy/generate_bootstrap_confidence_estimates.do) 97 | 98 | project, do("code/lifeexpectancy/Estimate NCHS 
State LEs pooling income groups.do") 99 | 100 | ************************** 101 | *** Online Data Tables *** 102 | ************************** 103 | 104 | project, do(code/create_online_tables.do) 105 | -------------------------------------------------------------------------------- /mortality_init.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization *** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | cap mkdir "`pdir'/data/derived" 13 | cap mkdir "`pdir'/scratch" 14 | 15 | *** Load dependencies into project *** 16 | 17 | project, relies_on("`pdir'/code/set_environment.do") 18 | project, relies_on("`pdir'/code/ado_ssc/save13.ado") 19 | 20 | *********************** 21 | *** Mortality rates *** 22 | *********************** 23 | 24 | project, do("code/mortality/Construct multi-source death and population counts.do") 25 | project, do(code/mortality/convert_IRScollapses_to_mortrates.do) 26 | 27 | project, do(code/mortality/estimate_irs_gompertz_parameters.do) 28 | 29 | project, do("code/mortality/Calculate aggregate deaths.do") 30 | -------------------------------------------------------------------------------- /mortality_tests.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization *** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | 13 | ****************** 14 | 15 | project, do("code/tests/Explore runtimes for estimating race shifters.do") 16 | 17 | project, do("code/tests/Test Julia Gompertz matches Stata Gompertz.do") 18 | project, do("code/tests/Test gen_mortrates.do") 19 | project, do("code/tests/Check completeness of mortality data.do") 20 | 21 | project, do("code/tests/Simulate various methods of deriving LE from Gompertz parameters.do") 22 | 23 | --------------------------------------------------------------------------------
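A note on the synthetic inputs built in code/tests/Test gen_mortrates.do above: the deadby_i columns follow the recursion deadby_i = deadby_{i-1} + (1 - deadby_{i-1}) * m(age + i), where m(a) = exp(-10 + 0.1*a) is the same Gompertz hazard used for the synthetic mortality rates, so deadby_i equals one minus the product of the one-year survival probabilities. The short standalone Stata sketch below (illustrative only, not part of the replication code or the -project- build; the starting age of 40 and the 2-15 horizon are arbitrary choices) checks that equivalence numerically:

* Illustrative check of the deadby recursion used in "Test gen_mortrates.do"
* (standalone sketch; age 40 and the 2-15 horizon are arbitrary)
clear
set obs 1
gen double deadby   = 0   // cumulative death probability, analogous to deadby_1_Mean = 0
gen double survival = 1   // running product of one-year survival probabilities
forvalues i = 2/15 {
	replace deadby   = deadby + (1 - deadby) * exp(-10 + 0.1*(40 + `i'))
	replace survival = survival * (1 - exp(-10 + 0.1*(40 + `i')))
}
assert reldif(deadby, 1 - survival) < 1e-12   // the two formulations agree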