├── .gitignore
├── code
├── ado
│ ├── compute_ci_percentiles.ado
│ ├── compute_racefracs.ado
│ ├── compute_raceshares.ado
│ ├── corr_reg.ado
│ ├── estimate_gompertz.jl
│ ├── estimate_gompertz2.ado
│ ├── fastregby.ado
│ ├── fastregby_gompMLE_julia.ado
│ ├── gen_mortrates2.ado
│ ├── generate_le_with_raceadj.ado
│ ├── gompertz_LYbetween.py
│ ├── mle_gomp_est.ado
│ ├── scalarout.ado
│ ├── scheme-leap.scheme
│ ├── simulated_delta.ado
│ └── wsample.ado
├── ado_maptile_geo
│ ├── county1990_coords_clean.dta
│ ├── county1990_database_clean.dta
│ ├── county1990_maptile.ado
│ ├── county1990_maptile.smcl
│ ├── cz_coords_clean.dta
│ ├── cz_database_clean.dta
│ ├── cz_maptile.ado
│ ├── cz_maptile.smcl
│ ├── state_coords_clean.dta
│ ├── state_database_clean.dta
│ ├── state_maptile.ado
│ └── state_maptile.smcl
├── ado_ssc
│ ├── _gwtmean.ado
│ ├── binscatter.ado
│ ├── binscatter.sthlp
│ ├── fastxtile.ado
│ ├── fastxtile.sthlp
│ ├── maptile.ado
│ ├── maptile.sthlp
│ ├── maptile_geohelp.ado
│ ├── maptile_geolist.ado
│ ├── maptile_install.ado
│ ├── maptile_newgeo.sthlp
│ ├── save12.ado
│ ├── save12.sthlp
│ ├── save13.ado
│ ├── save13.sthlp
│ ├── spmap.ado
│ ├── spmap.hlp
│ ├── spmap_arrow.ado
│ ├── spmap_color.ado
│ ├── spmap_diagram.ado
│ ├── spmap_examples.ado
│ ├── spmap_label.ado
│ ├── spmap_line.ado
│ ├── spmap_point.ado
│ ├── spmap_polygon.ado
│ ├── spmap_psl.ado
│ └── spmap_scalebar.ado
├── assign_fig_tab_numbers.do
├── covariates
│ ├── Health behavior maps.do
│ ├── clean_brfss.do
│ ├── clean_causeofdeath.do
│ ├── clean_dartmouthatlas.do
│ ├── clean_educ.do
│ ├── clean_hospquality.do
│ ├── clean_pop_labforce_change1980to2000.do
│ ├── clean_racefractions.do
│ ├── clean_uninsurance.do
│ ├── combine_all_covariates.do
│ ├── cz_names.do
│ └── list_of_covariates.do
├── create_online_tables.do
├── lifeexpectancy
│ ├── Check correlations in CZ LE trends.do
│ ├── Compute Years of Social Security Eligibility.do
│ ├── Estimate NCHS State LEs pooling income groups.do
│ ├── Explore national trends at fixed income levels.do
│ ├── Generate Signal Standard Deviations.do
│ ├── LE ranked lists of states.do
│ ├── List top 10 and bottom 10 CZs.do
│ ├── National LE profile sensitivity checks.do
│ ├── Plot CZ LE trend scatters.do
│ ├── Plot international comparison of LE at age 40.do
│ ├── Plot major city LE profiles by ventile.do
│ ├── Plot national LE profiles by percentile.do
│ ├── Plot national LE trends by income.do
│ ├── Plot signal by local population level.do
│ ├── Sensitivity analyses of local LEs.do
│ ├── Standard errors vs population size.do
│ ├── State Level Inequality Correlations.do
│ ├── bootstrap_le.do
│ ├── compute_LE_trends.do
│ ├── generate_bootstrap_confidence_estimates.do
│ ├── generate_life_expectancies.do
│ ├── le_correlations.do
│ ├── le_levels_maps.do
│ └── le_trends_maps.do
├── mortality
│ ├── Calculate aggregate deaths.do
│ ├── Construct multi-source death and population counts.do
│ ├── Decompose mortality into medical v external.do
│ ├── Lag invariance.do
│ ├── Output income means by national income quantile.do
│ ├── Plot observed mortality and survival profiles.do
│ ├── Sample comparison - NCHS SSA IRS.do
│ ├── Summary stats on number of obs and income.do
│ ├── construct_cdc_mortrates.do
│ ├── convert_IRScollapses_to_mortrates.do
│ └── estimate_irs_gompertz_parameters.do
├── nlms
│ ├── Explore mortality profiles implied by race shifters.do
│ ├── nlms_analyze_raceshifters_BYincq_BYcsregion.do
│ ├── nlms_bootstrap_shifters.do
│ ├── nlms_createsample.do
│ ├── nlms_generate_shifters.do
│ ├──
nlms_loaddata.do │ └── nlms_mortality_profiles_BYrace.do ├── raceshares │ ├── Explore CZ race share maps.do │ ├── Explore income distribution over 16 income bins in County SF3 data.do │ ├── SF3 incbin weights for national income quantiles.do │ ├── cty_cz_st_racefracBY_workingagebin_hhincquantile.do │ ├── cty_cz_st_racepopBY_year_agebin_gnd_hhincquantile.do │ ├── cty_cz_st_raceshareBY_year_agebin_gnd_hhincquantile.do │ ├── cty_racepopBY_workingagebin_hhincbin.do │ ├── cty_racepopBY_year_agebin_gnd.do │ ├── national_Explore race fraction smoothing.do │ ├── national_Race share extrapolation check - nonparametric age 51 test.do │ ├── national_racefracBY_workingage_gnd_incpctile.do │ ├── national_racepopBY_year_age_gnd.do │ ├── national_racepopBY_year_age_gnd_incpctile.do │ ├── national_raceshareBY_gnd.do │ └── national_raceshareBY_year_age_gnd_incpctile.do ├── readme.md ├── set_bootstrap.do ├── set_environment.do └── tests │ ├── Check completeness of mortality data.do │ ├── Explore runtimes for estimating race shifters.do │ ├── Simulate various methods of deriving LE from Gompertz parameters.do │ ├── Test Julia Gompertz matches Stata Gompertz.do │ └── Test gen_mortrates.do ├── mortality.do ├── mortality_datagen.do ├── mortality_init.do ├── mortality_tests.do └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | # -project- ignores 2 | *.log 3 | archive/ 4 | replicate/ 5 | mortality_init_files.dta 6 | mortality_init_links.dta 7 | mortality_datagen_files.dta 8 | mortality_datagen_links.dta 9 | mortality_files.dta 10 | mortality_links.dta 11 | mortality_tests_files.dta 12 | mortality_tests_links.dta 13 | 14 | # Other ignores 15 | data/ 16 | scratch/ 17 | results/ 18 | code/set_julia.do 19 | -------------------------------------------------------------------------------- /code/ado/compute_ci_percentiles.ado: -------------------------------------------------------------------------------- 1 | program define compute_ci_percentiles 2 | 3 | syntax varlist(numeric) , by(varlist) gen(name) 4 | 5 | * Build matrix of 2.5th and 97.5th percentiles 6 | foreach var of local varlist { 7 | tempfile `var'pctiles 8 | 9 | statsby `var'25=r(r1) `var'975=r(r2), saving(``var'pctiles') /// 10 | by(`by'): _pctile `var', percentiles(2.5 97.5) 11 | } 12 | 13 | * Combine datasets of percentile values 14 | clear 15 | qui use `by' using ``:word 1 of `varlist''pctiles' 16 | foreach var of local varlist { 17 | qui merge 1:1 `by' using ``var'pctiles', assert(3) nogen 18 | } 19 | 20 | * Reshape percentile long 21 | reshape long `varlist', i(`by') j(`gen') 22 | replace `gen'=`gen'/10 23 | 24 | end 25 | -------------------------------------------------------------------------------- /code/ado/compute_racefracs.ado: -------------------------------------------------------------------------------- 1 | program define compute_racefracs 2 | 3 | /*** 4 | 5 | In each by-group, compute the fraction of each race's population in each income bin. 6 | 7 | For example, if black males had the same income distribution as the full population, 8 | they would be 0.01 of each percentile. Across income bins, the fractions sum to one. 9 | 10 | Optionally, smooth the race fractions across positive bins using lowess. 
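    As an illustrative sketch (hypothetical numbers, not taken from the data): if a by-group
    has pop_black = 200 in one income bin and 10,000 black individuals summed across all of its
    bins, then frac_of_black = 200/10,000 = 0.02 in that bin, and the frac_of_black values
    summed over the by-group's bins equal 1 by construction.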
11 | 12 | ***/ 13 | 14 | syntax , by(varlist) incomevar(varname) [ racelist(namelist) lowess_bw(real -99) ] 15 | 16 | if ("`racelist'"=="") local racelist black hispanic asian other 17 | 18 | * Verify that file is identified at By Group x Income Bin level 19 | isid `by' `incomevar' 20 | 21 | * Generate race fractions 22 | foreach race in `racelist' { 23 | 24 | tempvar sumpop_`race' 25 | egen long `sumpop_`race''=total(pop_`race'), by(`by') 26 | 27 | qui gen double frac_of_`race'=pop_`race'/`sumpop_`race'' 28 | label var frac_of_`race' "Fraction of `race' population in income bin" 29 | 30 | } 31 | 32 | * Smooth the noise in the race fractions across positive income bins 33 | if (`lowess_bw'!=-99) { 34 | foreach race in `racelist' { 35 | 36 | * Lowess smooth race fractions 37 | lowess frac_of_`race' `incomevar' if `incomevar'>0, by(`by') nograph gen(smoothfrac_of_`race') bw(`lowess_bw') 38 | qui replace smoothfrac_of_`race' = frac_of_`race' if `incomevar'<=0 39 | label var smoothfrac_of_`race' "Smoothed fraction of `race' population in income bin" 40 | 41 | * Renormalize the smoothed estimates so that the fractions will sum to 1 for each age x gnd 42 | tempvar sumfrac_`race' sumsmoothfrac_`race' 43 | qui egen double `sumfrac_`race''=total(frac_of_`race') if `incomevar'>0, by(`by') 44 | qui egen double `sumsmoothfrac_`race''=total(smoothfrac_of_`race') if `incomevar'>0, by(`by') 45 | 46 | qui replace smoothfrac_of_`race' = smoothfrac_of_`race'/`sumsmoothfrac_`race''*`sumfrac_`race'' if `incomevar'>0 47 | 48 | * Check that smoothfracs sum to 1 49 | tempvar newsumsmoothfrac_`race' 50 | egen float `newsumsmoothfrac_`race'' = total(smoothfrac_of_`race'), by(`by') 51 | assert abs(`newsumsmoothfrac_`race''-1)<10^-6 52 | 53 | * Change smoothfrac datatype from double to float (the final digits vary between runs, this rounds them away) 54 | qui recast float smoothfrac_of_`race', force 55 | 56 | } 57 | } 58 | 59 | * Change frac datatype from double to float (the extra precision isn't meaningful) 60 | qui recast float frac_of_*, force 61 | 62 | end 63 | -------------------------------------------------------------------------------- /code/ado/compute_raceshares.ado: -------------------------------------------------------------------------------- 1 | program define compute_raceshares 2 | /*** Compute race shares in each By Group x Income Percentile cell 3 | ***/ 4 | 5 | syntax , by(varlist) 6 | 7 | * Verify that file is identified at By Group x Income Percentile level 8 | isid `by' 9 | 10 | * Generate race shares 11 | foreach race in black asian hispanic other { 12 | 13 | * Compute race share in each cell 14 | gen double raceshare_`race' = pop_`race' / (pop_black + pop_asian + pop_hispanic + pop_other) 15 | label var raceshare_`race' "Share of `race' in -`by'- cell" 16 | 17 | } 18 | 19 | * Check that race shares sum to 1 in each cell OR all are missing because total population is 0 in the cell 20 | assert abs(raceshare_black + raceshare_asian + raceshare_hispanic + raceshare_other - 1) < 10^-6 | /// 21 | (pop_black + pop_asian + pop_hispanic + pop_other)==0 22 | 23 | * Set order 24 | order `by' pop_* raceshare_* 25 | 26 | end 27 | -------------------------------------------------------------------------------- /code/ado/corr_reg.ado: -------------------------------------------------------------------------------- 1 | program define corr_reg 2 | 3 | /*** Run a regression estimating the correlation between two variables. 
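    The program first regresses y on x in natural units, then standardizes both variables to
    mean 0 and standard deviation 1 and regresses again. With standardized variables the OLS
    slope equals the correlation coefficient, so the second regression reports the correlation
    together with a standard error (robust if vce() is specified).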
4 | ***/ 5 | 6 | syntax varlist(min=2 max=2 numeric) [if] [aweight fweight], [ vce(string) ] 7 | 8 | *** Prep 9 | 10 | * Create convenient weight local 11 | if ("`weight'"!="") local wt [`weight'`exp'] 12 | 13 | * Parse yvar and xvar 14 | local yvar=word("`varlist'",1) 15 | local xvar=word("`varlist'",2) 16 | 17 | *** Regressions 18 | 19 | * Regress in natural units 20 | reg `yvar' `xvar' `if' `wt' 21 | 22 | * Convert to Std Dev units 23 | qui sum `yvar' `wt' if e(sample) 24 | qui gen mb = (`yvar' - r(mean))/r(sd) 25 | 26 | qui sum `xvar' `wt' if e(sample) 27 | qui gen vb = (`xvar' - r(mean))/r(sd) 28 | 29 | * Regress in standard deviation units, applying VCE if specified 30 | reg mb vb `if' `wt', vce(`vce') 31 | drop mb vb 32 | 33 | if ("`vce'"!="") di "Note: Standard errors depend on order of y and x with robust standard errors." 34 | 35 | end 36 | -------------------------------------------------------------------------------- /code/ado/estimate_gompertz.jl: -------------------------------------------------------------------------------- 1 | using ArgParse 2 | using GLM, DataFrames, Distributions 3 | 4 | function parse_commandline() 5 | s = ArgParseSettings() 6 | 7 | @add_arg_table s begin 8 | "--input" 9 | help = "input CSV with counts of deaths and survivals by Age x Group" 10 | arg_type = String 11 | required = true 12 | "--output" 13 | help = "output CSV with Gompertz parameter estimates" 14 | arg_type = String 15 | required = true 16 | "--vce" 17 | help = "type of VCV matrix computed; either 'oim' or 'builtin'" 18 | arg_type = String 19 | default = "oim" 20 | end 21 | 22 | return parse_args(s) 23 | end 24 | 25 | function gompertzreg(df, vce) 26 | # Perform a Gompertz MLE regression, return the coefficients and Cholesky-decomposed covariance matrix as a DataFrame. 
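    # Model notes: with a log link on the binomial death/survival counts, the fitted one-year
    # death probability at age x is exp(a + b*x), i.e. the Gompertz form, so the GLM intercept
    # and slope are the Gompertz parameters. The upper-triangular Cholesky factor A of the
    # covariance of (slope, intercept) is returned so that downstream code (simulated_delta.ado)
    # can draw simulated parameters from the estimated sampling distribution using two
    # standard-normal draws.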
27 | 28 | model = try 29 | GLM.glm(@formula(mort ~ age), df, Binomial(), LogLink(), wts=1.0*df[:count].data, start=(-10,0.1)) 30 | catch err 31 | err 32 | end 33 | 34 | if isa(model, DataFrames.DataFrameRegressionModel) 35 | coef = GLM.coef(model)' 36 | 37 | if vce=="oim" 38 | vcov = vcov_Gompertz_OIM(model, df) 39 | elseif vce=="builtin" 40 | vcov = GLM.vcov(model) 41 | else 42 | throw(ArgumentError("vce must be oim or builtin")) 43 | end 44 | 45 | A = chol([vcov[2,2] vcov[2,1]; vcov[1,2] vcov[1,1]]) 46 | return DataFrame(gomp_int = coef[1], gomp_slope = coef[2], A_slope_1 = A[1,1], A_int_1 = A[1,2], A_int_2 = A[2,2]) 47 | 48 | elseif isa(model, ErrorException) 49 | println("caught error in _grp $(dec(df[1,:_grp])): $(model.msg) (Likely 0 deaths.)") 50 | return DataFrame(gomp_int = 0, gomp_slope = 0, A_slope_1 = 0, A_int_1 = 0, A_int_2 = 0) 51 | 52 | elseif isa(model, LinAlg.PosDefException) 53 | println("caught error in _grp $(dec(df[1,:_grp])): LinAlg.PosDefException (Likely insufficient observations)") 54 | return DataFrame(gomp_int = 0, gomp_slope = 0, A_slope_1 = 0, A_int_1 = 0, A_int_2 = 0) 55 | 56 | else 57 | throw(model) 58 | 59 | end 60 | 61 | end 62 | 63 | function vcov_Gompertz_OIM(model, df) 64 | # Computes the OIM variance-covariance matrix for a Gompertz GLM model 65 | 66 | df_alive = df[df[:mort] .== 0, :] 67 | df_alive[:predmort] = exp( GLM.coef(model)[1] + GLM.coef(model)[2] * df_alive[:age] ) 68 | 69 | OIM_contribution = DataFrame() 70 | OIM_contribution[:a2] = df_alive[:count] .* df_alive[:predmort] ./ (1-df_alive[:predmort]).^2 71 | OIM_contribution[:ab] = OIM_contribution[:a2] .* df_alive[:age] 72 | OIM_contribution[:b2] = OIM_contribution[:ab] .* df_alive[:age] 73 | 74 | OIM_elements = aggregate(OIM_contribution, sum) 75 | return Symmetric(inv([OIM_elements[1,:a2_sum] OIM_elements[1,:ab_sum]; OIM_elements[1,:ab_sum] OIM_elements[1,:b2_sum]])) 76 | 77 | end 78 | 79 | function main() 80 | 81 | # Parse arguments 82 | parsed_args = parse_commandline() 83 | 84 | # Load data 85 | data = readtable(parsed_args["input"]) 86 | 87 | # Run Gompertz MLE regression on each by-group 88 | gompBY = by(data, :_grp, df -> gompertzreg(df, parsed_args["vce"]) ) 89 | 90 | # Output results 91 | writetable(parsed_args["output"], gompBY) 92 | 93 | end 94 | 95 | main() 96 | -------------------------------------------------------------------------------- /code/ado/estimate_gompertz2.ado: -------------------------------------------------------------------------------- 1 | program define estimate_gompertz2 2 | 3 | /*** Calculate Gompertz parameters from mortality rates by age, within by-groups. 4 | 5 | Use Julia to estimate GLM regressions quickly if it is configured, 6 | otherwise use -statsby- in Stata. 
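    An illustrative call (the by-variables and variable names here are hypothetical, chosen to
    match the output of gen_mortrates2):

        estimate_gompertz2 cz gnd, age(age_at_d) mort(mortrate) n(count) type(mle) vce(oim) force

    which estimates one Gompertz intercept and slope per CZ x gender cell.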
7 | 8 | ***/ 9 | 10 | syntax varlist(min=1), age(varname) mort(varname) [ n(varname) /// 11 | collapsefrom(varlist) vce(name) /// 12 | type(string) /// 13 | cz_popmsk(integer 0) cty_popmsk(integer 0) force ] 14 | local group `varlist' 15 | if !inlist("`type'","mle","ols") { 16 | di as error "type() must be 'mle' or 'ols'" 17 | exit 198 18 | } 19 | if ("`n'"=="") { 20 | if ("`type'"=="mle") { 21 | di as error "n() required with type(mle)" 22 | exit 198 23 | } 24 | if ("`collapsefrom'"!="") { 25 | di as error "n() required with collapsefrom()" 26 | exit 198 27 | } 28 | } 29 | 30 | *** Collapse dataset if requested 31 | if ("`collapsefrom'"!="") { 32 | isid `collapsefrom' `age' 33 | collapse (mean) `mort' (rawsum) `n' [aw=`n'], by(`group' `age') fast 34 | qui compress 35 | } 36 | else isid `group' `age' 37 | 38 | *** Check dataset has expected structure 39 | assert inrange(`age', 40, 76) // only expected ages for this project 40 | if ("`n'"!="") { 41 | cap confirm long variable `n' // integer number of individuals 42 | if (_rc==7) confirm int variable `n' 43 | else confirm long variable `n' 44 | assert reldif(`mort'*`n', round(`mort'*`n')) < 2e-6 // integer number of deaths 45 | } 46 | 47 | *** Mask Gompertz parameters for small CZs or counties 48 | if (`cz_popmsk'!=0) { 49 | project, original("${root}/data/raw/Covariate Data/cz_characteristics.dta") preserve 50 | merge m:1 cz using "${root}/data/raw/Covariate Data/cz_characteristics.dta", keepusing(pop2000) /// 51 | assert(2 3) keep(3) nogen 52 | drop if pop2000 < `cz_popmsk' 53 | drop pop2000 54 | } 55 | 56 | if (`cty_popmsk'!=0) { 57 | project, original("${root}/data/raw/Covariate Data/cty_covariates.dta") preserve 58 | merge m:1 cty using "${root}/data/raw/Covariate Data/cty_covariates.dta", keepus(cty_pop2000) /// 59 | assert(2 3) keep(3) nogen 60 | drop if cty_pop2000 < `cty_popmsk' 61 | drop cty_pop2000 62 | } 63 | 64 | *** Estimate Gompertz parameters for each group 65 | if ("`type'"=="mle") { 66 | 67 | * "Reshape" from mortality rates into counts of deaths and non-deaths 68 | /* After reshape: 69 | - `mort' variable = 0 or 1. 70 | - `n' variable = count of deaths or non-deaths 71 | 72 | Note: 73 | - Already confirmed unique by `group' `age' above. 74 | - Already confirmed number of deaths is an integer above. 75 | 76 | */ 77 | quietly { 78 | expand 2 79 | bys `group' `age': replace `n' = cond(_n == 1, round(`mort'*`n'), round(`n' - `mort'*`n')) 80 | by `group' `age': replace `mort' = (_n == 1) 81 | } 82 | recast byte `mort' 83 | 84 | * Estimate Gompertz parameters (coefficients and covariance matrix) 85 | if ("$juliapath"=="") { 86 | statsby A_slope_1 = r(A_slope_1) A_int_1 = r(A_int_1) A_int_2 = r(A_int_2) gomp_int = r(gomp_int) gomp_slope = r(gomp_slope), /// 87 | by(`group') clear: mle_gomp_est, age(`age') mort(`mort') n(`n') vce(`vce') 88 | 89 | sort `group' 90 | } 91 | else fastregby_gompMLE_julia `mort' `age', count(`n') by(`group') vce(`vce') clear `force' 92 | 93 | } 94 | else if ("`type'"=="ols") { 95 | 96 | * Generate log mortality rates 97 | qui count if `mort'==0 98 | if (r(N)>0) di as error "warning: `r(N)'/`=_N' mortality rates are 0, so their log mortality rates are missing." 
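    * Under the Gompertz specification the mortality rate at age x is exp(gomp_int + gomp_slope*x),
    * so log mortality is linear in age. Fitting OLS of log(mortrate) on age within each by-group
    * therefore recovers the Gompertz intercept and slope; cells with zero mortality have missing
    * log mortality and are dropped from the fit (hence the warning above).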
99 | 100 | tempvar l_mort 101 | gen `l_mort' = log(`mort') 102 | 103 | * Estimate Gompertz parameters (coefficients only) 104 | fastregby `l_mort' `age', by(`group') clear 105 | rename (_b_cons _b_`age') (gomp_int gomp_slope) 106 | 107 | } 108 | 109 | * Check for groups that had errors, prevent disclosure of missingness 110 | qui count if mi(gomp_int, gomp_slope) 111 | if (r(N)>0 & "`force'"=="force") { 112 | qui drop if mi(gomp_int, gomp_slope) 113 | di as error "warning: dropped `r(N)' group(s) that had an error in Gompertz estimation" 114 | } 115 | else if r(N)>0 { 116 | di as error "`r(N)' group(s) had an error in Gompertz estimation. use 'force' option to drop groups with errors." 117 | exit 2000 118 | } 119 | 120 | end 121 | 122 | -------------------------------------------------------------------------------- /code/ado/fastregby.ado: -------------------------------------------------------------------------------- 1 | program define fastregby 2 | 3 | /*** Performs a univariate OLS regression for each by-group, storing the 4 | coefficients in new dataset. 5 | 6 | The following commands are equivalent: 7 | - fastregby y x, by(byvars) clear 8 | - statsby, by(byvars) clear: reg y x 9 | 10 | Except fastregby will run approximately 80 times faster. 11 | ***/ 12 | 13 | version 13.1 14 | syntax varlist(min=2 max=2 numeric), by(varlist) clear 15 | 16 | * Convert string by-vars to numeric 17 | foreach var of varlist `by' { 18 | cap confirm numeric variable `var', exact 19 | if _rc==0 { // numeric var 20 | local bynumeric `bynumeric' `var' 21 | } 22 | else { // string var 23 | tempvar `var'N 24 | encode `var', gen(``var'N') 25 | local bynumeric `bynumeric' ``var'N' 26 | local bystr `var' // list of string by-vars 27 | } 28 | } 29 | 30 | * Sort using by-groups 31 | sort `by' 32 | /* 33 | if ("`:sortedby'"!="`by'") { 34 | di as error "Loaded dataset must be sorted in by-groups: `by'" 35 | exit 5 36 | } 37 | */ 38 | 39 | * Generate a single by-variable counting by groups 40 | tempvar grp 41 | egen `grp'=group(`bynumeric') 42 | 43 | * Perform regressions on each by-group, store in dataset 44 | mata: _fastregby("`varlist'", "`grp'", "`bynumeric'") 45 | 46 | * Convert string by-vars back to strings, from numeric 47 | foreach var in `bystr' { 48 | decode ``var'N', gen(`var') 49 | } 50 | order `by' 51 | 52 | end 53 | 54 | 55 | version 13.1 56 | set matastrict on 57 | 58 | mata: 59 | 60 | void _fastregby(string scalar regvars, string scalar grpvar, string scalar byvars) { 61 | // Inputs: 62 | // - a y-var and x-var for an OLS regression 63 | // - a group var, for which each value represents a distinct by-group. 64 | // This var must be in ascending order. 65 | // - a list of numeric by-variables, whose groups correspond to the group var. 66 | // Outputs: 67 | // - dataset of coefficients from OLS regression for each by-group 68 | 69 | // Convert variable names to column indices 70 | real rowvector regcols 71 | real scalar ycol 72 | real scalar xcol 73 | real scalar grpcol 74 | real rowvector bycols 75 | 76 | regcols = st_varindex(tokens(regvars)) 77 | ycol = regcols[1] 78 | xcol = regcols[2] 79 | grpcol = st_varindex(grpvar) 80 | bycols = st_varindex(tokens(byvars)) 81 | 82 | // Fetch number of groups 83 | real scalar numgrp 84 | numgrp = _st_data(st_nobs(),grpcol) 85 | 86 | // Preallocate matrices of group identifiers & coefs 87 | real matrix groups 88 | real matrix coefs 89 | groups = J(numgrp, cols(bycols), .) // Num Groups x Num By-Vars matrix 90 | coefs = J(numgrp, 2, .) 
// Num Groups x 2 matrix 91 | 92 | // Perform sequence of regressions 93 | real matrix M 94 | real matrix y 95 | real matrix X 96 | real scalar startobs 97 | real scalar curgrp 98 | startobs=1 // starting obsevation for current group being processed 99 | curgrp=_st_data(1,grpcol) // current group being processed 100 | 101 | for (obs=1; obs<=st_nobs()-1; obs++) { 102 | 103 | if (_st_data(obs,grpcol)!=curgrp) { 104 | 105 | // compute OLS coefs: beta = inv(X'X) * X'y. 106 | // --> see Example 4 of -help mf_cross- 107 | st_view(M, (startobs,obs-1), regcols, 0) 108 | st_subview(y, M, ., 1) 109 | st_subview(X, M, ., (2\.)) 110 | 111 | coefs[curgrp,.] = ( invsym(cross(X,1 , X,1)) * cross(X,1 , y,0) )' 112 | 113 | // store group identifiers 114 | groups[curgrp,.] = st_data(startobs,bycols) 115 | 116 | // update counters 117 | curgrp=_st_data(obs,grpcol) 118 | startobs=obs 119 | 120 | } 121 | 122 | } 123 | 124 | // Always perform regression for last observation 125 | obs=st_nobs() 126 | if (_st_data(obs,grpcol)==curgrp) { // last observation is not a group to itself 127 | 128 | // increment obs, since code is written as processing the observation 129 | // that is 1 past the last in the group 130 | ++obs 131 | 132 | // compute OLS coefs: beta = inv(X'X) * X'y. 133 | // --> see Example 4 of -help mf_cross- 134 | st_view(M, (startobs,obs-1), regcols, 0) 135 | st_subview(y, M, ., 1) 136 | st_subview(X, M, ., (2\.)) 137 | 138 | coefs[curgrp,.] = ( invsym(cross(X,1 , X,1)) * cross(X,1 , y,0) )' 139 | 140 | // store group identifiers 141 | groups[curgrp,.] = st_data(startobs,bycols) 142 | 143 | } 144 | else { 145 | display("{error} last observation is in a singleton group") 146 | exit(2001) 147 | } 148 | 149 | // Store group identifiers in dataset 150 | stata("qui keep in 1/"+strofreal(numgrp)) 151 | stata("keep "+byvars) 152 | st_store(.,tokens(byvars),groups) 153 | 154 | // Store coefficients in dataset 155 | (void) st_addvar("float", "_b_"+tokens(regvars)[2]) 156 | (void) st_addvar("float", "_b_cons") 157 | st_store(., ("_b_"+tokens(regvars)[2], "_b_cons"), coefs) 158 | 159 | } 160 | 161 | end 162 | -------------------------------------------------------------------------------- /code/ado/fastregby_gompMLE_julia.ado: -------------------------------------------------------------------------------- 1 | program define fastregby_gompMLE_julia 2 | 3 | /*** Performs a Gompertz MLE regression for each by-group, storing the 4 | coefficients and Cholesky-decomposed covariances in a new dataset. 5 | 6 | 'debug' option prints all output from Julia. Otherwise Julia output is suppressed. 7 | ***/ 8 | 9 | version 13.1 10 | syntax varlist(min=2 max=2 numeric), count(varname numeric) by(varlist) [vce(name) force debug] clear 11 | if ("`vce'"!="") local vce --vce `vce' 12 | if ("`debug'"=="") local quietly quietly 13 | 14 | * Sort on by-variables 15 | sort `by' 16 | 17 | * Generate a single by-variable counting by-groups 18 | egen _grp = group(`by') 19 | 20 | * Perform regressions on each by-group using Julia 21 | tempfile deathandsurvival_data gomppar 22 | 23 | keep `by' _grp `varlist' `count' 24 | rename (`varlist' `count') (mort age count) 25 | qui export delim _grp age mort count using `deathandsurvival_data' 26 | di "Estimating Gompertz parameters using Julia..." 
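    * The shell call below passes the exported CSV of death/survival counts to estimate_gompertz.jl,
    * which fits the Gompertz binomial GLM separately for each _grp and writes gomp_int, gomp_slope
    * and the Cholesky terms (A_slope_1, A_int_1, A_int_2) to the output CSV; that file is then
    * imported and merged back onto the by-variables.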
27 | `quietly' !${juliapath} "${root}/code/ado/estimate_gompertz.jl" `vce' --input `deathandsurvival_data' --output `gomppar' 28 | 29 | * Store association between grp var and by-vars 30 | keep `by' _grp 31 | qui duplicates drop `by' _grp, force 32 | tempfile bylabels 33 | qui save `bylabels' 34 | 35 | * Load results from Julia 36 | import delim using `gomppar', asdouble clear 37 | rename (a_slope_1 a_int_1 a_int_2) (A_slope_1 A_int_1 A_int_2) 38 | qui drop if gomp_int==0 & gomp_slope==0 & A_slope_1==0 & A_int_1==0 & A_int_2==0 // that's how estimate_gompertz.jl indicates error 39 | 40 | * Replace _grp var with by-vars 41 | qui merge 1:1 _grp using `bylabels', assert(2 3) nogen // _merge==2 happens when by-groups dropped due to errors 42 | drop _grp 43 | order `by' 44 | sort `by' 45 | 46 | end 47 | -------------------------------------------------------------------------------- /code/ado/gen_mortrates2.ado: -------------------------------------------------------------------------------- 1 | program define gen_mortrates2 2 | 3 | /*** Input is a loaded dataset that is: 4 | - Identified by 5 | `varlist' x `age' x `year' 6 | - Containing variables: 7 | - `age', the age at which income is measured 8 | - `n', the count of people in that cell 9 | - deadby_1_Mean to deadby_18_Mean containing the cumulative 10 | fraction of people who were alive at the time income was 11 | measured, but have died by the start of the n'th year later. 12 | 13 | The variable specified in age() must either be: 14 | - named age, containing exact years 15 | - OR named agebin, containing 5-year agebins and a 4-year agebin 16 | whose income is measured at 58-61 => 2-year mortality measured 60-63. 17 | 18 | These agebins should be labeled by the mean age at which 19 | **income** is measured. 20 | ***/ 21 | 22 | syntax varlist, age(varname) year(varname) n(varname) [ all_lags ] 23 | local by `varlist' 24 | if ("`n'"!="count") rename `n' count 25 | isid `by' `age' `year' 26 | 27 | * Check no one dies in first year 28 | assert deadby_1_Mean==0 29 | 30 | * Check expected years are present 31 | forvalues i=1/18 { 32 | confirm numeric var deadby_`i'_Mean 33 | } 34 | 35 | * Check maximum age 36 | sum `age', meanonly 37 | if ("`age'"=="age") assert r(max)==61 // maximum age is exactly 61 (last working age) 38 | else if ("`age'"=="agebin") assert r(max)==59.5 // maximum agebin is 58-61 39 | else { 40 | di as error "age() variable must either be age or agebin." 
41 | exit 198 42 | } 43 | local maxage=r(max) 44 | 45 | * Reshape long on how many years income is lagged 46 | reshape long deadby_@_Mean, i(`by' `age' `year') j(lag) 47 | label var lag "Income Lag" 48 | 49 | * Only use 2-year lags except for age 61/agebin 60, which we use throughout retirement 50 | if ("`all_lags'"=="") drop if lag > 2 & `age' < `maxage' 51 | else local lag lag // lag will be an identifying variable 52 | 53 | * Calculate mortality rates & count of surviving population 54 | by `by' `age' `year' (lag): gen double mortrate = (deadby__Mean - deadby__Mean[_n-1]) / (1 - deadby__Mean[_n-1] ) 55 | by `by' `age' `year' (lag): replace count = count[1] * (1 - deadby__Mean[_n-1]) if lag>1 56 | compress count 57 | label var mortrate "Mortality Rate" 58 | label var count "Denominator of mortrate = people alive at beginning of year" 59 | 60 | * Clean data 61 | assert lag==1 | mi(deadby__Mean) if mi(mortrate) 62 | drop if mi(mortrate) 63 | 64 | * Generate age and year of death 65 | gen age_at_d = `age' + lag 66 | gen int yod = `year' + lag 67 | drop `age' `year' 68 | compress age_at_d 69 | 70 | label var age_at_d "Age at Death" 71 | label var yod "Year of Death" 72 | 73 | * Check no data past 2014 74 | capture assert yod<=2014 75 | if _rc!=0 { // check that data past 2014 is missing and can be thrown away 76 | assert deadby__Mean==0 | deadby__Mean==. if yod>2014 77 | drop if yod>2014 78 | } 79 | 80 | * Output 81 | drop deadby__Mean 82 | order `by' age_at_d yod lag mortrate count 83 | isid `by' age_at_d yod `lag' 84 | sort `by' age_at_d yod `lag' 85 | label data "" 86 | end 87 | -------------------------------------------------------------------------------- /code/ado/gompertz_LYbetween.py: -------------------------------------------------------------------------------- 1 | from math import exp, isnan 2 | from scipy.special import exp1 3 | from numpy import genfromtxt, savetxt 4 | import sys, argparse 5 | 6 | def main(): 7 | startage, endage, a, b, infile, outfile = check_arg(sys.argv[1:]) 8 | 9 | if infile is None: 10 | if a is None or b is None: 11 | raise ValueError('you must specify an input file or BOTH a and b') 12 | 13 | LY = gompertz_lifeyears(par=[a, b], minage=startage, maxage=endage) 14 | 15 | if outfile is None: 16 | print LY 17 | else: 18 | with open(outfile, 'w') as f: 19 | f.write('%s' % LY) 20 | 21 | else: # infile specified 22 | if outfile is None: 23 | raise ValueError('you must specify an output file with an input file') 24 | 25 | gompertz_parameters = genfromtxt(infile, delimiter=',') 26 | lifeyears = [ gompertz_lifeyears(par=p, minage=startage, maxage=endage) for p in gompertz_parameters ] 27 | 28 | with open(outfile, 'w') as f: 29 | savetxt(f, lifeyears, fmt='%9g') 30 | 31 | 32 | def check_arg(args=None): 33 | parser = argparse.ArgumentParser(description='Compute expected life years between startage and endage for a person alive at startage') 34 | parser.add_argument('--startage', 35 | type=float, 36 | help='Starting age for expected life years calculation', 37 | required='True') 38 | parser.add_argument('--endage', 39 | type=float, 40 | help='Ending age for expected life years calculation', 41 | required='True') 42 | parser.add_argument('-a', '--intercept', 43 | type=float, 44 | help='Gompertz intercept; "a" in exp(a+bx)') 45 | parser.add_argument('-b', '--slope', 46 | type=float, 47 | help='Gompertz slope; "b" in exp(a+bx)') 48 | parser.add_argument('-i', '--input', 49 | type=str, 50 | help='CSV of Gompertz parameters; int in col 1, slope in col 2') 51 | 
parser.add_argument('-o', '--output', 52 | type=str, 53 | help='File to output expected life years to') 54 | 55 | results = parser.parse_args(args) 56 | return (results.startage, 57 | results.endage, 58 | results.intercept, 59 | results.slope, 60 | results.input, 61 | results.output) 62 | 63 | def gompertz_lifeyears(par, minage, maxage): 64 | a=par[0] 65 | b=par[1] 66 | if -0.00001 < b < 0.00001: # b is approximately 0 67 | return exp(-a) - exp(-a - exp(a) * (maxage - minage) ) 68 | else: 69 | f_min = 1 / b * exp(a + b * minage) 70 | f_max = 1 / b * exp(a + b * maxage) 71 | cons_of_integ = exp(f_min) 72 | 73 | if b>0: 74 | LY = -1 * cons_of_integ / b * ( exp1(f_max) - exp1(f_min) ) 75 | else: 76 | LY = -1 * cons_of_integ / b * ( exp1(complex(f_max)) - exp1(complex(f_min)) ).real 77 | 78 | if isnan(LY) and gompertz_survival(a, b, minage, minage+0.01)<0.00001: # everyone dies instantly 79 | return 0.0 80 | else: 81 | return LY 82 | 83 | def gompertz_survival(a, b, minage, age): 84 | return exp( 1 / b * ( exp(a + b*minage) - exp(a + b*age) ) ) 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /code/ado/mle_gomp_est.ado: -------------------------------------------------------------------------------- 1 | * Subcommand of simulated_delta 2 | 3 | program define mle_gomp_est , rclass 4 | syntax , age(varname) mort(varname) n(varname) [if] [ vce(passthru) ] 5 | 6 | * Estimate the MLE 7 | glm `mort' `age' [fw = `n'] `if', link(log) family(binomial) from(_cons=-10 `age'=0.1) `vce' 8 | 9 | * Extract Variance Covariance Matrix and Save 10 | matrix V = e(V) 11 | * return scalar var_slope = V[1,1] 12 | * return scalar var_int = V[2,2] 13 | * return scalar covar_int_slope = V[2,1] 14 | 15 | * Perform Cholesky Decomposition on Covariance Matrix and Save Relevant Coefficients (No need for A_slope_2 since A is upper triangular) 16 | * Could potentially get additional speed savings by using _cholesky(V), which returns modifies the matrix in place 17 | matrix A = cholesky(V) 18 | return scalar A_slope_1 = A[1,1] 19 | return scalar A_int_1 = A[2,1] 20 | return scalar A_int_2 = A[2,2] 21 | 22 | * Save point estimates 23 | return scalar gomp_int = _b[_cons] 24 | return scalar gomp_slope = _b[`mort':`age'] 25 | 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /code/ado/scalarout.ado: -------------------------------------------------------------------------------- 1 | program define scalarout 2 | 3 | syntax using, num(real) id(string) [fmt(string) replace] 4 | 5 | * Replace or Append? 
6 | if ("`replace'"=="replace") local overwrite replace 7 | else local overwrite append 8 | 9 | * Open file 10 | tempname file 11 | file open `file' `using', write text `overwrite' 12 | 13 | * Write CSV line 14 | if (substr("`fmt'",-1,1)=="c") file write `file' `""`id'",""' `fmt' (`num') `"""' _n 15 | else file write `file' `""`id'","' `fmt' (`num') _n 16 | 17 | * Close file 18 | file close `file' 19 | 20 | end 21 | -------------------------------------------------------------------------------- /code/ado/scheme-leap.scheme: -------------------------------------------------------------------------------- 1 | * LEAP STATA GRAPHING SCHEME 2 | 3 | #include s2color 4 | 5 | * anglestyle vertical_tick horizontal 6 | color background white 7 | linestyle legend none 8 | 9 | 10 | color p1 navy 11 | color p2 maroon 12 | color p3 forest_green 13 | color p4 dkorange 14 | color p5 teal 15 | color p6 cranberry 16 | color p7 lavender 17 | color p8 khaki 18 | color p9 sienna 19 | color p10 emidblue 20 | color p11 emerald 21 | color p12 brown 22 | color p13 erose 23 | color p14 gold 24 | color p15 bluishgray 25 | 26 | 27 | 28 | gsize axis_title_gap tiny 29 | margin legend zero 30 | margin graph "2 3 3 3" 31 | -------------------------------------------------------------------------------- /code/ado/simulated_delta.ado: -------------------------------------------------------------------------------- 1 | program define simulated_delta 2 | 3 | /*** Draw new Gompertz parameters from the estimated multivariate normal distribution 4 | with point estimates and covariance from the MLE. 5 | ***/ 6 | 7 | version 13.1 8 | syntax varlist(min=1), REPs(integer) [seed(string) keep(string)] 9 | local group `varlist' 10 | 11 | isid `group' 12 | expand `reps' 13 | bys `group': gen sample_num = _n 14 | order `group' sample_num 15 | 16 | set seed `seed' 17 | tempvar z1 z2 18 | gen `z1' = rnormal() 19 | gen `z2' = rnormal() 20 | 21 | replace gomp_slope = A_slope_1*`z1' + gomp_slope 22 | replace gomp_int = A_int_1*`z1' + A_int_2*`z2' + gomp_int 23 | 24 | keep `group' `keep' sample_num gomp_int gomp_slope 25 | 26 | end 27 | -------------------------------------------------------------------------------- /code/ado/wsample.ado: -------------------------------------------------------------------------------- 1 | program define wsample 2 | 3 | version 13.1 4 | syntax newvarname, wt(varname) 5 | 6 | qui gen byte `varlist'=. 7 | mata: draw_wsample("`wt'", "`varlist'") 8 | 9 | end 10 | 11 | version 13.1 12 | set matastrict on 13 | 14 | mata: 15 | 16 | void draw_wsample(string scalar wt, string scalar gen) { 17 | // Inputs: 18 | // - a variable containing sampling weights, 19 | // - an empty variable to store number of times each record is sampled. 20 | // Requires: 21 | // - the weights to be non-missing 22 | // Outputs: 23 | // - number of times each record is sampled, using random sampling with 24 | // replacement. each record's probability of being sampled is equal to 25 | // its relative sample weight. 
number of draws = number of records = _N 26 | 27 | 28 | // Get column number of weight and to-generate vars 29 | real scalar wtcol 30 | wtcol = st_varindex(wt) 31 | 32 | real scalar gencol 33 | gencol = st_varindex(gen) 34 | 35 | // Compute total weight 36 | stata("sum " + wt + ", meanonly") 37 | 38 | real scalar totwt 39 | totwt = st_numscalar("r(sum)") 40 | 41 | // Generate random draws, uniform from 0 to totwt 42 | real draws 43 | draws = runiform(st_nobs(), 1) * totwt 44 | _sort(draws, 1) 45 | 46 | // Store number of times each unit has been drawn 47 | real scalar timesdrawn 48 | 49 | real scalar runningdraws 50 | runningdraws=1 51 | 52 | real scalar runningweight 53 | runningweight=0 54 | 55 | for (obs=1; obs<=st_nobs(); obs++) { 56 | 57 | // Update "running weight" and "times drawn" 58 | runningweight = runningweight + _st_data(obs,wtcol) 59 | timesdrawn=0 60 | 61 | // Determine number of times this unit was drawn 62 | while (runningdraws<=st_nobs()) { 63 | if (draws[runningdraws,1] <= runningweight) { 64 | timesdrawn++ 65 | runningdraws++ 66 | } 67 | else break 68 | } 69 | 70 | // Store number of draws 71 | _st_store(obs, gencol, timesdrawn) 72 | 73 | } 74 | 75 | } 76 | 77 | end 78 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_coords_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/county1990_coords_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_database_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/county1990_database_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_maptile.ado: -------------------------------------------------------------------------------- 1 | *! 22mar2015, Michael Stepner, stepner@mit.edu 2 | 3 | program define _maptile_county1990 4 | syntax , [ geofolder(string) /// 5 | mergedatabase /// 6 | map spmapvar(varname) var(varname) binvar(varname) clopt(string) legopt(string) min(string) clbreaks(string) max(string) mapcolors(string) ndfcolor(string) /// 7 | savegraph(string) replace resolution(string) map_restriction(string) spopt(string) /// 8 | /* Geography-specific options */ /// 9 | stateoutline(string) conus /// 10 | ] 11 | 12 | if ("`mergedatabase'"!="") { 13 | novarabbrev merge 1:m county using `"`geofolder'/county1990_database_clean"', nogen update replace 14 | exit 15 | } 16 | 17 | if ("`map'"!="") { 18 | 19 | if ("`conus'"=="conus") { 20 | * Hide AK and HI from stateoutline 21 | local polygon_select select(drop if inlist(_ID,27,8)) 22 | 23 | * Hide AK and HI from main map 24 | if ("`map_restriction'"=="") local map_restriction if !inlist(floor(county/1000),2,15) 25 | else local map_restriction `map_restriction' & !inlist(floor(county/1000),2,15) 26 | } 27 | 28 | if ("`stateoutline'"!="") { 29 | cap confirm file `"`geofolder'/state_coords_clean.dta"' 30 | if (_rc==0) local polygon polygon(data(`"`geofolder'/state_coords_clean"') ocolor(black) osize(`stateoutline' ...) 
`polygon_select') 31 | else if (_rc==601) { 32 | di as error `"stateoutline() requires the {it:state} geography to be installed"' 33 | di as error `"--> state_coords_clean.dta must be present in the geofolder"' 34 | exit 198 35 | } 36 | else { 37 | error _rc 38 | exit _rc 39 | } 40 | } 41 | 42 | spmap `spmapvar' using `"`geofolder'/county1990_coords_clean"' `map_restriction', id(id) /// 43 | `clopt' /// 44 | `legopt' /// 45 | legend(pos(5) size(*1.8)) /// 46 | fcolor(`mapcolors') ndfcolor(`ndfcolor') /// 47 | oc(black ...) ndo(black) /// 48 | os(vvthin ...) nds(vvthin) /// 49 | `polygon' /// 50 | `spopt' 51 | 52 | * Save graph 53 | if (`"`savegraph'"'!="") __savegraph_maptile, savegraph(`savegraph') resolution(`resolution') `replace' 54 | 55 | } 56 | 57 | end 58 | 59 | * Save map to file 60 | cap program drop __savegraph_maptile 61 | program define __savegraph_maptile 62 | 63 | syntax, savegraph(string) resolution(string) [replace] 64 | 65 | * check file extension using a regular expression 66 | if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0) 67 | 68 | * deal with different filetypes appropriately 69 | if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace' 70 | else if inlist(`"`graphextension'"',".ps",".eps") graph export `"`savegraph'"', mag(`=round(100*`resolution')') `replace' 71 | else if (`"`graphextension'"'==".png") graph export `"`savegraph'"', width(`=round(3200*`resolution')') `replace' 72 | else if (`"`graphextension'"'==".tif") graph export `"`savegraph'"', width(`=round(1600*`resolution')') `replace' 73 | else graph export `"`savegraph'"', `replace' 74 | 75 | end 76 | 77 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/county1990_maptile.smcl: -------------------------------------------------------------------------------- 1 | {smcl} 2 | 3 | {pstd}{bf:maptile {hline 2} Geography: county1990}{p_end} 4 | 5 | 6 | {marker description}{...} 7 | {title:Description} 8 | 9 | {pstd}{bf:county1990} generates a map of United States counties using 1990 boundaries.{p_end} 10 | 11 | {pstd}It displays Alaska and Hawaii rescaled and moved to the bottom left of the map, below the continental US, for ease of viewing.{p_end} 12 | 13 | 14 | {marker geographic-id-variable}{...} 15 | {title:Geographic ID variable} 16 | 17 | {pstd}{bf:county} {hline 2} FIPS county codes{p_end} 18 | 19 | 20 | {marker geography-specific-options}{...} 21 | {title:Geography-Specific Options} 22 | 23 | {synoptset 32 tabbed}{...} 24 | {synopt:{opth stateoutline(linewidthstyle)}}overlays the map with a (potentially thicker) line on state boundaries{p_end} 25 | {synopt:{bf:conus}}hides Alaska and Hawaii, on the map {bf:and} the state outline{p_end} 26 | 27 | 28 | {marker author}{...} 29 | {title:Author} 30 | 31 | {pstd}Michael Stepner{break} stepner@mit.edu{p_end} -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_coords_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/cz_coords_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_database_clean.dta: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/cz_database_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_maptile.ado: -------------------------------------------------------------------------------- 1 | *! 22mar2015, Michael Stepner, stepner@mit.edu 2 | 3 | program define _maptile_cz 4 | syntax , [ geofolder(string) /// 5 | mergedatabase /// 6 | map spmapvar(varname) var(varname) binvar(varname) clopt(string) legopt(string) min(string) clbreaks(string) max(string) mapcolors(string) ndfcolor(string) /// 7 | savegraph(string) replace resolution(string) map_restriction(string) spopt(string) /// 8 | /* Geography-specific options */ /// 9 | stateoutline(string) conus /// 10 | ] 11 | 12 | if ("`mergedatabase'"!="") { 13 | novarabbrev merge 1:m cz using `"`geofolder'/cz_database_clean"', nogen 14 | exit 15 | } 16 | 17 | if ("`map'"!="") { 18 | 19 | if ("`conus'"=="conus") { 20 | * Hide AK and HI from stateoutline 21 | local polygon_select select(drop if inlist(_ID,27,8)) 22 | 23 | * Hide AK and HI from main map 24 | if ("`map_restriction'"=="") local map_restriction if !inlist(floor(cz/100),341,347,356) 25 | else local map_restriction `map_restriction' & !inlist(floor(cz/100),341,347,356) 26 | } 27 | 28 | if ("`stateoutline'"!="") { 29 | cap confirm file `"`geofolder'/state_coords_clean.dta"' 30 | if (_rc==0) local polygon polygon(data(`"`geofolder'/state_coords_clean"') ocolor(black) osize(`stateoutline' ...) `polygon_select') 31 | else if (_rc==601) { 32 | di as error `"stateoutline() requires the {it:state} geography to be installed"' 33 | di as error `"--> state_coords_clean.dta must be present in the geofolder"' 34 | exit 198 35 | } 36 | else { 37 | error _rc 38 | exit _rc 39 | } 40 | } 41 | 42 | spmap `spmapvar' using `"`geofolder'/cz_coords_clean"' `map_restriction', id(id) /// 43 | `clopt' /// 44 | `legopt' /// 45 | legend(pos(5) size(*1.8)) /// 46 | fcolor(`mapcolors') ndfcolor(`ndfcolor') /// 47 | oc(black ...) ndo(black) /// 48 | os(vvthin ...) 
nds(vvthin) /// 49 | `polygon' /// 50 | `spopt' 51 | 52 | * Save graph 53 | if (`"`savegraph'"'!="") __savegraph_maptile, savegraph(`savegraph') resolution(`resolution') `replace' 54 | 55 | } 56 | 57 | end 58 | 59 | * Save map to file 60 | cap program drop __savegraph_maptile 61 | program define __savegraph_maptile 62 | 63 | syntax, savegraph(string) resolution(string) [replace] 64 | 65 | * check file extension using a regular expression 66 | if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0) 67 | 68 | * deal with different filetypes appropriately 69 | if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace' 70 | else if inlist(`"`graphextension'"',".ps",".eps") graph export `"`savegraph'"', mag(`=round(100*`resolution')') `replace' 71 | else if (`"`graphextension'"'==".png") graph export `"`savegraph'"', width(`=round(3200*`resolution')') `replace' 72 | else if (`"`graphextension'"'==".tif") graph export `"`savegraph'"', width(`=round(1600*`resolution')') `replace' 73 | else graph export `"`savegraph'"', `replace' 74 | 75 | end 76 | 77 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/cz_maptile.smcl: -------------------------------------------------------------------------------- 1 | {smcl} 2 | 3 | {pstd}{bf:maptile {hline 2} Geography: cz}{p_end} 4 | 5 | 6 | {marker description}{...} 7 | {title:Description} 8 | 9 | {pstd}{bf:cz} generates a map of United States 1990 Commuting Zones.{p_end} 10 | 11 | {pstd}It displays Alaska and Hawaii rescaled and moved to the bottom left of the map, below the continental US, for ease of viewing.{p_end} 12 | 13 | 14 | {marker geographic-id-variable}{...} 15 | {title:Geographic ID variable} 16 | 17 | {pstd}{bf:cz} {hline 2} 1990 Commuting Zone codes{p_end} 18 | 19 | 20 | {marker geography-specific-options}{...} 21 | {title:Geography-Specific Options} 22 | 23 | {synoptset 32 tabbed}{...} 24 | {synopt:{opth stateoutline(linewidthstyle)}}overlays the map with a (potentially thicker) line on state boundaries{p_end} 25 | {synopt:{bf:conus}}hides Alaska and Hawaii, on the map {bf:and} the state outline{p_end} 26 | 27 | 28 | {marker author}{...} 29 | {title:Author} 30 | 31 | {pstd}Michael Stepner{break} stepner@mit.edu{p_end} -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_coords_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/state_coords_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_database_clean.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_maptile_geo/state_database_clean.dta -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_maptile.ado: -------------------------------------------------------------------------------- 1 | *! 
6sep2014, Michael Stepner, stepner@mit.edu 2 | 3 | program define _maptile_state 4 | syntax , [ geofolder(string) /// 5 | mergedatabase /// 6 | map spmapvar(varname) var(varname) binvar(varname) clopt(string) legopt(string) min(string) clbreaks(string) max(string) mapcolors(string) ndfcolor(string) /// 7 | savegraph(string) replace resolution(string) map_restriction(string) spopt(string) /// 8 | /* Geography-specific options */ /// 9 | geoid(varname) /// 10 | ] 11 | 12 | if ("`mergedatabase'"!="") { 13 | if ("`geoid'"=="") local geoid state 14 | 15 | if inlist("`geoid'","state","statefips","statename") novarabbrev merge 1:1 `geoid' using `"`geofolder'/state_database_clean"', nogen keepusing(`geoid' _polygonid) 16 | else { 17 | di as error "with geography(state), geoid() must be 'state', 'statefips', 'statename', or blank" 18 | exit 198 19 | } 20 | exit 21 | } 22 | 23 | if ("`map'"!="") { 24 | 25 | * Avoid having a "No Data" legend item just because DC is missing (it's not visible on the map, nor is it a state) 26 | sum `spmapvar' if _polygonid==6, meanonly 27 | if (r(N)==0) { 28 | * add map restriction 29 | if (`"`map_restriction'"'!="") local map_restriction `map_restriction' & _polygonid!=6 30 | else local map_restriction if _polygonid!=6 31 | } 32 | 33 | spmap `spmapvar' using `"`geofolder'/state_coords_clean"' `map_restriction', id(_polygonid) /// 34 | `clopt' /// 35 | `legopt' /// 36 | legend(pos(5) size(*1.8)) /// 37 | fcolor(`mapcolors') ndfcolor(`ndfcolor') /// 38 | oc(black ...) ndo(black) /// 39 | os(vthin ...) nds(vthin) /// 40 | `spopt' 41 | 42 | * Save graph 43 | if (`"`savegraph'"'!="") __savegraph_maptile, savegraph(`savegraph') resolution(`resolution') `replace' 44 | 45 | } 46 | 47 | end 48 | 49 | * Save map to file 50 | cap program drop __savegraph_maptile 51 | program define __savegraph_maptile 52 | 53 | syntax, savegraph(string) resolution(string) [replace] 54 | 55 | * check file extension using a regular expression 56 | if regexm(`"`savegraph'"',"\.[a-zA-Z0-9]+$") local graphextension=regexs(0) 57 | 58 | * deal with different filetypes appropriately 59 | if inlist(`"`graphextension'"',".gph","") graph save `"`savegraph'"', `replace' 60 | else if inlist(`"`graphextension'"',".ps",".eps") graph export `"`savegraph'"', mag(`=round(100*`resolution')') `replace' 61 | else if (`"`graphextension'"'==".png") graph export `"`savegraph'"', width(`=round(3200*`resolution')') `replace' 62 | else if (`"`graphextension'"'==".tif") graph export `"`savegraph'"', width(`=round(1600*`resolution')') `replace' 63 | else graph export `"`savegraph'"', `replace' 64 | 65 | end 66 | 67 | -------------------------------------------------------------------------------- /code/ado_maptile_geo/state_maptile.smcl: -------------------------------------------------------------------------------- 1 | {smcl} 2 | 3 | {pstd}{bf:maptile {hline 2} Geography: state}{p_end} 4 | 5 | 6 | {marker description}{...} 7 | {title:Description} 8 | 9 | {pstd}{bf:state} generates a map of U.S. 
states.{p_end} 10 | 11 | {pstd}It displays Alaska and Hawaii rescaled and moved to the bottom left of the map, below the continental US, for ease of viewing.{p_end} 12 | 13 | 14 | {marker geographic-id-variables}{...} 15 | {title:Geographic ID variables} 16 | 17 | {synoptset 13 tabbed}{...} 18 | {synopt:{bf:state}}2-letter state abbreviations{p_end} 19 | {synopt:{bf:statefips}}2-digit state FIPS codes{p_end} 20 | {synopt:{bf:statename}}unabbreviated state names{p_end} 21 | 22 | 23 | {marker geography-specific-options}{...} 24 | {title:Geography-Specific Options} 25 | 26 | {synoptset tabbed}{...} 27 | {synopt:{opth geoid(varname)}}specifies the geographic ID variable to use; default is {bf:geoid(state)}{p_end} 28 | 29 | 30 | {marker author}{...} 31 | {title:Author} 32 | 33 | {pstd}Michael Stepner{break} stepner@mit.edu{p_end} -------------------------------------------------------------------------------- /code/ado_ssc/_gwtmean.ado: -------------------------------------------------------------------------------- 1 | /* _gwtmean.ado 6-5-2001 David Kantor, Institute for Policy Studies Johns Hopkins University This is an egen weighted mean function, based on c:\stata\ado\_\_gmean.ado Actually, it is just a very minor tweaking of _gmean. In fact this could be considered an enhancement to _gmean; just rename this file to _gmean and adjust the program define statement. This is used with egen as follows: egen newvar = wtmean(exp), by(byvar) weight(wgt_expr) This allows a weight option. egen does not allow a weight in the usual manner ([weight= wgtvar]), so we us an option. If the weight option is omitted, then the action and results are identical to egen mean (_gmean.ado). (If the weight expression is a non-zero constant, then, too, the results are identical to egen mean.) The weight macro captures the weight. If it is non-empty, then it is changed to include parentheses and the * operator. The inclusion of parentheses actually allows an expression -- not just a variable. */ *! version 1.0.0 6-5-2001 program define _gwtmean version 3.0 local varlist "req new max(1)" local exp "req nopre" local if "opt" local in "opt" local options "by(string) weight(string)" parse "`*'" tempvar touse if "`weight'" ~= "" { local weight "* (`weight')" } quietly { gen byte `touse'=1 `if' `in' sort `touse' `by' by `touse' `by': replace `varlist' = /* */ sum((`exp')`weight')/sum(((`exp')!=.)`weight') if `touse'==1 by `touse' `by': replace `varlist' = `varlist'[_N] } end -------------------------------------------------------------------------------- /code/ado_ssc/maptile_geohelp.ado: -------------------------------------------------------------------------------- 1 | *! version 1.02 1nov2015 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicence (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? 
Jeff Atwood gives good reasons: 11 | * http://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | 14 | program define maptile_geohelp 15 | version 11 16 | 17 | set more off 18 | 19 | syntax name(name=geography id="geoname") [, geofolder(string)] 20 | 21 | * Set default directory 22 | if (`"`geofolder'"'=="") local geofolder `c(sysdir_personal)'maptile_geographies 23 | 24 | * Check that the geography is installed 25 | cap confirm file `"`geofolder'/`geography'_maptile.ado"' 26 | if (_rc!=0) { 27 | di as error "geography(`geography') specified, but it is not installed." 28 | 29 | if ("`geofolder'"=="`c(sysdir_personal)'maptile_geographies") di as text `"To see a list of installed geographies run: {stata maptile_geolist}"' 30 | else { 31 | di as text `"To see a list of installed geographies run:"' 32 | di as text `" {stata maptile_geolist, geofolder(`geofolder')}"' 33 | di as text "" 34 | } 35 | exit 198 36 | } 37 | 38 | * Check that it has a help file 39 | cap confirm file `"`geofolder'/`geography'_maptile.smcl"' 40 | if (_rc!=0) { 41 | di as error `"{bf:`geography'} geography exists, but it does not have a help file. There is no file:"' 42 | di as text `" `geofolder'/`geography'_maptile.smcl"' 43 | exit 198 44 | } 45 | 46 | * Display the help file 47 | view `"`geofolder'/`geography'_maptile.smcl"' 48 | 49 | end 50 | -------------------------------------------------------------------------------- /code/ado_ssc/maptile_geolist.ado: -------------------------------------------------------------------------------- 1 | *! version 1.02 1nov2015 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicence (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? Jeff Atwood gives good reasons: 11 | * http://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | 14 | program define maptile_geolist 15 | version 11 16 | 17 | set more off 18 | 19 | syntax [, geofolder(string)] 20 | 21 | * Set default directory 22 | if (`"`geofolder'"'=="") local geofolder `c(sysdir_personal)'maptile_geographies 23 | 24 | * Check that the specified directory exists (based on confirmdir.ado code by Dan Blanchette) 25 | local current_dir `"`c(pwd)'"' 26 | quietly capture cd `"`geofolder'"' 27 | if _rc!=0 { 28 | di as error `"unable to load directory `geofolder'"' 29 | exit 198 30 | } 31 | quietly cd `"`current_dir'"' 32 | 33 | 34 | * Store all relevant files in local 35 | local geos : dir `"`geofolder'"' files "*_maptile.ado" 36 | 37 | * Output geo_names 38 | if (`"`geos'"'=="") di as text "no geography templates found" 39 | else { 40 | di `: subinstr local geos "_maptile.ado" " ", all' 41 | } 42 | 43 | end 44 | -------------------------------------------------------------------------------- /code/ado_ssc/maptile_install.ado: -------------------------------------------------------------------------------- 1 | *! version 1.02 1nov2015 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicence (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? 
Jeff Atwood gives good reasons: 11 | * http://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | 14 | program define maptile_install 15 | version 11 16 | 17 | syntax using/, [replace] 18 | 19 | * Ensure that the directories exist 20 | cap mkdir "`c(sysdir_personal)'" 21 | cap mkdir "`c(sysdir_personal)'maptile_geographies" 22 | 23 | * Copy/download the specified geography 24 | qui copy `"`using'"' `"`c(sysdir_personal)'maptile_geographies/temp.zip"', replace 25 | 26 | * Change to the target directory 27 | local cwd `c(pwd)' 28 | di as text "" 29 | cd `"`c(sysdir_personal)'maptile_geographies"' 30 | 31 | * Extract the geography 32 | unzipfile temp.zip, `replace' 33 | erase temp.zip 34 | 35 | * Change back to original directory 36 | qui cd `"`cwd'"' 37 | 38 | * Suggest reading help file 39 | di as text "" 40 | di as text "To see the help file of the geography template, run:" 41 | di as text " {cmd:maptile_geohelp} {it:geoname}" 42 | 43 | end 44 | -------------------------------------------------------------------------------- /code/ado_ssc/save12.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0.0 9oct2016 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicense (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? Jeff Atwood gives good reasons: 11 | * https://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | program define save12 14 | version 12 15 | 16 | syntax [anything], [noLabel replace all] 17 | 18 | if c(stata_version)>=12 & c(stata_version)<13 { 19 | save `anything', `label' `replace' `all' 20 | } 21 | else if c(stata_version)>=13 & c(stata_version)<14 { 22 | saveold `anything', `label' `replace' `all' 23 | } 24 | else if c(stata_version)>=14 { 25 | saveold `anything', `label' `replace' `all' version(12) 26 | } 27 | else { 28 | di as error "Must have Stata version 12 or higher to save in Stata 12 format." 29 | error 499 30 | } 31 | 32 | end 33 | -------------------------------------------------------------------------------- /code/ado_ssc/save12.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.0.0 9oct2016}{...} 3 | {viewerjumpto "Syntax" "save12##syntax"}{...} 4 | {viewerjumpto "Description" "save12##description"}{...} 5 | {viewerjumpto "Options" "save12##options"}{...} 6 | {viewerjumpto "Author" "save12##author"}{...} 7 | {title:Title} 8 | 9 | {p2colset 5 15 17 2}{...} 10 | {p2col :{hi:save12} {hline 2}}Save Stata 12 dataset{p_end} 11 | {p2colreset}{...} 12 | 13 | 14 | {marker syntax}{...} 15 | {title:Syntax} 16 | 17 | {phang} 18 | Save data in memory to file in Stata 12 format 19 | 20 | {p 8 16 2} 21 | {cmd:save12} 22 | {it:{help filename}} 23 | [{cmd:,} {it:options}] 24 | 25 | 26 | {synoptset 17}{...} 27 | {synopthdr :options} 28 | {synoptline} 29 | {synopt :{opt nol:abel}}omit value labels from the saved dataset{p_end} 30 | {synopt :{opt replace}}overwrite existing dataset{p_end} 31 | {synopt :{opt all}}save {cmd:e(sample)} with the dataset; programmer's 32 | option{p_end} 33 | {synoptline} 34 | 35 | 36 | {marker description}{...} 37 | {title:Description} 38 | 39 | {pstd} 40 | {cmd:save12} provides a consistent command to save datasets in Stata 12 format, 41 | which facilitates collaboration between computers using different versions 42 | of Stata. Stata 12 and later versions of Stata can produce datasets in Stata 12 43 | format, but not using the same command. {cmd:save12} assures that the same code 44 | shared across multiple computers will save datasets in Stata 12 format, 45 | regardless of the host computer's version of Stata. 46 | 47 | {pstd} 48 | {opt save12} stores the dataset currently in memory on disk in Stata 12 format 49 | under the name {it:{help filename}}. If {it:filename} is not specified, the 50 | name under which the data were last known to Stata ({cmd:c(filename)}) is used. 51 | If {it:filename} is specified without an extension, {cmd:.dta} is used. If your 52 | {it:filename} contains embedded spaces, remember to enclose it in double 53 | quotes. 54 | 55 | {pstd} 56 | {opt save12} has been tested to work in Stata 12, 13 and 14. It will continue to 57 | work correctly in future versions of Stata so long as they maintain the same 58 | {bf:{help saveold}} syntax as Stata 14, with an option of {opt version(12)}. 59 | Stata 11 can read datasets in Stata 12 format, but cannot write datasets in 60 | Stata 12 format. 61 | 62 | 63 | {marker options}{...} 64 | {title:Options} 65 | 66 | {phang} 67 | {opt nolabel} omits value labels from the saved dataset. 68 | The associations between variables and value-label names, however, 69 | are saved along with the dataset label and the variable labels. 70 | 71 | {phang} 72 | {opt replace} permits {opt save12} to overwrite an existing dataset. 73 | 74 | {phang} 75 | {opt all} is for use by programmers. If specified, {cmd:e(sample)} will 76 | be saved with the dataset. You could run a regression; {cmd:save12 mydata, all}; 77 | {cmd:drop _all}; {cmd:use mydata}; and {cmd:predict yhat if e(sample)}. 78 | 79 | 80 | {marker author}{...} 81 | {title:Author} 82 | 83 | {pstd}Michael Stepner{p_end} 84 | {pstd}stepner@mit.edu{p_end} 85 | -------------------------------------------------------------------------------- /code/ado_ssc/save13.ado: -------------------------------------------------------------------------------- 1 | *! version 1.0.0 9oct2016 Michael Stepner, stepner@mit.edu 2 | 3 | /*** Unlicense (abridged): 4 | This is free and unencumbered software released into the public domain. 5 | It is provided "AS IS", without warranty of any kind. 
6 | 7 | For the full legal text of the Unlicense, see 8 | */ 9 | 10 | * Why did I include a formal license? Jeff Atwood gives good reasons: 11 | * https://blog.codinghorror.com/pick-a-license-any-license/ 12 | 13 | program define save13 14 | version 13 15 | 16 | syntax [anything], [noLabel replace all] 17 | 18 | if c(stata_version)>=13 & c(stata_version)<14 { 19 | save `anything', `label' `replace' `all' 20 | } 21 | else if c(stata_version)>=14 { 22 | saveold `anything', `label' `replace' `all' version(13) 23 | } 24 | else { 25 | di as error "Must have Stata version 13 or higher to save in Stata 13 format." 26 | error 499 27 | } 28 | 29 | end 30 | -------------------------------------------------------------------------------- /code/ado_ssc/save13.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! version 1.0.0 9oct2016}{...} 3 | {viewerjumpto "Syntax" "save13##syntax"}{...} 4 | {viewerjumpto "Description" "save13##description"}{...} 5 | {viewerjumpto "Options" "save13##options"}{...} 6 | {viewerjumpto "Author" "save13##author"}{...} 7 | {title:Title} 8 | 9 | {p2colset 5 15 17 2}{...} 10 | {p2col :{hi:save13} {hline 2}}Save Stata 13 dataset{p_end} 11 | {p2colreset}{...} 12 | 13 | 14 | {marker syntax}{...} 15 | {title:Syntax} 16 | 17 | {phang} 18 | Save data in memory to file in Stata 13 format 19 | 20 | {p 8 16 2} 21 | {cmd:save13} 22 | {it:{help filename}} 23 | [{cmd:,} {it:options}] 24 | 25 | 26 | {synoptset 17}{...} 27 | {synopthdr :options} 28 | {synoptline} 29 | {synopt :{opt nol:abel}}omit value labels from the saved dataset{p_end} 30 | {synopt :{opt replace}}overwrite existing dataset{p_end} 31 | {synopt :{opt all}}save {cmd:e(sample)} with the dataset; programmer's 32 | option{p_end} 33 | {synoptline} 34 | 35 | 36 | {marker description}{...} 37 | {title:Description} 38 | 39 | {pstd} 40 | {cmd:save13} provides a consistent command to save datasets in Stata 13 format, 41 | which facilitates collaboration between computers using different versions 42 | of Stata. Stata 13 and later versions of Stata can produce datasets in Stata 13 43 | format, but not using the same command. {cmd:save13} assures that the same code 44 | shared across multiple computers will save datasets in Stata 13 format, 45 | regardless of the host computer's version of Stata. 46 | 47 | {pstd} 48 | {opt save13} stores the dataset currently in memory on disk in Stata 13 format 49 | under the name {it:{help filename}}. If {it:filename} is not specified, the 50 | name under which the data were last known to Stata ({cmd:c(filename)}) is used. 51 | If {it:filename} is specified without an extension, {cmd:.dta} is used. If your 52 | {it:filename} contains embedded spaces, remember to enclose it in double 53 | quotes. 54 | 55 | {pstd} 56 | {opt save13} has been tested to work in Stata 13 and Stata 14. It will continue to 57 | work correctly in future versions of Stata so long as they maintain the same 58 | {bf:{help saveold}} syntax as Stata 14, with an option of {opt version(13)}. 59 | 60 | 61 | {marker options}{...} 62 | {title:Options} 63 | 64 | {phang} 65 | {opt nolabel} omits value labels from the saved dataset. 66 | The associations between variables and value-label names, however, 67 | are saved along with the dataset label and the variable labels. 68 | 69 | {phang} 70 | {opt replace} permits {opt save13} to overwrite an existing dataset. 71 | 72 | {phang} 73 | {opt all} is for use by programmers. 
If specified, {cmd:e(sample)} will 74 | be saved with the dataset. You could run a regression; {cmd:save13 mydata, all}; 75 | {cmd:drop _all}; {cmd:use mydata}; and {cmd:predict yhat if e(sample)}. 76 | 77 | 78 | {marker author}{...} 79 | {title:Author} 80 | 81 | {pstd}Michael Stepner{p_end} 82 | {pstd}stepner@mit.edu{p_end} 83 | -------------------------------------------------------------------------------- /code/ado_ssc/spmap.hlp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelstepner/healthinequality-code/ea86c0d2f8762c8738d1ee8ca506c21d5a1c4ef7/code/ado_ssc/spmap.hlp -------------------------------------------------------------------------------- /code/ado_ssc/spmap_psl.ado: -------------------------------------------------------------------------------- 1 | *! -spmap_psl-: Auxiliary program for -spmap- 2 | *! Version 1.3.0 - 13 March 2017 3 | *! Version 1.2.0 - 14 March 2008 4 | *! Version 1.1.0 - 7 May 2007 5 | *! Version 1.0.0 - 7 December 2006 6 | *! Author: Maurizio Pisati 7 | *! Department of Sociology and Social Research 8 | *! University of Milano Bicocca (Italy) 9 | *! maurizio.pisati@unimib.it 10 | 11 | 12 | 13 | 14 | * ---------------------------------------------------------------------------- 15 | * 1. Define program 16 | * ---------------------------------------------------------------------------- 17 | 18 | program spmap_psl, sclass 19 | version 9.2 20 | 21 | 22 | 23 | 24 | * ---------------------------------------------------------------------------- 25 | * 2. Define syntax 26 | * ---------------------------------------------------------------------------- 27 | 28 | syntax, List(string asis) Max(numlist min=1 max=1 >0 integer) /// 29 | Option(string asis) [Default(string)] 30 | 31 | 32 | 33 | 34 | * ---------------------------------------------------------------------------- 35 | * 3. Parse list 36 | * ---------------------------------------------------------------------------- 37 | 38 | local FIRST : word 1 of `list' 39 | if (inlist("`FIRST'","=","..","...")) { 40 | di as err `"{p}Option `option' does not accept symbol "`FIRST'" "' /// 41 | "as its first argument{p_end}" 42 | exit 198 43 | } 44 | local NL : word count `list' 45 | forval i = 1/`NL' { 46 | local ITEM : word `i' of `list' 47 | if ("`ITEM'" == ".") { 48 | if ("`default'" != "") { 49 | local EL "`default'" 50 | local PL `"`PL'"`EL'" "' 51 | } 52 | else { 53 | di as err `"{p}Option `option' does not accept symbol ".", "' /// 54 | `"since no default value exists for this "' /// 55 | `"option{p_end}"' 56 | exit 198 57 | } 58 | } 59 | else if ("`ITEM'" == "=") { 60 | local EL `"`: word `=`i'-1' of `PL''"' 61 | local PL `"`PL'"`EL'" "' 62 | } 63 | else if ("`ITEM'" == ".." | "`ITEM'" == "...") { 64 | local EL `"`: word `=`i'-1' of `PL''"' 65 | forval j = `i'/`max' { 66 | local PL `"`PL'"`EL'" "' 67 | } 68 | continue, break 69 | } 70 | else { 71 | local PL `"`PL'"`ITEM'" "' 72 | } 73 | } 74 | local NL : word count `PL' 75 | if (`NL' < `max') { 76 | if ("`default'" != "") { 77 | forval i = `=`NL'+1'/`max' { 78 | local PL `"`PL'"`default'" "' 79 | } 80 | } 81 | else { 82 | di as err "{p}Option `option' requires exactly `max' arguments{p_end}" 83 | exit 198 84 | } 85 | } 86 | sreturn local pl `"`PL'"' 87 | 88 | 89 | 90 | 91 | * ---------------------------------------------------------------------------- 92 | * 4. 
End program 93 | * ---------------------------------------------------------------------------- 94 | 95 | end 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /code/covariates/Health behavior maps.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "$root/scratch/BRFSS maps" 12 | cap mkdir "$root/scratch/BRFSS maps/data" 13 | 14 | 15 | *********************************** 16 | *** CZ Maps of Health Behaviors *** 17 | *********************************** 18 | 19 | * Load data 20 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") 21 | use "${root}/data/derived/final_covariates/cz_full_covariates.dta", clear 22 | 23 | *** Create maps 24 | foreach healthvar in "cur_smoke" "bmi_obese" "exercise_any" { 25 | 26 | if ("`healthvar'"=="exercise_any") local revcolor revcolor 27 | 28 | replace `healthvar'_q1 = 100 * `healthvar'_q1 29 | 30 | local title : var label `healthvar'_q1 31 | 32 | maptile `healthvar'_q1, geo(cz) `revcolor' nquantiles(10) legd(1) /// 33 | geofolder("$root/code/ado_maptile_geo") /// 34 | twopt( /// 35 | title("`title'") /// 36 | subtitle("by Commuting Zone") /// 37 | legend(size(*0.8)) /// 38 | ) /// 39 | savegraph("$root/scratch/BRFSS maps/CZ map - `title'.png") replace 40 | project, creates("$root/scratch/BRFSS maps/CZ map - `title'.png") preserve 41 | 42 | export delim cz `healthvar'_q1 using "$root/scratch/BRFSS maps/data/CZ map - `title'.csv", /// 43 | replace // output data for formatted map 44 | project, creates("$root/scratch/BRFSS maps/data/CZ map - `title'.csv") preserve 45 | 46 | } 47 | 48 | -------------------------------------------------------------------------------- /code/covariates/clean_educ.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** Education, year 2000 18 | ***/ 19 | 20 | ******* 21 | 22 | *** Load Census Data 23 | project, original("${cov_raw}/census_education_2000_county.csv") 24 | insheet using "${cov_raw}/census_education_2000_county.csv", clear names 25 | ren geoid2 cty 26 | ren (hc01_vc31 hc01_vc32) (cs_educ_hs cs_educ_ba) // age 25+ with high school degree, bachelor's degree 27 | keep cty cs_educ_hs cs_educ_ba 28 | recode cty (12086=12025) // Miami-Dade name/FIPS change 29 | lab var cs_educ_hs "Percent age 25+ with high school degree" 30 | lab var cs_educ_ba "Percent age 25+ with bachelor's degree" 31 | 32 | //merge counties to CZs 33 | project, original("${cov_raw}/cty_covariates.dta") preserve 34 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz cty_pop2000) keep(match master) 35 | assert floor(cty/1000)==2 | 
cty==15005 if _merge!=3 // unmerged are either Alaska counties or Kalawao, Hawaii 36 | drop _merge 37 | drop if cz==. 38 | preserve 39 | keep cty cs_educ* 40 | save13 "${cov_clean}/cty_cs_educ.dta", replace 41 | project, creates("${cov_clean}/cty_cs_educ.dta") 42 | 43 | * Collapse to CZ level 44 | restore 45 | collapse (mean) cs_educ_hs cs_educ_ba [w=cty_pop2000], by(cz) 46 | save13 "${cov_clean}/cz_cs_educ.dta", replace 47 | project, creates("${cov_clean}/cz_cs_educ.dta") 48 | -------------------------------------------------------------------------------- /code/covariates/clean_hospquality.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** Hospital Compare Data 18 | 19 | 30-day hospital mortality rates for heart attacks, pneumonia, heart failure. 20 | Averaged over years 2002-2013. 21 | 22 | ***/ 23 | 24 | ******* 25 | 26 | 27 | *** Create SSA-cty FIPS crosswalk 28 | project, original("${cov_raw}/ssa_cty_cw.dta") 29 | use "${cov_raw}/ssa_cty_cw.dta", clear 30 | *drop double-coding of miami-dade 31 | drop if fips=="12086" 32 | isid ssa 33 | tempfile ssa_cty 34 | save `ssa_cty' 35 | 36 | 37 | *** Load data 38 | project, original("${cov_raw}/Adj_mortality_2002to2013_byCounty.dta") 39 | use "${cov_raw}/Adj_mortality_2002to2013_byCounty.dta", clear 40 | drop service 41 | drop if substr(cnty,-3,3)=="999" //state-level observations 42 | ren cnty ssa 43 | 44 | //merge to county FIPS identifiers 45 | merge m:1 ssa using `ssa_cty', keepus(fips) keep(match master) nogen 46 | drop if mi(fips) 47 | ren fips cty 48 | destring cty, replace 49 | 50 | * Recode Counties 51 | * Miami-Dade name/FIPS change, Broomfield split off from Boulder, Washabaugh Merged into Jackson, South Boston Merged into Halifax, Nansemond merged into Suffolk 52 | recode cty (12086=12025) (8014 = 8013) (46131=46071) (51780=51083) (51695=51800) 53 | 54 | * Collapse to County level 55 | foreach cause in ami chf pn { 56 | bys cty: egen temp = wtmean(adjmortmeas_`cause'all30day), weight(ndmortmeas_`cause'all) 57 | drop adjmortmeas_`cause'all30day 58 | ren temp adjmortmeas_`cause'all30day 59 | } 60 | collapse (mean) adj* (rawsum) nd*, by(cty) 61 | 62 | 63 | drop if cty>=57000 //Drop Territories 64 | project, original("${cov_raw}/cty_covariates.dta") preserve 65 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz cty_pop2000 cz_pop2000) 66 | assert floor(cty/1000)==2 if _merge!=3 // unmatched counties are Alaskan 67 | drop _merge 68 | drop if cz==. 
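* [Editorial note] The weighted collapses above and the standardized index built below rely on
* the -wtmean()- egen function from _gwtmean.ado (included earlier under code/ado_ssc/), which
* computes sum(weight*x)/sum(weight) over nonmissing x within each by-group. A minimal,
* commented-out sketch with made-up numbers (not part of this do-file):
/*
clear
input byte grp float rate long cases
1 0.10 100
1 0.20 300
2 0.05 200
end
egen wtrate = wtmean(rate), by(grp) weight(cases)
list, sepby(grp)
* grp 1: (0.10*100 + 0.20*300)/400 = 0.175 ; grp 2: 0.05
*/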
69 | 70 | preserve 71 | 72 | *** County level 73 | keep adj* cty cty_pop2000 74 | 75 | * Create index for acute care (standardize & combine measures) 76 | foreach v of varlist adjmortmeas_amiall30day adjmortmeas_chfall30day adjmortmeas_pnall30day { 77 | qui sum `v' [w=cty_pop2000] 78 | gen `v'_std = (`v'- `r(mean)')/`r(sd)' 79 | } 80 | gen mort_30day_hosp_z = (adjmortmeas_amiall30day_std + adjmortmeas_chfall30day_std + adjmortmeas_pnall30day_std)/3 81 | 82 | * Output cty dataset 83 | save13 "${cov_clean}/cty_hospitalcompare_30day.dta", replace 84 | project, creates("${cov_clean}/cty_hospitalcompare_30day.dta") 85 | 86 | 87 | *** CZ level 88 | restore 89 | 90 | * Collapse mortality rates to CZ level 91 | foreach cause in ami chf pn { 92 | bys cz: egen temp = wtmean(adjmortmeas_`cause'all30day), weight(ndmortmeas_`cause'all) 93 | drop adjmortmeas_`cause'all30day 94 | ren temp adjmortmeas_`cause'all30day 95 | } 96 | collapse (mean) adj* cz_pop2000, by(cz) 97 | 98 | * Create index for acute care (standardize & combine measures) 99 | foreach v of varlist adjmortmeas_amiall30day adjmortmeas_chfall30day adjmortmeas_pnall30day { 100 | qui sum `v' [w=cz_pop2000] 101 | gen `v'_std = (`v'- `r(mean)')/`r(sd)' 102 | } 103 | gen mort_30day_hosp_z = (adjmortmeas_amiall30day_std + adjmortmeas_chfall30day_std + adjmortmeas_pnall30day_std)/3 104 | drop cz_pop2000 105 | 106 | * Output cz dataset 107 | save13 "${cov_clean}/cz_hospitalcompare_30day.dta", replace 108 | project, creates("${cov_clean}/cz_hospitalcompare_30day.dta") 109 | -------------------------------------------------------------------------------- /code/covariates/clean_pop_labforce_change1980to2000.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** County-level change in population and labor force, 1980-2000 18 | ***/ 19 | 20 | ******* 21 | 22 | * Load Census data 23 | project, original("${cov_raw}/nhgis0007_ts_nominal_county.csv") 24 | import delimited using "${cov_raw}/nhgis0007_ts_nominal_county.csv", clear varnames(1) rowrange(3) 25 | destring *, replace 26 | 27 | //generate FIPS identifier, and keep vars from 1980 and 2000 28 | gen cty = statefp*1000 + countyfp 29 | gen statefips = statefp 30 | ren (a00aa1980 a00aa2000) (pop1980 pop2000) // total population, 1980 & 2000 31 | ren (b84aa1980 b84aa2000) (lf1980 lf2000) // population 16+ in the labor force, 1980 & 2000 32 | drop if mi(pop1980) & mi(pop2000) & mi(lf1980) & mi(lf2000) 33 | keep cty statefips pop* lf* 34 | 35 | * Output at county level 36 | drop if cty==. 
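* [Editorial note] The block below copies the year-2000 totals filed under Miami-Dade's new FIPS
* code (12086) onto its old code (12025) using explicit subscripts, which read from the adjacent
* observation after sorting. A commented-out toy illustration with hypothetical values (not part
* of this do-file):
/*
clear
input long cty float pop2000
12025 .
12086 1000
end
sort cty
replace pop2000 = pop2000[_n+1] if cty==12025   // copy value from the next row
drop if cty==12086
list   // 12025 now carries the value that was stored under 12086
*/
* The percent changes computed below are simple growth rates; for instance, hypothetical values
* pop1980 = 200,000 and pop2000 = 250,000 give (250,000 - 200,000)/200,000 = 0.25.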
37 | 38 | //recode 1 county that changed FIPS (but not boundaries) over 1980-2000 39 | gen flag = 1 if inlist(cty,12025,12086) // Miami-Dade 40 | sort flag cty 41 | replace pop2000 = pop2000[_n+1] if cty==12025 42 | replace lf2000 = lf2000[_n+1] if cty==12025 43 | drop if cty==12086 44 | drop flag 45 | 46 | //calculate percent change in population and labor force 47 | gen pop_d_2000_1980 = (pop2000 - pop1980)/pop1980 48 | gen lf_d_2000_1980 = (lf2000 - lf1980)/lf1980 49 | 50 | //drop values for counties that changed boundaries over 1980-2000 51 | replace pop_d_2000_1980 = . if inlist(cty, 4012, 4027, 35006, 35061, 51780, /// 52 | 51083, 30113, 30031, 30067) 53 | replace lf_d_2000_1980 = . if inlist(cty, 4012, 4027, 35006, 35061, 51780, /// 54 | 51083, 30113, 30031, 30067) 55 | //drop Alaska (many county changes) 56 | replace pop_d_2000_1980 = . if statefips==2 57 | replace lf_d_2000_1980 = . if statefips==2 58 | //(excludes population ~1m with Alaska) 59 | 60 | project, original("${cov_raw}/cty_covariates.dta") preserve 61 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz) keep(match master) 62 | //recode CZs of 2 unmatched counties 63 | assert statefips==2 | cty==15005 | inlist(cty,51780,30113) if _merge!=3 64 | drop _merge 65 | replace cz = 2200 if cty==51780 66 | replace cz = 34402 if cty==30113 67 | drop if cz==. //Kalawao County, Hawaii (population ~90), unmatched counties in Alaska 68 | 69 | preserve 70 | keep if pop_d_2000_1980!=. | lf_d_2000_1980!=. 71 | keep cty statefips pop_d_2000_1980 lf_d_2000_1980 72 | order cty statefips pop_d_2000_1980 lf_d_2000_1980 73 | save13 "${cov_clean}/cty_cs_popchange_labforce.dta", replace 74 | project, creates("${cov_clean}/cty_cs_popchange_labforce.dta") 75 | 76 | * Output at CZ level 77 | restore 78 | //recode populations of CZs with county changes (38100, 38300) 79 | expand 2 if cty==4027, gen(flag) //Yuma County 80 | replace cz = 38300 if flag==1 //La Paz CZ 81 | // La Paz CZ gets a small part of the pre-split Yuma County 82 | replace pop1980 = 13844/(106895+13844) * pop1980 if flag==1 83 | replace lf1980 = 13844/(106895+13844)* lf1980 if flag==1 84 | replace pop2000 = . if flag==1 85 | replace lf2000 = . 
if flag==1 86 | 87 | // Yuma CZ gets most of the pre-split Yuma County 88 | replace pop1980 = 106895/(106895+13844) * pop1980 if flag==0 & cty==4027 89 | replace lf1980 = 106895/(106895+13844) * lf1980 if flag==0 & cty==4027 90 | 91 | //collapse, and calculate percent change in population and labor force, 1980-2000 92 | collapse (sum) pop1980 pop2000 lf1980 lf2000, by(cz) 93 | 94 | gen pop_d_2000_1980 = (pop2000 - pop1980)/pop1980 95 | gen lf_d_2000_1980 = (lf2000 - lf1980)/lf1980 96 | 97 | keep cz pop_d_2000_1980 lf_d_2000_1980 98 | save13 "${cov_clean}/cz_cs_popchange_labforce.dta", replace 99 | project, creates("${cov_clean}/cz_cs_popchange_labforce.dta") 100 | -------------------------------------------------------------------------------- /code/covariates/clean_racefractions.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** fraction Black, fraction Hispanic; year 2000 18 | ***/ 19 | 20 | ******* 21 | 22 | *** Load Census Data 23 | project, original("${cov_raw}/Census cty population by race/DEC_00_SF1_P008_with_ann.csv") 24 | import delim "${cov_raw}/Census cty population by race/DEC_00_SF1_P008_with_ann.csv", clear rowr(3) varn(1) 25 | destring *, replace 26 | ren (geoid2 vd01 vd04 vd10) (cty pop_total pop_black pop_hispanic) 27 | keep cty pop_* 28 | 29 | // merge to CZs 30 | recode cty (12086=12025) // Miami-Dade fips change 31 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz) keep(match master) 32 | assert floor(cty/1000)==2 | cty==15005 if _merge!=3 // unmerged are Alaska counties or Kalawao, Hawaii 33 | drop _merge 34 | drop if cz==. 
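* [Editorial note] Below, race fractions are formed at the county level, while CZ-level fractions
* are obtained by first summing the population counts within each CZ and then dividing, i.e. a
* person-weighted aggregate rather than an unweighted mean of county shares. Illustrative
* (made-up) numbers: county A with 50 Black residents of 100 total (50%) and county B with 90 of
* 900 total (10%) give a CZ share of 140/1,000 = 14%, not the 30% an unweighted average of the
* two county shares would imply.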
35 | preserve 36 | 37 | // output at county level 38 | gen cs_frac_black = pop_black/pop_total 39 | gen cs_frac_hisp = pop_hispanic/pop_total 40 | keep cty cs_frac* 41 | save13 "${cov_clean}/cty_cs_popbyrace.dta", replace 42 | project, creates("${cov_clean}/cty_cs_popbyrace.dta") 43 | 44 | // output at CZ level 45 | restore 46 | collapse (sum) pop*, by(cz) 47 | gen cs_frac_black = pop_black/pop_total 48 | gen cs_frac_hisp = pop_hispanic/pop_total 49 | keep cz cs_frac* 50 | save13 "${cov_clean}/cz_cs_popbyrace.dta", replace 51 | project, creates("${cov_clean}/cz_cs_popbyrace.dta") 52 | -------------------------------------------------------------------------------- /code/covariates/clean_uninsurance.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global cov_raw "$root/data/raw/Covariate Data" 12 | global cov_clean "$root/data/derived/covariate_data" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived/covariate_data" 16 | 17 | /*** Percent uninsured, year 2010 18 | ***/ 19 | 20 | ******* 21 | 22 | 23 | *** Load 2010 Small Area Health Insurance Estimates 24 | project, original("${cov_raw}/sahie2010.csv") 25 | import delimited "${cov_raw}/sahie2010.csv", delimiter(comma) varnames(4) rowrange(4) clear 26 | destring, replace 27 | 28 | * Keep county-level estimates by all ages/races/genders/incomes 29 | * (Note that data is only for people below age 65, since Medicare starts at 65) 30 | keep if geocat==50 & agecat==0 & racecat==0 & sexcat==0 & iprcat==0 31 | keep stcou nic nui 32 | destring nic nui, replace force 33 | replace stcou = subinstr(stcou,"=","",.) 34 | replace stcou = subinstr(stcou,`"""',"",.) 35 | destring stcou, replace 36 | ren stcou cty 37 | recode cty (12086=12025) (8014 = 8013) // Miami-Dade name/FIPS change, Broomfield split off from Boulder 38 | collapse (sum) nic nui, by(cty) 39 | //merge counties to CZs 40 | 41 | project, original("${cov_raw}/cty_covariates.dta") preserve 42 | merge 1:1 cty using "${cov_raw}/cty_covariates.dta", keepus(cz) keep(match master) 43 | assert floor(cty/1000)==2 | cty==15005 /// 44 | | inlist(cty,51560) if _merge!=3 // unmerged are Alaska counties, Kalawao, Hawaii, or Clifton Forge, VA 45 | drop _merge 46 | drop if cz==. 
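* [Editorial note] The uninsurance rate below is nui/(nic + nui) scaled to percent, where nui and
* nic are the SAHIE counts of uninsured and insured people under age 65. With illustrative
* (made-up) counts of nui = 15,000 and nui + nic = 100,000, puninsured2010 = 15. The CZ figures
* further down are computed the same way after summing nui and nic within each CZ.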
47 | preserve 48 | gen puninsured2010 = 100 * nui / (nic + nui) 49 | lab var puninsured2010 "Percent without Health Insurance in 2010" 50 | keep cty puninsured2010 51 | save13 "${cov_clean}/cty_cs_puninsured.dta", replace 52 | project, creates("${cov_clean}/cty_cs_puninsured.dta") 53 | 54 | * Collapse to CZ level 55 | restore 56 | collapse (sum) nui nic, by(cz) 57 | gen puninsured2010 = 100 * nui / (nic + nui) 58 | lab var puninsured2010 "Percent without Health Insurance in 2010" 59 | keep cz puninsured2010 60 | save13 "${cov_clean}/cz_cs_puninsured.dta", replace 61 | project, creates("${cov_clean}/cz_cs_puninsured.dta") 62 | -------------------------------------------------------------------------------- /code/covariates/cz_names.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global raw "$root/data/raw" 12 | global derived "$root/data/derived" 13 | 14 | * Create required folders 15 | cap mkdir "$root/data/derived" 16 | cap mkdir "$root/data/derived/final_covariates" 17 | 18 | ************ 19 | 20 | * Load Census 2000 Data 21 | project, original("${raw}/Covariate Data/SUB-EST00INT.csv") 22 | import delimited "${raw}/Covariate Data/SUB-EST00INT.csv", clear 23 | gen cty = state*1000 + county 24 | 25 | * Convert 2000-2014 Inter/Postcensal counties to 1999 counties 26 | * see https://www.census.gov/geo/reference/county-changes.html 27 | drop if cty == 15005 // Kalawao, Hawaii (pop ~90) 28 | recode cty (12086 = 12025) // Miami-Dade FIPS change 29 | recode cty (8014 = 8013) // Broomfield County created from Boulder, CO 30 | recode cty (2282 = 2231) (2275 2195 = 2280) (2230 2105 = 2231) /// 31 | (2198 = 2201) (2068 = 2290) // County changes in Alaska 32 | 33 | project, original("${raw}/Covariate Data/cty_covariates.dta") preserve 34 | merge m:1 cty using "${raw}/Covariate Data/cty_covariates.dta", keepusing(cty county_name cz cz_name statename state_id stateabbrv) 35 | collapse (sum) popestimate2000 , by(name cz sumlev statename cz_name) 36 | ren cz_name czname_old 37 | keep if !mi(cz) 38 | 39 | * String Parsing 40 | replace name = substr(name,1,strlen(name)-6) if lower(substr(name,-5,.)) == "(pt.)" 41 | replace name = substr(name,1,strlen(name)-12) if lower(substr(name,-11,.)) == "and borough" 42 | replace name = substr(name,1,strlen(name)-10) if lower(substr(name,-9,.)) == "(balance)" 43 | replace name = substr(name,1,strlen(name)-12) if lower(substr(name,-11,.)) == "government" 44 | 45 | * Subset to cities and towns, Hawaiian Counties, townships with no large cities, and counties with no subdivisions 46 | gen toDrop = 0 47 | replace toDrop = 1 if sumlev <= 50 48 | replace toDrop = 1 if substr(name,-6,.) == "County" 49 | replace toDrop = 1 if substr(name,-6,.) 
== "Parish" 50 | replace toDrop = 1 if lower(substr(name,1,7)) == "balance" 51 | replace toDrop = 0 if sumlev == 50 & statename == "Hawaii" //Note that the Hawaiian counties appear to have no subdivisions in the data 52 | bys cz: replace toDrop = 0 if _N == 1 53 | drop if toDrop 54 | gen city_pop = popestimate2000 if lower(substr(name,-8,.)) != "township" 55 | egen max_city_pop = max(city_pop), by(cz) 56 | replace toDrop = 1 if max_city_pop > 60e3 & lower(substr(name,-8,.)) == "township" 57 | drop if toDrop 58 | drop toDrop 59 | 60 | * Keep the largest city 61 | gsort cz -popestimate2000 62 | by cz: keep if _n == 1 63 | 64 | * More String Parsing 65 | gen suffix = word(name,-1) 66 | gen czname = substr(name,1,length(name) - length(suffix) - 1) 67 | replace czname = name if substr(name,1,8) == "Township" 68 | replace czname = substr(czname,1,strpos(czname,"-")-1) if strpos(czname,"-") & suffix != "city" 69 | replace czname = substr(czname,1,strpos(czname,"/")-1) if strpos(czname,"/") 70 | replace czname = substr(czname,1,length(czname) - length(word(czname,-1)) - 1) if lower(word(czname,-1)) == "charter" 71 | replace czname = "Boise" if czname == "Boise City" 72 | replace czname = "Galesburg" if czname == "Galesburg City" 73 | replace czname = "New York City" if czname == "New York" 74 | replace czname = "Washington DC" if statename == "District Of Columbia" 75 | 76 | * Save 77 | order cz czname statename 78 | keep cz czname statename czname_old 79 | save13 "${derived}/final_covariates/cz_names_updated.dta", replace 80 | project, creates("${derived}/final_covariates/cz_names_updated.dta") 81 | 82 | -------------------------------------------------------------------------------- /code/covariates/list_of_covariates.do: -------------------------------------------------------------------------------- 1 | ******* 2 | *** List of Covariates 3 | ******* 4 | 5 | ** Variable categories 6 | 7 | * Health Behaviors 8 | local behavior_vars cur_smoke_q bmi_obese_q exercise_any_q 9 | 10 | * Health Care 11 | local insurance_vars puninsured2010 reimb_penroll_adj10 12 | local acute_vars mort_30day_hosp_z adjmortmeas_amiall30day adjmortmeas_chfall30day adjmortmeas_pnall30day // Acute Care 13 | local preventive_vars med_prev_qual_z primcarevis_10 diab_hemotest_10 diab_eyeexam_10 diab_lipids_10 mammogram_10 amb_disch_per1000_10 // Preventive Care 14 | 15 | * Environmental Factors 16 | local segregation_vars cs00_seg_inc cs00_seg_inc_pov25 cs00_seg_inc_aff75 cs_race_theil_2000 // Segregation 17 | 18 | * Inequality and Social Cohesion 19 | local distribution_vars gini99 poor_share inc_share_1perc frac_middleclass // Income Distribution 20 | local social_vars scap_ski90pcm rel_tot // Social Cohesion 21 | local race_vars cs_frac_black cs_frac_hisp // Race & Ethnicity 22 | 23 | * Labor Market Conditions 24 | local labor_vars unemp_rate pop_d_2000_1980 lf_d_2000_1980 cs_labforce cs_elf_ind_man 25 | 26 | * Other Factors 27 | local migration_vars cs_born_foreign mig_inflow mig_outflow // Migration 28 | local localgeo_vars pop_density frac_traveltime_lt15 // Local Geography 29 | local affluance_vars hhinc00 median_house_value // Affluence 30 | local k12_vars ccd_exp_tot ccd_pup_tch_ratio score_r dropout_r // K-12 Education 31 | local college_vars cs_educ_ba tuition gradrate_r // College Education 32 | local socioecon_vars e_rank_b cs_fam_wkidsinglemom crime_total // Socioeconomics 33 | local tax_vars subcty_exp_pc taxrate tax_st_diff_top20 // Local Taxation 34 | 35 | ** All variables 36 | global covars_other 
`migration_vars' `localgeo_vars' `affluance_vars' /// 37 | `k12_vars' `college_vars' `socioecon_vars' `tax_vars' 38 | 39 | global covars_all `behavior_vars' `insurance_vars' `acute_vars' `preventive_vars' /// 40 | `segregation_vars' `distribution_vars' `social_vars' `race_vars' `labor_vars' /// 41 | ${covars_other} 42 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Check correlations in CZ LE trends.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/CZ trend correlations" 15 | 16 | /*** Compute population-weighted correlations in CZ LE trends: 17 | - Between unadjusted and race-adjusted trends 18 | - Between trends for men and for women 19 | ***/ 20 | 21 | *** Check correlation between unadjusted and race-adjusted trends 22 | 23 | * Load data 24 | project, original("${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta") 25 | use "${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta", clear 26 | 27 | * Merge on CZ populations 28 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") preserve 29 | merge m:1 cz using "${root}/data/derived/final_covariates/cz_full_covariates.dta", /// 30 | assert(2 3) keep(3) nogen /// 31 | keepusing(pop2000) 32 | 33 | * Compute population-weighted correlations by gender 34 | bys gnd: corr le_agg_b_year le_raceadj_b_year [w=pop2000] 35 | 36 | * Compute population-weighted correlations by gender and income quartile 37 | isid cz gnd hh_inc_q 38 | statsby corr_raceadj_unadj = r(rho), by(gnd hh_inc_q) clear: corr le_raceadj_b_year le_agg_b_year [w=pop2000] 39 | 40 | sum corr_raceadj_unadj 41 | scalarout using "${root}/scratch/CZ trend correlations/Correlation between CZ LE trends raceadj and unadj.csv", /// 42 | replace /// 43 | id("Min corr b/w race-unadj and race-adj CZ trends in a Gender x Quartile") /// 44 | num(`=r(min)') fmt(%9.3f) 45 | project, creates("${root}/scratch/CZ trend correlations/Correlation between CZ LE trends raceadj and unadj.csv") 46 | 47 | 48 | *** Check correlation between genders 49 | 50 | * Load data 51 | project, original("${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta") 52 | use "${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta", clear 53 | drop le_agg_b_cons le_raceadj_b_cons 54 | rename *_b_year *_b_year_ 55 | 56 | reshape wide le_agg_b_year_ le_raceadj_b_year_, i(cz hh_inc_q) j(gnd) string 57 | 58 | * Merge on CZ populations 59 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") preserve 60 | merge m:1 cz using "${root}/data/derived/final_covariates/cz_full_covariates.dta", /// 61 | assert(2 3) keep(3) nogen /// 62 | keepusing(pop2000) 63 | 64 | * Compute correlations across genders 65 | corr le_agg_b_year_F le_agg_b_year_M [w=pop2000] 66 | corr le_raceadj_b_year_F le_raceadj_b_year_M [w=pop2000] 67 | 68 | * Compute correlations across genders by Income Quartile 69 | bys hh_inc_q: corr le_agg_b_year_F le_agg_b_year_M [w=pop2000] 70 | bys hh_inc_q: corr le_raceadj_b_year_F le_raceadj_b_year_M [w=pop2000] 71 | 
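The do-file above obtains by-group population-weighted correlations by running -correlate- with weights (analytic weights by default) under -statsby-, which collects r(rho) for each group. A minimal self-contained sketch of that pattern, using Stata's bundled auto dataset rather than the project data (purely illustrative, not part of the replication code):

sysuse auto, clear
* weighted correlation of price and mpg, separately for domestic and foreign cars
statsby rho = r(rho), by(foreign) clear: corr price mpg [w=weight]
list foreign rho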
-------------------------------------------------------------------------------- /code/lifeexpectancy/Compute Years of Social Security Eligibility.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/Medicare and Social Security Eligibility" 15 | 16 | /*** Compute the expected number of years lived over age 65 for 40 year-olds 17 | by gender and income percentile, then report the p100-p1 gap. 18 | ***/ 19 | 20 | ******** 21 | 22 | project, original("${root}/code/ado/generate_le_with_raceadj.ado") 23 | 24 | 25 | * Calculate national Gompertz parameters by Gender x Income Percentile x Race 26 | tempfile racegomp 27 | generate_le_with_raceadj, returngompertz by(gnd pctile) /// 28 | original /// 29 | gompparameters("$derived/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta") /// 30 | raceshares("$derived/raceshares/national_racesharesBY_age_gnd_hhincpctile.dta") /// 31 | saving(`racegomp') 32 | 33 | use `racegomp', clear 34 | keep if pctile == 1 | pctile == 100 35 | 36 | 37 | * Add age dimension, for ages 40-120 38 | isid gnd pctile 39 | expand 81 40 | gen int age=. 41 | bys gnd pctile: replace age = _n + 39 42 | 43 | * Merge in CDC mortality rates 44 | project, original("${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta") preserve 45 | merge m:1 gnd age using "${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta", /// 46 | keepusing(cdc_mort) /// 47 | assert(1 2 3) keep(1 3) // ages<40 are _merge==2, age 120 is _merge==1 48 | assert age==120 if _merge==1 49 | drop _merge 50 | 51 | * Generate survival curves 52 | isid gnd pctile age 53 | foreach r in "agg" "w" "b" "a" "h" { 54 | 55 | gen surv_`r' = 1 if age==40 56 | bys gnd pctile (age): replace surv_`r' = surv_`r'[_n-1] * /// 57 | (1 - exp(gomp_int_`r' + gomp_slope_`r' * age[_n-1]) ) if inrange(age,41,90) // mortality rates are mortality between [x,x+1) 58 | replace surv_`r' = surv_`r'[_n-1] * (1 - cdc_mort[_n-1]) if age>90 59 | 60 | } 61 | 62 | * Compute expected number of years lived above 65 (for a person alive at 40) 63 | keep if age>=65 64 | foreach var of varlist surv_* { 65 | replace `var' = `var'/2 if age==65 // trapezoidal approximation for the integral 66 | } 67 | collapse (sum) surv_*, by(gnd pctile) 68 | rename surv* ly65* 69 | 70 | * Race-adjust expected years above 65 71 | project, original("${root}/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta") preserve 72 | merge m:1 gnd using "${root}/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta", /// 73 | keepusing(raceshare_*) /// 74 | assert(match) nogen 75 | 76 | gen ly65_raceadj = raceshare_black * ly65_b /// 77 | + raceshare_asian * ly65_a /// 78 | + raceshare_hispanic * ly65_h /// 79 | + raceshare_other * ly65_w 80 | drop raceshare* 81 | 82 | * Reshape wide on percentile 83 | rename ly65* ly65*_p 84 | ds ly65* 85 | reshape wide `r(varlist)', i(gnd) j(pctile) 86 | 87 | * Generate p100-p1 gap 88 | gen ly65_raceadj_gap = ly65_raceadj_p100 - ly65_raceadj_p1 89 | 90 | * Output 91 | assert 
gnd==cond(_n==1,"F","M") 92 | 93 | scalarout using "${root}/scratch/Medicare and Social Security Eligibility/Years Lived Above 65.csv", replace /// 94 | id("Expected Years Lived Above 65: p100 minus p1 (Women)") /// 95 | num(`=ly65_raceadj_gap[1]') fmt(%9.1f) 96 | 97 | scalarout using "${root}/scratch/Medicare and Social Security Eligibility/Years Lived Above 65.csv", /// 98 | id("Expected Years Lived Above 65: p100 minus p1 (Men)") /// 99 | num(`=ly65_raceadj_gap[2]') fmt(%9.1f) 100 | project, creates("${root}/scratch/Medicare and Social Security Eligibility/Years Lived Above 65.csv") 101 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Estimate NCHS State LEs pooling income groups.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/Mortality Rates/NCHS state pooled income" 15 | cap mkdir "${root}/data/derived/Gompertz Parameters/NCHS state pooled income" 16 | cap mkdir "${root}/data/derived/le_estimates/NCHS state pooled income" 17 | 18 | 19 | *********************** 20 | *** Mortality rates *** 21 | *********************** 22 | 23 | * Load NCHS State mortality data by Gender x Age, pooling 1999-2014 24 | project, original("$root/data/raw/NCHS Underlying Cause of Death/Underlying Cause of Death, 1999-2014, by state, gender, and age.txt") 25 | import delimited "$root/data/raw/NCHS Underlying Cause of Death/Underlying Cause of Death, 1999-2014, by state, gender, and age.txt", clear 26 | drop if mi(gendercode) 27 | 28 | assert mi(notes) 29 | drop notes 30 | 31 | keep if inrange(singleyearagescode,40,76) 32 | destring population, replace 33 | 34 | * Check that there are 51 states x 2 genders x 37 ages 35 | assert _N == 51*2*37 36 | 37 | * Keep required vars and use our standard var names and formats 38 | keep statecode singleyearagescode gendercode population deaths 39 | ren (statecode singleyearagescode gendercode population) /// 40 | (st age_at_d gnd count ) 41 | 42 | gen double mortrate = deaths/count 43 | drop deaths 44 | 45 | * Output 46 | save13 "${root}/data/derived/Mortality Rates/NCHS state pooled income/st_NCHSmortratesBY_gnd_age.dta", replace 47 | project, creates("${root}/data/derived/Mortality Rates/NCHS state pooled income/st_NCHSmortratesBY_gnd_age.dta") preserve 48 | 49 | 50 | *************************** 51 | *** Gompertz parameters *** 52 | *************************** 53 | 54 | * Calculate Gompertz parameters from mortality rates 55 | project, original("${root}/code/ado/estimate_gompertz2.ado") preserve 56 | project, original("${root}/code/ado/mle_gomp_est.ado") preserve 57 | project, original("${root}/code/ado/fastregby_gompMLE_julia.ado") preserve 58 | project, original("${root}/code/ado/estimate_gompertz.jl") preserve 59 | 60 | estimate_gompertz2 st gnd, age(age_at_d) mort(mortrate) n(count) /// 61 | type(mle) 62 | 63 | * Output 64 | save13 "${derived}/Gompertz Parameters/NCHS state pooled income/st_NCHSgompBY_gnd.dta", replace 65 | project, creates("${derived}/Gompertz Parameters/NCHS state pooled income/st_NCHSgompBY_gnd.dta") preserve 66 | 67 | 68 
| *********************** 69 | *** Life expectancy *** 70 | *********************** 71 | 72 | generate_le_with_raceadj, by(st gnd) /// 73 | gompparameters("${derived}/Gompertz Parameters/NCHS state pooled income/st_NCHSgompBY_gnd.dta") /// 74 | raceshares("${derived}/raceshares/st_racesharesBY_agebin_gnd.dta") /// 75 | saving("${derived}/le_estimates/NCHS state pooled income/st_NCHSleBY_gnd.dta") 76 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Explore national trends at fixed income levels.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/National LE Trends at fixed income levels or ranks" 15 | 16 | 17 | 18 | ******************** 19 | 20 | 21 | *** Estimate National LE Trend by Gender x Income Quartile, controlling for income levels 22 | 23 | * Load mean household income level data 24 | project, original("${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 25 | use "${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 26 | isid gnd pctile age_at_d yod 27 | drop mortrate count 28 | 29 | * Keep only incomes at age 40 30 | keep if age_at_d - lag == 40 // income measured at 40 31 | drop age_at_d lag 32 | 33 | rename yod year 34 | isid gnd pctile year 35 | rename hh_inc hh_inc40 36 | 37 | * Merge in annual national LE 38 | project, original("$derived/le_estimates/national_leBY_year_gnd_hhincpctile.dta") preserve 39 | merge 1:1 gnd pctile year using "$derived/le_estimates/national_leBY_year_gnd_hhincpctile.dta", /// 40 | assert(3) nogen 41 | 42 | * Estimate trends by Gender x Income Quartile; controlling for income level at age 40 in each percentile in each year 43 | g quartile = ceil(pctile/25) 44 | statsby _b _se, by(gnd quartile) clear : reg le_raceadj year hh_inc40, vce(cl year) 45 | keep gnd quartile _b_year _se_year 46 | 47 | * Compute upper & lower 95% confidence interval 48 | gen ci_h = _b_year + 1.96*_se_year 49 | gen ci_l = _b_year - 1.96*_se_year 50 | gen ci_str = "(" + string(ci_l,"%03.2f") + ", " + string(ci_h,"%03.2f") + ")" 51 | keep gnd quartile _b_year ci_str 52 | 53 | * Reshape wide on quartile 54 | rename (_b_year ci_str) (le_raceadj_tr_q CI_le_raceadj_tr_q) 55 | reshape wide le_raceadj_tr_q CI_le_raceadj_tr_q, i(gnd) j(quartile) 56 | 57 | * Output 58 | gsort - gnd 59 | export delim using "${root}/scratch/National LE Trends at fixed income levels or ranks/National LE trend by Quartile Controlling for Income.csv", replace 60 | project, creates("${root}/scratch/National LE Trends at fixed income levels or ranks/National LE trend by Quartile Controlling for Income.csv") preserve 61 | 62 | -------------------------------------------------------------------------------- /code/lifeexpectancy/LE ranked lists of states.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if 
("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/State LE ranked lists by income quartile" 15 | 16 | 17 | /*** Create ranked lists of State LE levels and State LE trends, 18 | by income quartile. 19 | ***/ 20 | 21 | ************** 22 | *** Levels *** 23 | ************** 24 | 25 | * Load state LE levels data 26 | project, original("$derived/le_estimates/st_leBY_gnd_hhincquartile.dta") 27 | use st gnd hh_inc_q le_raceadj using "$derived/le_estimates/st_leBY_gnd_hhincquartile.dta", clear 28 | 29 | * Take unweighted average of men and women 30 | isid st gnd hh_inc_q 31 | collapse (mean) le_raceadj, by(st hh_inc_q) 32 | 33 | * Merge on state names 34 | rename st statefips 35 | project, original("${root}/data/raw/Covariate Data/state_database.dta") preserve 36 | merge m:1 statefips using "${root}/data/raw/Covariate Data/state_database.dta", /// 37 | keepusing(state*) /// 38 | assert(3) nogen 39 | 40 | * Reshape wide on income quartile 41 | rename le_raceadj le_raceadj_q 42 | reshape wide le_raceadj_q, i(statefips) j(hh_inc_q) 43 | order state* 44 | 45 | * Ranked lists 46 | foreach q in 1 4 { 47 | sort le_raceadj_q`q' 48 | export delim state* le_raceadj_q`q' /// 49 | using "$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q'.csv", /// 50 | replace 51 | project, creates("$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q'.csv") preserve 52 | } 53 | 54 | sort le_raceadj_q1 55 | maptile le_raceadj_q1, geo(state) geoid(statefips) cutv(`=le_raceadj_q1[10]+epsfloat()') rev /// 56 | geofolder("$root/code/ado_maptile_geo") /// 57 | savegraph("$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q' - geographic belt among 10 lowest states.png") replace 58 | project, creates("$root/scratch/State LE ranked lists by income quartile/State ranked LE levels Q`q' - geographic belt among 10 lowest states.png") preserve 59 | 60 | ************** 61 | *** Trends *** 62 | ************** 63 | 64 | * Load state LE trends data 65 | project, original("$derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta") 66 | use st gnd hh_inc_q le_raceadj_b_year using "$derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta", clear 67 | rename le_raceadj_b_year le_raceadj_tr 68 | 69 | * Take unweighted average of men and women 70 | isid st gnd hh_inc_q 71 | collapse (mean) le_raceadj_tr, by(st hh_inc_q) 72 | 73 | * Merge on state names 74 | rename st statefips 75 | project, original("${root}/data/raw/Covariate Data/state_database.dta") preserve 76 | merge m:1 statefips using "${root}/data/raw/Covariate Data/state_database.dta", /// 77 | keepusing(statename) /// 78 | assert(3) nogen 79 | 80 | * Reshape wide on income quartile 81 | rename le_raceadj_tr le_raceadj_tr_q 82 | reshape wide le_raceadj_tr_q, i(statefips) j(hh_inc_q) 83 | order state* 84 | 85 | * Ranked lists 86 | sort le_raceadj_tr_q1 87 | export delim state* le_raceadj_tr_q1 /// 88 | using "$root/scratch/State LE ranked lists by income quartile/State ranked LE trends Q1.csv", /// 89 | replace 90 | project, creates("$root/scratch/State LE ranked lists by income quartile/State ranked LE trends Q1.csv") preserve 91 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Plot international comparison of LE at age 40.do: 
-------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | if (c(os)=="Windows") global img wmf 14 | else global img png 15 | 16 | * Create required folders 17 | cap mkdir "${root}/scratch/International comparison of LE at 40" 18 | cap mkdir "${root}/scratch/International comparison of LE at 40/data" 19 | 20 | 21 | **************** 22 | 23 | * Load World Health Organization data on LE at 40 24 | project, original("${root}/data/raw/WHO/International_LEat40_in2013.dta") 25 | use "${root}/data/raw/WHO/International_LEat40_in2013.dta", clear 26 | 27 | * Append our national LE estimates by Gender x Income Percentile, for select percentiles 28 | project, original("$derived/le_estimates/national_leBY_gnd_hhincpctile.dta") preserve 29 | append using "$derived/le_estimates/national_leBY_gnd_hhincpctile.dta", /// 30 | keep(le_raceadj gnd pctile) gen(_append) 31 | keep if _append==0 | inlist(pctile,1,25,50,100) 32 | replace country = "United States - P" + string(pctile) if _append==1 33 | drop _append 34 | 35 | * Manually select specific countries to highlight 36 | gen byte highlight=0 37 | replace highlight=1 if gnd=="M" & (inlist(country,"Lesotho","Zambia","Sudan","India","Pakistan") | inlist(country,"Iraq","Libya","China","San Marino","United Kingdom","Canada")) 38 | replace highlight=1 if gnd=="F" & (inlist(country,"Sierra Leone","Zambia","Sudan","India","Pakistan") | inlist(country,"Iraq","Libya","China","United Kingdom","Canada","Japan")) 39 | replace highlight=. 
if !mi(pctile) 40 | 41 | * Generate ranks by gender, where rank==1 means lowest LE 42 | bys gnd (le_raceadj): gen le_raceadj_rank = _n 43 | order gnd country pctile highlight le_raceadj le_raceadj_rank 44 | 45 | 46 | * Plot bar graphs of LE at age 40 47 | 48 | foreach gender in "Men" "Women" { 49 | 50 | local g=cond("`gender'"=="Men","M","F") 51 | 52 | * Generate figure 53 | twoway (spike le_raceadj le_raceadj_rank if highlight==0, hor lc(gs10)) /// 54 | (spike le_raceadj le_raceadj_rank if highlight==1, hor lc(navy)) /// 55 | (spike le_raceadj le_raceadj_rank if !mi(pctile), hor lc(maroon)) /// 56 | (scatter le_raceadj_rank le_raceadj if highlight==1, m(none) mlabel(country) mlabc(black)) /// 57 | (scatter le_raceadj_rank le_raceadj if !mi(pctile), m(none) mlabel(country) mlabc(maroon)) /// 58 | if gnd=="`g'", /// 59 | graphregion(fcolor(white)) legend(off) /// 60 | ytitle("") ylab(none) /// 61 | ysize(7) xsize(8.25) fxsize(66.667) /// Create a canvas that is height 7 x width 5.5, without cutting off labels 62 | xtitle("Expected Age at Death for 40 Year Old `gender'") xlabel(60(5)90) /// 63 | title("") 64 | graph export "${root}/scratch/International comparison of LE at 40/International comparison of LE at 40 - `gender'.${img}", replace 65 | project, creates("${root}/scratch/International comparison of LE at 40/International comparison of LE at 40 - `gender'.${img}") preserve 66 | 67 | * Export data underlying figure 68 | export delim if gnd=="`g'" /// 69 | using "${root}/scratch/International comparison of LE at 40/data/International comparison of LE at 40 - `gender'.csv", /// 70 | replace 71 | project, creates("${root}/scratch/International comparison of LE at 40/data/International comparison of LE at 40 - `gender'.csv") preserve 72 | 73 | } 74 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Plot signal by local population level.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/Signal to Noise" 15 | 16 | /*** For each CZ or County life expectancy reported in online tables, plot 17 | fraction of variance that is signal against population size. 18 | ***/ 19 | 20 | 21 | **************** 22 | *** Programs *** 23 | **************** 24 | 25 | cap program drop plot_signalfrac_by_pop 26 | program define plot_signalfrac_by_pop 27 | 28 | /*** Plot fraction of variance that is signal against population size, 29 | separately by Gender x Income Quantile. 
30 | ***/ 31 | 32 | syntax name using/, geo(name) nq(integer) label(string) [ year(name) incq(name) plotq(string) xlab(passthru) xline(passthru) xsize(passthru) ] 33 | local var `namelist' 34 | if ("`incq'"=="") local incq hh_inc_q 35 | if ("`plotq'"=="") local plotq 1 4 36 | 37 | * Load point estimates and bootstrap SDs 38 | project, original(`"`using'"') 39 | use `"`using'"', clear 40 | isid `geo' gnd `incq' `year' 41 | keep `geo' gnd `incq' `year' `var' sd_`var' 42 | rename sd* sdnoise* 43 | 44 | * Merge in CZ populations 45 | if ("`geo'"=="cz") { 46 | project, original("${derived}/final_covariates/cz_pop.dta") preserve 47 | merge m:1 `geo' using "${derived}/final_covariates/cz_pop.dta", /// 48 | assert(2 3) keep(3) nogen 49 | } 50 | else if ("`geo'"=="cty") { 51 | project, original("${derived}/final_covariates/cty_full_covariates.dta") preserve 52 | merge m:1 `geo' using "${derived}/final_covariates/cty_full_covariates.dta", /// 53 | assert(2 3) keep(3) nogen /// 54 | keepusing(cty_pop2000) 55 | rename cty_pop2000 pop2000 56 | } 57 | 58 | * Generate total SD, in each ventile of CZ pop 59 | fastxtile pop_quantile = pop2000, nq(`nq') 60 | egen sdtotal_`var' = sd(`var'), by(gnd `incq' `year' pop_quantile) 61 | 62 | * Decompose variance into signal and noise 63 | gen varnoise_`var' = sdnoise_`var'^2 64 | gen vartotal_`var' = sdtotal_`var'^2 65 | gen varsignal_`var' = vartotal_`var' - varnoise_`var' 66 | gen fracsignal = varsignal_`var' / vartotal_`var' 67 | 68 | * Plot fraction of variance that is signal for Q1 and Q4, by Population 69 | graph drop _all 70 | foreach gender in "Male" "Female" { 71 | 72 | local g = substr("`gender'",1,1) 73 | 74 | foreach q in `plotq' { 75 | 76 | if (`q'==1) local color navy 77 | else local color maroon 78 | 79 | binscatter fracsignal pop2000 if gnd=="`g'" & `incq'==`q' & pop_quantile<`nq', /// 80 | xq(pop_quantile) line(none) color(`color') /// 81 | ytitle("Signal Variance / Total Variance") /// 82 | xtitle("Population") /// 83 | ylab(0(0.1)1, gmin gmax) /// 84 | `xlab' `xline' /// 85 | name(`g'q`q') subtitle("`gender' Q`q'") 86 | local plots `plots' `g'q`q' 87 | 88 | } 89 | 90 | } 91 | 92 | graph combine `plots', rows(2) iscale(*0.8) ysize(7) `xsize' /// 93 | title("Signal Fraction: `label'", size(medium)) /// 94 | note("Note: Quantile with highest population omitted for scale.", size(vsmall)) 95 | graph export "${root}/scratch/Signal to Noise/`label' - Signal Fraction by `geo' pop.png", replace 96 | graph drop `plots' 97 | project, creates("${root}/scratch/Signal to Noise/`label' - Signal Fraction by `geo' pop.png") 98 | 99 | end 100 | 101 | 102 | ********************** 103 | *** Generate Plots *** 104 | ********************** 105 | 106 | *** CZ LE Levels, by Gender x Income Quartile 107 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cz_SEleBY_gnd_hhincquartile.dta", /// 108 | geo(cz) label(CZ LE Levels) /// 109 | nq(20) /// 110 | xlab(25e3 "25k" 250e3 "250k" 500e3 "500k" 750e3 "750k" 1e6 "1M" 1.25e6 "1.25M" 1.5e6 "1.5M", grid) 111 | 112 | *** CZ LE Trends, by Gender x Income Quartile 113 | plot_signalfrac_by_pop le_raceadj_b_year using "$derived/le_trends_stderr/cz_SEletrendsBY_gnd_hhincquartile.dta", /// 114 | geo(cz) label(CZ LE Trends) /// 115 | nq(6) /// 116 | xlab( 500e3 "500k" 1e6 "1M" 1.5e6 "1.5M" 2e6 "2M" 2.5e6 "2.5M" 3e6 "3M", grid) 117 | 118 | *** CZ LE Levels, by Gender x Income Quartile x Year 119 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cz_SEleBY_gnd_hhincquartile_year.dta", /// 120 | 
year(year) /// 121 | geo(cz) label(CZ LE Levels by Year) /// 122 | nq(6) /// 123 | xlab( 500e3 "500k" 1e6 "1M" 1.5e6 "1.5M" 2e6 "2M" 2.5e6 "2.5M" 3e6 "3M", grid) 124 | 125 | *** CZ LE Levels, by Gender x Income Ventile 126 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cz_SEleBY_gnd_hhincventile.dta", /// 127 | incq(hh_inc_v) plotq(1 5 10 15 20) xsize(10) /// 128 | geo(cz) label(CZ LE Levels by Ventile) /// 129 | nq(10) /// 130 | xlab(250e3 "250k" 500e3 "500k" 1e6 "1M" 1.5e6 "1.5M" 2e6 "2M", grid) 131 | 132 | *** County LE Levels, by Gender x Income Quartile 133 | plot_signalfrac_by_pop le_raceadj using "$derived/le_estimates_stderr/cty_SEleBY_gnd_hhincquartile.dta", /// 134 | geo(cty) label(County LE Levels) /// 135 | nq(20) /// 136 | xlab(25e3 "25k" 100e3 "100k" 200e3 "200k" 300e3 "300k" 400e3 "400k" 500e3 "500k", grid) 137 | 138 | 139 | -------------------------------------------------------------------------------- /code/lifeexpectancy/Sensitivity analyses of local LEs.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/Sensitivity analyses of local LE" 15 | 16 | /*** Check the correlation between various versions of the 17 | CZ x Gender x Income Quartile estimates and our baseline 18 | race-adjusted estimates. 19 | ***/ 20 | 21 | 22 | ***************** 23 | *** Load data *** 24 | ***************** 25 | 26 | * Load various life expectancies 27 | project, original("$derived/le_estimates/cz_leBY_gnd_hhincquartile.dta") 28 | use "$derived/le_estimates/cz_leBY_gnd_hhincquartile.dta", clear 29 | keep cz gnd hh_inc_q le_agg le_raceadj 30 | rename le_raceadj le_Baseline 31 | rename le_agg le_NoRaceAdj 32 | order cz gnd hh_inc_q le_Baseline 33 | 34 | project, original("$derived/le_estimates/Expected life years until 77/cz_leBY_gnd_hhincquartile_GompertzTo77.dta") preserve 35 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/Expected life years until 77/cz_leBY_gnd_hhincquartile_GompertzTo77.dta", /// 36 | assert(3) nogen keepusing(le_raceadj) 37 | rename le_raceadj le_LYuntil77 38 | 39 | project, original("$derived/le_estimates/Gompertz extrapolation to 100/cz_leBY_gnd_hhincquartile_GompertzTo100.dta") preserve 40 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/Gompertz extrapolation to 100/cz_leBY_gnd_hhincquartile_GompertzTo100.dta", /// 41 | assert(3) nogen keepusing(le_raceadj) 42 | rename le_raceadj le_GompertzTo100 43 | 44 | project, original("$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLS.dta") preserve 45 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLS.dta", /// 46 | assert(3) nogen keepusing(le_raceadj) 47 | rename le_raceadj le_OLS 48 | 49 | project, original("$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLSIncomeControl.dta") preserve 50 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/OLS Gompertz estimation/cz_leBY_gnd_hhincquartile_OLSIncomeControl.dta", /// 51 | assert(1 3) nogen keepusing(le_raceadj) // many CZs don't have income controlled LEs due to 
missing a percentile of data 52 | rename le_raceadj le_IncomeControl 53 | 54 | project, original("$derived/le_estimates/Cost of Living Adjusted/cz_leBY_gnd_hhincquartile_COLIadjusted.dta") preserve 55 | merge 1:1 cz gnd hh_inc_q using "$derived/le_estimates/Cost of Living Adjusted/cz_leBY_gnd_hhincquartile_COLIadjusted.dta", /// 56 | assert(3) nogen keepusing(le_raceadj) 57 | rename le_raceadj le_COLI 58 | 59 | * Load population counts 60 | project, original("${derived}/final_covariates/cz_pop.dta") preserve 61 | merge m:1 cz using "${derived}/final_covariates/cz_pop.dta", nogen assert(2 3) keep(3) 62 | 63 | * Check structure 64 | assert !mi(cz,gnd,hh_inc_q,pop2000) & inrange(hh_inc_q,1,4) 65 | 66 | ************************************ 67 | *** Count number of observations *** 68 | ************************************ 69 | 70 | preserve 71 | 72 | * Collapse to counts of CZs 73 | collapse (count) le*, by(gnd hh_inc_q) 74 | 75 | * Ensure that counts are constant across gender and income quartile 76 | foreach var of varlist le* { 77 | assert `var'==`var'[_n-1] if _n>2 78 | } 79 | 80 | * Collapse dataset of counts to gender 81 | collapse (mean) le*, by(gnd) 82 | 83 | * Output 84 | rename le* czN_le* 85 | export delim using "${root}/scratch/Sensitivity analyses of local LE/Counts of CZs in each sensitivity correlation.csv", replace 86 | project, creates("${root}/scratch/Sensitivity analyses of local LE/Counts of CZs in each sensitivity correlation.csv") 87 | 88 | restore 89 | 90 | **************************** 91 | *** Compute correlations *** 92 | **************************** 93 | 94 | * Store correlations in matrices 95 | tempname corrM corrF tempcorr 96 | forvalues q=1/4 { 97 | 98 | foreach g in "M" "F" { 99 | 100 | corr le_Baseline le_NoRaceAdj le_LYuntil77 le_GompertzTo100 le_OLS le_IncomeControl le_COLI /// 101 | if gnd=="`g'" & hh_inc_q==`q' [w=pop2000] 102 | 103 | matrix `tempcorr'=r(C) 104 | matrix `corr`g''=nullmat(`corr`g'') \ (`tempcorr'[1,2..7]) 105 | } 106 | 107 | } 108 | 109 | * Store correlations in dataset 110 | clear 111 | 112 | svmat `corrM', names(col) 113 | rename le* M_le* 114 | 115 | svmat `corrF', names(col) 116 | rename le* F_le* 117 | 118 | gen hh_inc_q=_n 119 | order hh_inc_q 120 | 121 | * Reformat correlations to 3 decimals 122 | foreach var of varlist M_le* F_le* { 123 | gen rho_`var'=string(`var',"%9.3f") 124 | } 125 | 126 | * Export tables of correlations 127 | export delim hh_inc_q rho_M* /// 128 | using "${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Male.csv", /// 129 | replace 130 | 131 | export delim hh_inc_q rho_F* /// 132 | using "${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Female.csv", /// 133 | replace 134 | 135 | project, creates("${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Male.csv") 136 | project, creates("${root}/scratch/Sensitivity analyses of local LE/CZ correlations of baseline raceadj LE with alternatives - Female.csv") 137 | -------------------------------------------------------------------------------- /code/lifeexpectancy/State Level Inequality Correlations.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do 
`"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/State Gini Correlations" 12 | 13 | * Erase output numbers 14 | cap erase "${root}/scratch/State Gini Correlations/State income inequality correlations.csv" 15 | 16 | * Set convenient globals 17 | global derived "${root}/data/derived" 18 | global raw "$root/data/raw" 19 | 20 | **************** 21 | 22 | * Import State Gini Index Data 23 | project, relies_on("${raw}/State Gini Index/ACS_06_EST_B19083.txt") 24 | project, original("${raw}/State Gini Index/ACS_06_EST_B19083_with_ann.csv") 25 | import delimited "${raw}/State Gini Index/ACS_06_EST_B19083_with_ann.csv", varnames(2) rowrange(2) clear 26 | ren (id2 geography estimateginiindex) (state_id statename gini2006) 27 | keep state_id statename gini2006 28 | keep if inrange(state_id,1,56) 29 | tempfile state_gini 30 | save `state_gini' 31 | 32 | project, original("${derived}/final_covariates/cty_full_covariates.dta") 33 | use "${derived}/final_covariates/cty_full_covariates.dta", clear 34 | collapse (sum) st_pop2000 = cty_pop2000, by(state_id) 35 | merge 1:1 state_id using `state_gini', assert(3) nogen 36 | ren state_id st 37 | 38 | project, original("${derived}/le_estimates/st_leBY_gnd_hhincquartile.dta") preserve 39 | merge 1:m st using "${derived}/le_estimates/st_leBY_gnd_hhincquartile.dta", nogen assert(match) keepusing(gnd hh_inc_q le_raceadj) 40 | ren le_raceadj le_raceadj_q 41 | reshape wide le_raceadj_q, i(st gnd) j(hh_inc_q) 42 | 43 | project, original("${derived}/le_estimates/st_leBY_gnd.dta") preserve 44 | merge 1:1 st gnd using "${derived}/le_estimates/st_leBY_gnd.dta", nogen assert(match) keepusing(le_raceadj) 45 | collapse (mean) le_raceadj*, by(st st_pop2000 statename gini2006) 46 | 47 | * Output correlations between Gini and LE 48 | corr_reg le_raceadj gini2006 [w=st_pop2000], vce(robust) 49 | 50 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 51 | id("Corr of state Gini coefficient with LE pooling all income quartiles: coef") /// 52 | num(`=_b[vb]') fmt(%9.2f) 53 | test vb=0 54 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 55 | id("Corr of state Gini coefficient with LE pooling all income quartiles: pval") /// 56 | num(`=r(p)') fmt(%9.2f) 57 | 58 | 59 | corr_reg le_raceadj_q1 gini2006 [w=st_pop2000], vce(robust) 60 | 61 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 62 | id("Corr of state Gini coefficient with LE pooling all income quartiles: coef") /// 63 | num(`=_b[vb]') fmt(%9.2f) 64 | test vb=0 65 | scalarout using "${root}/scratch/State Gini Correlations/State income inequality correlations.csv", /// 66 | id("Corr of state Gini coefficient with LE pooling all income quartiles: pval") /// 67 | num(`=r(p)') fmt(%9.2f) 68 | 69 | project, creates("${root}/scratch/State Gini Correlations/State income inequality correlations.csv") 70 | 71 | -------------------------------------------------------------------------------- /code/lifeexpectancy/compute_LE_trends.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do 
"${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/le_trends" 15 | cap mkdir "${root}/data/derived/le_trends/bootstrap" 16 | 17 | 18 | /*** Compute life expectancy trends estimates. 19 | ***/ 20 | 21 | 22 | **************** 23 | *** Programs *** 24 | **************** 25 | 26 | cap program drop compute_LE_trends 27 | program define compute_LE_trends 28 | 29 | syntax using/, by(namelist) saving(string) 30 | 31 | project, uses(`"`using'"') 32 | 33 | * Calculate trends 34 | foreach var in le_raceadj le_agg { 35 | 36 | use `"`using'"', clear 37 | 38 | isid `by' year 39 | fastregby `var' year, by(`by') clear 40 | rename _b* `var'_b* 41 | 42 | tempfile trend_`var' 43 | qui save `trend_`var'' 44 | } 45 | qui merge 1:1 `by' using `trend_le_raceadj', assert(3) nogen 46 | 47 | * Output 48 | label data "Annual trend in unadjusted and race-adjusted LE" 49 | sort `by' 50 | save13 `"`saving'"', replace 51 | project, creates(`"`saving'"') 52 | 53 | end 54 | 55 | 56 | *************************************** 57 | *** Compute Trends: Point Estimates *** 58 | *************************************** 59 | 60 | * CZ Trends by Gender x Income Quartile 61 | compute_LE_trends using "$derived/le_estimates/cz_leBY_year_gnd_hhincquartile.dta", /// 62 | by(cz gnd hh_inc_q) /// 63 | saving("${derived}/le_trends/cz_letrendsBY_gnd_hhincquartile.dta") 64 | 65 | 66 | * State Trends by Gender x Income Quartile 67 | compute_LE_trends using "$derived/le_estimates/st_leBY_year_gnd_hhincquartile.dta", /// 68 | by(st gnd hh_inc_q) /// 69 | saving("${derived}/le_trends/st_letrendsBY_gnd_hhincquartile.dta") 70 | 71 | 72 | ************************************ 73 | *** Compute Trends: Bootstrapped *** 74 | ************************************ 75 | 76 | * CZ Trends by Gender x Income Quartile 77 | compute_LE_trends using "$derived/le_estimates/bootstrap/bootstrap_cz_leBY_gnd_hhincquartile_year.dta", /// 78 | by(cz gnd hh_inc_q sample_num) /// 79 | saving("${derived}/le_trends/bootstrap/bootstrap_cz_letrendsBY_gnd_hhincquartile.dta") 80 | 81 | 82 | * State Trends by Gender x Income Quartile 83 | compute_LE_trends using "$derived/le_estimates/bootstrap/bootstrap_st_leBY_gnd_hhincquartile_year.dta", /// 84 | by(st gnd hh_inc_q sample_num) /// 85 | saving("${derived}/le_trends/bootstrap/bootstrap_st_letrendsBY_gnd_hhincquartile.dta") 86 | -------------------------------------------------------------------------------- /code/lifeexpectancy/le_trends_maps.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/LE maps" 15 | cap mkdir "$root/scratch/LE maps/data" 16 | 17 | /*** Generate maps of Life Expectancy trends: 18 | 19 | - by State x Income Quartile 20 | ***/ 21 | 22 | 23 | ******************** 24 | *** State Trends *** 25 | ******************** 26 | 27 | * Load data 28 | project, original("${root}/data/derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta") 29 | use 
"${root}/data/derived/le_trends/st_letrendsBY_gnd_hhincquartile.dta", clear 30 | rename st statefips 31 | 32 | * Map Q1 33 | foreach gender in "Male" "Female" { 34 | 35 | local g=substr("`gender'",1,1) 36 | 37 | maptile le_raceadj_b_year if gnd=="`g'" & hh_inc_q==1, geo(state) geoid(statefips) rev nq(10) /// 38 | geofolder("$root/code/ado_maptile_geo") /// 39 | twopt( /// 40 | title("Annual Changes in Race-Adjusted Expected Age at Death") /// 41 | subtitle("`gender's, Bottom Quartile") /// 42 | legend(size(*0.8)) /// 43 | ) /// 44 | savegraph("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.png") replace 45 | 46 | export delim statefips le_raceadj_b_year /// 47 | using "$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.csv" /// 48 | if gnd == "`g'" & hh_inc_q==1, replace 49 | 50 | project, creates("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.png") preserve 51 | project, creates("$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q1_`gender'.csv") preserve 52 | 53 | } 54 | 55 | 56 | * Calculate Q4-Q1 difference in trends 57 | isid statefips gnd hh_inc_q 58 | keep statefips gnd hh_inc_q le_raceadj_b_year 59 | rename le_raceadj_b_year le_raceadj_b_year_q 60 | reshape wide le_raceadj_b_year_q, i(statefips gnd) j(hh_inc_q) 61 | gen le_raceadj_b_year_diffq4q1 = le_raceadj_b_year_q4 - le_raceadj_b_year_q1 62 | 63 | * Map Q4-Q1 difference 64 | foreach gender in "Male" "Female" { 65 | 66 | local g=substr("`gender'",1,1) 67 | 68 | maptile le_raceadj_b_year_diffq4q1 if gnd=="`g'", geo(state) geoid(statefips) nq(10) /// 69 | geofolder("$root/code/ado_maptile_geo") /// 70 | twopt( /// 71 | title("Annual Changes in Race-Adjusted Expected Age at Death") /// 72 | subtitle("`gender's, Difference Between Top and Bottom Quartiles") /// 73 | legend(size(*0.8)) /// 74 | ) /// 75 | savegraph("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.png") replace 76 | 77 | export delim statefips le_raceadj_b_year_diffq4q1 /// 78 | using "$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.csv" /// 79 | if gnd == "`g'", replace 80 | 81 | project, creates("$root/scratch/LE maps/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.png") preserve 82 | project, creates("$root/scratch/LE maps/data/STmap_leTrendsBY_gnd_hhincquartile_Q4-Q1diff_`gender'.csv") preserve 83 | 84 | } 85 | 86 | -------------------------------------------------------------------------------- /code/mortality/Calculate aggregate deaths.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/Deaths and population counts in IRS and other samples" 12 | 13 | /*** Calculate the aggregate number of deaths and mortality rate in our sample, 14 | pooling all ages 40-76 and all years. Separately for men and women. 
15 | ***/ 16 | 17 | ************* 18 | 19 | 20 | * Load mortality rates 21 | project, uses("${root}/data/derived/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta") 22 | use "${root}/data/derived/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 23 | keep if age_at_d >= 40 24 | 25 | * Collapse to aggregate deaths and population counts by Gender 26 | gen long deaths=mortrate*count 27 | 28 | collapse (rawsum) count deaths, by(gnd) 29 | recast long count deaths 30 | format %12.0gc count deaths 31 | 32 | * Generate mortality per 100,000 33 | gen mortper100K = deaths/count*100000 34 | format %9.1f mortper100K 35 | 36 | * Output numbers 37 | assert gnd=="F" in 1 38 | 39 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 40 | replace /// 41 | id("Aggregate deaths in sample 40-76: Men") /// 42 | num(`=deaths[2]') fmt(%12.0gc) 43 | 44 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 45 | id("Aggregate mortality per 100,000 in sample 40-76: Men") /// 46 | num(`=mortper100K[2]') fmt(%9.1f) 47 | 48 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 49 | id("Aggregate deaths in sample 40-76: Women") /// 50 | num(`=deaths[1]') fmt(%12.0gc) 51 | 52 | scalarout using "${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv", /// 53 | id("Aggregate mortality per 100,000 in sample 40-76: Women") /// 54 | num(`=mortper100K[1]') fmt(%9.1f) 55 | 56 | project, creates("${root}/scratch/Deaths and population counts in IRS and other samples/Aggregate deaths and mortality rate by gender.csv") 57 | -------------------------------------------------------------------------------- /code/mortality/Construct multi-source death and population counts.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global raw "${root}/data/raw" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/Mortality Rates" 15 | cap mkdir "${root}/data/derived/Mortality Rates/NCHS-SSA-IRS Deaths and Populations" 16 | 17 | 18 | /*** Load and combine counts of deaths and populations 19 | by Gender x Age x Year from numerous sources: 20 | - CDC NCHS 21 | - SSA 22 | - IRS 23 | 24 | Covering years 2001-2014 and ages 40-76 (when available from each source). 
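	The merged output holds, for each Gender x Age x Year cell, population and death
	counts prefixed by source: dm1_* (SSA), irs_* (IRS, with positive- and zero-income
	subtotals), and cdc_* (NCHS).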
25 | ***/ 26 | 27 | ******* 28 | *** SSA DM1 count of deaths and population 29 | ******* 30 | 31 | project, original("${raw}/Mortality Collapses/ssa_dm1_tab_mskd.dta") 32 | use "${raw}/Mortality Collapses/ssa_dm1_tab_mskd.dta", clear 33 | 34 | keep if gnd!="U" 35 | assert inlist(gnd,"M","F") 36 | 37 | reshape wide count, i(gnd year age) j(died) 38 | ren (count0 count1) (dm1_pop dm1_dead) 39 | replace dm1_pop = dm1_pop + dm1_dead 40 | 41 | keep if inrange(age,40,76) & year>=2001 42 | 43 | tempfile ssa 44 | save `ssa' 45 | 46 | ******* 47 | *** IRS count of deaths and population (including zero incomes) 48 | ******* 49 | 50 | project, original("${root}/code/ado/gen_mortrates2.ado") 51 | 52 | project, original("${raw}/Mortality Collapses/mort_ad_v6_nat_byageyr_new_mskd.dta") 53 | use "${raw}/Mortality Collapses/mort_ad_v6_nat_byageyr_new_mskd.dta", clear 54 | 55 | * Clean up raw data 56 | compress 57 | drop if tax_yr<1999 58 | assert inlist(gnd,"M","F") 59 | 60 | assert pctile!=0 61 | replace pctile=0 if pctile==-1 // in IRS collapse, zero incomes are coded as -1 62 | replace pctile=-1 if pctile==. // in IRS collapse, negative incomes are coded as . 63 | 64 | ren deadby_* deadby_*_Mean 65 | drop agebin_mort // we already have exact age 66 | 67 | * Calculate mortality rates 68 | gen_mortrates2 gnd pctile, age(age) year(tax_yr) n(count) 69 | 70 | * Partition into population and deaths by Positive and Zero earnings 71 | rename age_at_d age 72 | rename yod year 73 | rename count irs_pop 74 | 75 | gen irs_dead = chop(mortrate * irs_pop, 10e-6) 76 | 77 | isid gnd age year pctile 78 | replace pctile=1 if pctile>=1 79 | label define poszeroneg 1 "posinc_" 0 "zeroinc_" -1 "neginc_" 80 | label values pctile poszeroneg 81 | 82 | collapse (sum) irs_pop irs_dead, by(gnd age year pctile) 83 | compress 84 | 85 | decode pctile, gen(pos_zero_neg) 86 | drop pctile 87 | reshape wide irs_@pop irs_@dead, i(gnd age year) j(pos_zero_neg) string 88 | 89 | * Calculate total population and deaths 90 | gen long irs_pop = irs_posinc_pop + irs_zeroinc_pop + irs_neginc_pop 91 | gen long irs_dead = irs_posinc_dead + irs_zeroinc_dead + irs_neginc_dead 92 | drop irs_neginc* 93 | 94 | * Output 95 | keep if inrange(age,40,76) & year>=2001 96 | tempfile irs 97 | save `irs' 98 | 99 | 100 | ******* 101 | *** CDC NCHS count of deaths and population 102 | ******* 103 | 104 | project, original("${raw}/CDC-Census Counts/Underlying Cause of Death, 1999-2014.txt") 105 | import delimited "${raw}/CDC-Census Counts/Underlying Cause of Death, 1999-2014.txt", clear 106 | 107 | drop if !mi(notes) 108 | keep singleyearagescode gendercode deaths population year 109 | ren (singleyearagescode gendercode deaths population) /// 110 | (age gnd cdc_dead cdc_pop) 111 | 112 | destring age, replace force 113 | keep if inrange(age,40,76) & year>=2001 114 | destring cdc_pop, replace 115 | 116 | tempfile cdc 117 | save `cdc' 118 | 119 | ******* 120 | *** Merge all death and population data together 121 | ******* 122 | 123 | use `ssa', clear 124 | merge 1:1 gnd age year using `irs', assert(1 3) nogen 125 | merge 1:1 gnd age year using `cdc', assert(3) nogen 126 | 127 | compress 128 | 129 | save13 "${root}/data/derived/Mortality Rates/NCHS-SSA-IRS Deaths and Populations/death_and_pop_counts_multisource.dta", replace 130 | project, creates("${root}/data/derived/Mortality Rates/NCHS-SSA-IRS Deaths and Populations/death_and_pop_counts_multisource.dta") 131 | -------------------------------------------------------------------------------- 
/code/mortality/Decompose mortality into medical v external.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/Decompose mortality into medical v external" 12 | 13 | ************ 14 | 15 | * Load data 16 | project, original("${root}/data/derived/final_covariates/cz_full_covariates.dta") 17 | use "${root}/data/derived/final_covariates/cz_full_covariates.dta", clear 18 | keep cz *_mort_coll* pop2000 19 | 20 | rename ext* external* 21 | rename med* medical* 22 | 23 | * Run regressions 24 | clear matrix 25 | foreach c in 0 1 { 26 | foreach m in "external" "medical" { 27 | 28 | reg `m'_mort_coll`c' total_mort_coll`c' [w=pop2000], robust 29 | matrix reg_decompose = nullmat(reg_decompose), /// 30 | (_b[total_mort_coll`c'] \ _se[total_mort_coll`c'] \ e(N)) 31 | 32 | local cols `cols' `m'_coll`c' 33 | } 34 | } 35 | 36 | * Store regression results in dataset 37 | matrix colnames reg_decompose = `cols' 38 | clear 39 | svmat reg_decompose, names(col) 40 | 41 | gen result="" 42 | replace result = "coef" in 1 43 | replace result = "se" in 2 44 | replace result = "N" in 3 45 | order result 46 | 47 | * Output 48 | export delim using "${root}/scratch/Decompose mortality into medical v external/Regressions decomposing mortality rates into medical vs external causes.csv", /// 49 | replace 50 | project, creates("${root}/scratch/Decompose mortality into medical v external/Regressions decomposing mortality rates into medical vs external causes.csv") 51 | -------------------------------------------------------------------------------- /code/mortality/Lag invariance.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | if (c(os)=="Windows") global img wmf 12 | else global img png 13 | 14 | * Create required folders 15 | cap mkdir "${root}/scratch/Lag invariance" 16 | cap mkdir "${root}/scratch/Lag invariance/data" 17 | 18 | /*** Examines the "lag invariance" property of mortality rates, and shows 19 | that this is because of the high degree of serial correlation of income. 
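	Here "lag invariance" means that the mortality-income percentile profile is nearly
	unchanged whether income rank is measured 2, 5, or 10 years before the mortality
	year; the figures below show this for 50-54 year olds in 2014.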
20 | ***/ 21 | 22 | ************************************************** 23 | *** Income Lag Invariance - Mortality Profiles *** 24 | ************************************************** 25 | 26 | * Load mortality data with all income lags 27 | project, original("${root}/data/derived/Mortality Rates/With all income lags/mskd_national_mortratesBY_gnd_hhincpctile_age_year_WithAllLags.dta") 28 | use "${root}/data/derived/Mortality Rates/With all income lags/mskd_national_mortratesBY_gnd_hhincpctile_age_year_WithAllLags.dta", clear 29 | 30 | * Compute unweighted average mortality rate of 50-54 year olds in 2014 31 | replace mortrate = mortrate * 100000 32 | keep if inrange(age_at_d,50,54) & yod==2014 33 | 34 | isid gnd pctile age_at_d lag 35 | collapse (mean) mortrate, by(gnd pctile lag) 36 | recast float mortrate, force 37 | 38 | * Plot mortality-income profile at different income lags 39 | foreach gender in "Male" "Female" { 40 | 41 | local g=substr("`gender'",1,1) 42 | 43 | * Plot fig 44 | twoway (scatter mortrate pctile if lag==2, ms(o)) /// 45 | (scatter mortrate pctile if lag==5, ms(t)) /// 46 | (scatter mortrate pctile if lag==10, ms(s)) /// 47 | if gnd=="`g'", /// 48 | ylab(0(500)1500, gmin gmax) /// 49 | xtitle("Household Income Percentile") /// 50 | ytitle("Deaths per 100,000") title("") /// 51 | legend( /// 52 | ring(0) pos(2) c(1) order(3 2 1) bmargin(large) /// 53 | label(1 "2 year lag") label(2 "5 year lag") label(3 "10 year lag") /// 54 | ) 55 | graph export "${root}/scratch/Lag invariance/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.${img}", replace 56 | project, creates("${root}/scratch/Lag invariance/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.${img}") preserve 57 | 58 | * Export data underlying fig 59 | export delim if gnd=="`g'" & inlist(lag,2,5,10) /// 60 | using "${root}/scratch/Lag invariance/data/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.csv", replace 61 | project, creates("${root}/scratch/Lag invariance/data/Mortality v Income Percentile profile of 2014 50-54yo with varying lags - `gender'.csv") preserve 62 | 63 | } 64 | 65 | ************************************ 66 | *** Serial Correlation of Income *** 67 | ************************************ 68 | 69 | * Load correlations 70 | foreach g in "M" "F" { 71 | project, original("${root}/data/raw/IRS correlations between income lags/corr_`g'.xlsx") 72 | import excel using "${root}/data/raw/IRS correlations between income lags/corr_`g'.xlsx", clear /// 73 | firstrow 74 | rename A corrvar 75 | keep corrvar l0_pctile 76 | 77 | drop if corrvar=="mort" 78 | gen byte lag = real( regexs(regexm(corrvar,"^l([0-9]+)_pctile$")) ) 79 | drop corrvar 80 | 81 | rename l0_pctile incomecorr_`g' 82 | 83 | tempfile income_corr_`g' 84 | save `income_corr_`g'' 85 | } 86 | 87 | * Combine male and female serial income correlations 88 | use `income_corr_M' 89 | merge 1:1 lag using `income_corr_F', assert(3) nogen 90 | order lag 91 | 92 | twoway (connect incomecorr_M lag, m(t)) /// 93 | (connect incomecorr_F lag, m(o)) /// 94 | if lag<=10, /// 95 | xtitle("Lag (x)") /// 96 | ytitle("Correlation Between Rank in Year t and t - x") /// 97 | ylabel(0 "0" .2 "0.2" .4 "0.4" .6 "0.6" .8 "0.8" 1 "1") /// 98 | legend( /// 99 | ring(0) pos(4) c(1) bmargin(medium) /// 100 | label(1 "Men") label(2 "Women") /// 101 | ) 102 | graph export "${root}/scratch/Lag invariance/Serial correlation of income.${img}", replace 103 | project, 
creates("${root}/scratch/Lag invariance/Serial correlation of income.${img}") preserve 104 | 105 | * Export data underlying fig 106 | export delim if lag<=10 /// 107 | using "${root}/scratch/Lag invariance/data/Serial correlation of income.csv", replace 108 | project, creates("${root}/scratch/Lag invariance/data/Serial correlation of income.csv") 109 | 110 | 111 | -------------------------------------------------------------------------------- /code/mortality/Output income means by national income quantile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/scratch/National income means by quantile" 15 | cap mkdir "${root}/scratch/Income means at national percentiles" 16 | 17 | * Erase output numbers 18 | cap erase "${root}/scratch/Income means at national percentiles/Income means at national p5 and p95 by gender.csv" 19 | 20 | /*** Output mean income by national income percentile, ventile, and quartile, 21 | by gender, pooling across years and ages. 22 | ***/ 23 | 24 | **************************************** 25 | *** Mean income by income percentile *** 26 | **************************************** 27 | 28 | * Load national mortality rates 29 | project, original("${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 30 | use if age_at_d>=40 using "${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 31 | 32 | * Pool all years and ages (years 2001-2014, ages 40-76) 33 | collapse (mean) hh_inc (rawsum) count [w=count], by(gnd pctile) 34 | 35 | * Export data 36 | save13 "${root}/scratch/National income means by quantile/National income means by percentile.dta", /// 37 | replace 38 | project, creates("${root}/scratch/National income means by quantile/National income means by percentile.dta") preserve 39 | 40 | *** Paper numbers 41 | foreach g in "M" "F" { 42 | foreach p in 5 95 { 43 | 44 | sum hh_inc if gnd=="`g'" & pctile==`p' 45 | assert r(N)==1 46 | scalarout using "${root}/scratch/Income means at national percentiles/Income means at national p5 and p95 by gender.csv", /// 47 | id("National mean income at p`p' pooling ages and years: `g'") /// 48 | num(`=r(mean)') fmt(%12.0fc) 49 | } 50 | } 51 | project, creates("${root}/scratch/Income means at national percentiles/Income means at national p5 and p95 by gender.csv") 52 | 53 | 54 | ************************************** 55 | *** Mean income by income quartile *** 56 | ************************************** 57 | 58 | * Load national mortality rates 59 | project, original("${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 60 | use if age_at_d>=40 using "${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 61 | 62 | * Collapse to quartiles, pooling all years and ages (years 2001-2014, ages 40-76) 63 | gen quartile=ceil(pctile/25) 64 | collapse (mean) hh_inc (rawsum) count [w=count], by(gnd quartile) 65 | 66 | * Export data 67 | save13 "${root}/scratch/National income means by quantile/National income means by quartile.dta", /// 68 | replace 69 | 
project, creates("${root}/scratch/National income means by quantile/National income means by quartile.dta") 70 | 71 | 72 | ************************************* 73 | *** Mean income by income ventile *** 74 | ************************************* 75 | 76 | * Load national mortality rates 77 | project, original("${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 78 | use if age_at_d>=40 using "${derived}/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 79 | 80 | * Collapse to ventiles, pooling all years and ages (years 2001-2014, ages 40-76) 81 | gen ventile=ceil(pctile/5) 82 | collapse (mean) hh_inc (rawsum) count [w=count], by(gnd ventile) 83 | 84 | * Export data 85 | save13 "${root}/scratch/National income means by quantile/National income means by ventile.dta", /// 86 | replace 87 | project, creates("${root}/scratch/National income means by quantile/National income means by ventile.dta") 88 | 89 | -------------------------------------------------------------------------------- /code/mortality/Summary stats on number of obs and income.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/Summary stats on number of observations age and income" 12 | 13 | * Erase output numbers 14 | cap erase "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv" 15 | 16 | /*** Calculate the total number of observations, mean and median household income 17 | for working individuals, and the mean age of individuals in our sample. 
18 | ***/ 19 | 20 | ************* 21 | 22 | 23 | * Load mortality rates 24 | project, original("${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 25 | use "${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 26 | keep if age_at_d >= 40 27 | 28 | sum hh_inc [w=count] if age_at_d <= 63, detail 29 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 30 | id("Working Individuals Mean Household Income") /// 31 | num(`=r(mean)') fmt(%9.0fc) 32 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 33 | id("Working Individuals Median Household Income") /// 34 | num(`=r(p50)') fmt(%9.0fc) 35 | 36 | sum age_at_d [w=count] 37 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 38 | id("Working Individuals Mean Age") /// 39 | num(`=r(mean)') fmt(%9.1f) 40 | 41 | sum count 42 | scalarout using "${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv", /// 43 | id("Total Number of Observations") /// 44 | num(`=r(sum)') fmt(%14.0fc) 45 | 46 | ***************************************** 47 | *** Project creates: reported numbers *** 48 | ***************************************** 49 | 50 | project, creates("${root}/scratch/Summary stats on number of observations age and income/Summary Stats.csv") 51 | 52 | -------------------------------------------------------------------------------- /code/mortality/construct_cdc_mortrates.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global raw "${root}/data/raw" 12 | 13 | * Create required folders 14 | cap mkdir "${root}/data/derived/Mortality Rates" 15 | cap mkdir "${root}/data/derived/Mortality Rates/CDC-SSA Life Table" 16 | 17 | 18 | /*** Generate national mortality rates by Gender x Age, for ages 0-119, from: 19 | 20 | - CDC Life Tables for Ages 0-99, taking a simple average of years 2001-2011 21 | - CDC does not report mortality rates past age 99. 
22 | 23 | - SSA Life Tables for Ages 100-119, using data from 2000 life table 24 | 25 | ***/ 26 | 27 | ******************** 28 | 29 | 30 | *** Reference docs 31 | project, relies_on("${raw}/CDC Life Tables/source.txt") 32 | project, relies_on("${raw}/CDC Life Tables/download_CDC_life_tables.do") 33 | project, relies_on("${raw}/SSA Life Tables/source.txt") 34 | 35 | *** Load CDC mortality rates from 0-99 36 | 37 | * Load data 38 | project, original("${raw}/CDC Life Tables/CDC_LifeTables.dta") 39 | use "${raw}/CDC Life Tables/CDC_LifeTables.dta", clear 40 | tab year 41 | 42 | * Average over years 43 | isid gnd age year 44 | collapse (mean) mortrate, by(gnd age) 45 | 46 | 47 | *** Append SSA mortality rates at 100+ 48 | project, original("${raw}/SSA Life Tables/SSA_LifeTables_2000.dta") preserve 49 | append using "${raw}/SSA Life Tables/SSA_LifeTables_2000.dta", gen(_append) 50 | drop if _append==1 & age<100 51 | drop _append 52 | 53 | *** Output 54 | sort gnd age 55 | bys gnd: assert _N==120 & age[1]==0 & age[120]==119 56 | rename mortrate cdc_mort 57 | save13 "${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta", replace 58 | project, creates("${root}/data/derived/Mortality Rates/CDC-SSA Life Table/national_CDC_SSA_mortratesBY_gnd_age.dta") 59 | -------------------------------------------------------------------------------- /code/nlms/Explore mortality profiles implied by race shifters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | if (c(os)=="Windows") global img wmf 12 | else global img png 13 | 14 | 15 | /*** Plot NLMS Gompertz mortality-age profiles implied by estimated race shifters. 16 | - Separate series for each race. 17 | - Separate panel for each gender. 18 | ***/ 19 | 20 | ************************ 21 | *** Generate Figures *** 22 | ************************ 23 | 24 | project, original("${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta") 25 | use "${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta" 26 | 27 | foreach g in "Male" "Female" { 28 | 29 | streg i.csdiv i.incq i.racegrp if gnd==`"`=substr("`g'",1,1)'"', dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) coeflegend 30 | 31 | * Note: plotting a different income quartile or census plot would just shift and pivot all the lines, 32 | * without changing their relative positions. 
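	* A reference note on what the -function- plots below trace out (assuming analysis time
	* is measured from age 40, as noted in nlms_generate_shifters.do): under
	* -streg, dist(gompertz) nohr- the log hazard is log h(age) = _b[_t:_cons] +
	* _b[gamma:_cons]*(age - 40) for the reference group, and each race adds its estimated
	* shifters to both the intercept (_b[_t:#.racegrp]) and the slope (_b[gamma:#.racegrp]).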
33 | 34 | twoway (function y = _b[_t:_cons] + _b[gamma:_cons] * (x - 40), range(40 90)) /// 35 | (function y = _b[_t:_cons] + _b[_t:2.racegrp] + (_b[gamma:_cons] + _b[gamma:2.racegrp]) * (x - 40), range(40 90)) /// 36 | (function y = _b[_t:_cons] + _b[_t:3.racegrp] + (_b[gamma:_cons] + _b[gamma:3.racegrp]) * (x - 40), range(40 90)) /// 37 | (function y = _b[_t:_cons] + _b[_t:4.racegrp] + (_b[gamma:_cons] + _b[gamma:4.racegrp]) * (x - 40), range(40 90)), /// 38 | title(`g') xtitle("Age") ytitle("Log Mortality Rate") /// 39 | xlab(40(10)90) /// 40 | legend(ring(0) pos(4) c(1) order(2 1 3 4)) legend(lab(1 White) lab(2 Black) lab(3 Hispanic) lab(4 Asian)) /// 41 | graphregion(fcolor(white)) 42 | graph export "${root}/scratch/NLMS mortality profiles by race/NLMS_raceshifter_mortality_profiles_`g'.${img}", replace 43 | project, creates("${root}/scratch/NLMS mortality profiles by race/NLMS_raceshifter_mortality_profiles_`g'.${img}") preserve 44 | 45 | } 46 | -------------------------------------------------------------------------------- /code/nlms/nlms_generate_shifters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set working directory 11 | cd "$root/data/derived/NLMS" 12 | 13 | * Create required folders 14 | cap mkdir "$root/data/derived/NLMS/raceshifters" 15 | 16 | /*** Estimate the differences in Gompertz intercept and slope by race, 17 | relative to the non-Black non-Hispanic group. 18 | 19 | Generate the estimates for three different by-groups: 20 | - Sex 21 | - Sex x Income Quartile 22 | - Sex x Census Region 23 | ***/ 24 | 25 | **************** 26 | *** Programs *** 27 | **************** 28 | 29 | program define nlms_estimate_raceshifters 30 | 31 | /*** Given an NLMS panel, estimate the Gompertz parameters for each race 32 | while controlling for Income Quartile and Census Division. 33 | 34 | Return the race shifters, which are the differences in Gompertz 35 | parameters relative to the non-Black non-Hispanic group.
36 | 37 | Has options for 3 by-groups: 38 | - by Sex 39 | - by Sex x Income Quartile 40 | - by Sex x Census Region 41 | 42 | ***/ 43 | 44 | syntax using/, [ bootstrap replace stderr save_bysex(string) save_bysexincq(string) save_bysexcsregion(string) ] 45 | 46 | ***************** 47 | *** Load data *** 48 | ***************** 49 | 50 | * Load NLMS sample 51 | use `"`using'"', clear 52 | 53 | * If bootstrapping: assign the number of draws for each observation, 54 | * using weights to determine probability of draw 55 | if ("`bootstrap'"!="") { 56 | 57 | wsample num_draws, wt(wt) 58 | keep if num_draws>0 // for speed, drop unnecessary obs 59 | 60 | streset [fw=num_draws], noshow 61 | drop wt startage endage inddea // for speed, drop unnecessary vars 62 | 63 | } 64 | 65 | ****************************** 66 | *** Estimate race shifters *** 67 | ****************************** 68 | 69 | *** Specify parameters to save 70 | local save_est /// 71 | diff_gomp_int_black = _b[_t:2.racegrp] /// 72 | diff_gomp_int_hisp = _b[_t:3.racegrp] /// 73 | diff_gomp_int_asian = _b[_t:4.racegrp] /// 74 | diff_gomp_slope_black = _b[gamma:2.racegrp] /// 75 | diff_gomp_slope_hisp = _b[gamma:3.racegrp] /// 76 | diff_gomp_slope_asian = _b[gamma:4.racegrp] 77 | 78 | if ("`stderr'"=="stderr") local save_est `save_est' /// 79 | se_int_black = _se[_t:2.racegrp] /// 80 | se_int_hisp = _se[_t:3.racegrp] /// 81 | se_int_asian = _se[_t:4.racegrp] /// 82 | se_slope_black = _se[gamma:2.racegrp] /// 83 | se_slope_hisp = _se[gamma:3.racegrp] /// 84 | se_slope_asian = _se[gamma:4.racegrp] /// 85 | 86 | *** Run regressions 87 | 88 | * by Sex 89 | if ("`save_bysex'"!="") statsby `save_est', saving(`save_bysex', `replace') /// 90 | by(gnd): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 91 | 92 | * by Sex x Income Quartile 93 | if ("`save_bysexincq'"!="") statsby `save_est', saving(`save_bysexincq', `replace') /// 94 | by(gnd incq): streg i.csdiv i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.racegrp) 95 | 96 | * by Sex x Census Region 97 | if ("`save_bysexcsregion'"!="") statsby `save_est', saving(`save_bysexcsregion', `replace') /// 98 | by(gnd csregion): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 99 | 100 | *************************** 101 | *** save13 and add note *** 102 | *************************** 103 | 104 | * If not bootstrapping, reopen each saved file and save13 it, with a note 105 | if ("`bootstrap'"=="") { 106 | 107 | foreach file in "`save_bysex'" "`save_bysexincq'" "`save_bysexcsregion'" { 108 | if ("`file'"=="") continue // skip by-group if wasn't specified 109 | 110 | use "`file'", clear 111 | label data "NOTE: Gompertz intercepts in this data correspond to intercepts at age 40." 
112 | save13 `"`file'"', replace 113 | } 114 | 115 | } 116 | 117 | end 118 | 119 | 120 | *************************************** 121 | *** Generate Gompertz race shifters *** 122 | *************************************** 123 | 124 | project, uses("nlms_v5_s11_sampleA.dta") 125 | 126 | nlms_estimate_raceshifters using "nlms_v5_s11_sampleA.dta", stderr /// 127 | save_bysex("raceshifters/raceshifters_v5A_BYsex.dta") /// 128 | save_bysexincq("raceshifters/raceshifters_v5A_BYsex_incq.dta") /// 129 | save_bysexcsregion("raceshifters/raceshifters_v5A_BYsex_csregion.dta") /// 130 | replace 131 | 132 | project, creates("raceshifters/raceshifters_v5A_BYsex.dta") 133 | project, creates("raceshifters/raceshifters_v5A_BYsex_incq.dta") 134 | project, creates("raceshifters/raceshifters_v5A_BYsex_csregion.dta") 135 | 136 | -------------------------------------------------------------------------------- /code/nlms/nlms_loaddata.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global v4 "${root}/data/raw/NLMS" 12 | global v5 "${root}/data/raw/NLMS v5" 13 | 14 | * Create necessary folders 15 | cap mkdir "$root/data/derived/NLMS" 16 | 17 | /*** Load raw NLMS data into Stata format 18 | ***/ 19 | 20 | ************************************ 21 | *** Load NLMS PUMS v5, Sample 11 *** 22 | ************************************ 23 | 24 | *** Brief summary of PUMS Release 5 data, derived from docs 25 | 26 | /* 27 | The NLMS PUMS v5 combines CPS and Census data with death certificates, over the period 1973-2011. 28 | We use the NLMS public-use files. 29 | 30 | File 11 contains a subset of the 39 NLMS cohorts that can be followed prospectively for 11 years. 31 | In lieu of identifying the CPS year and starting point of mortality follow-up for each file, 32 | all of the records in File 11 have been assigned an imaginary starting point conceptually 33 | identified as April 1, 1990. These records are then tracked forward for 11 years to observe 34 | whether person in the file has died. 35 | 36 | 37 | Weights for Release 5 of File 11 are normalized to the U.S. non-institutionalized population on April 1, 1990. 38 | [There is an inconsistency on this point in the documentation we received from the NLMS, 39 | which we clarified with Norman Johnson at the Census Bureau.] 
40 | */ 41 | 42 | 43 | *** Documentation 44 | 45 | project, relies_on("${v5}/docs/Reference Manual Version 5.pdf") 46 | project, relies_on("${v5}/docs/Read_me.pdf") 47 | 48 | 49 | *** Load data into Stata 50 | 51 | * Load raw data 52 | project, original("${v5}/read_pubfile5.dct") 53 | project, original("${v5}/11.dat") 54 | project, original("${v4}/state_codes_nlms_fips.dta") // note: crosswalk is same in v4 and v5 55 | 56 | infile using "${v5}/read_pubfile5.dct", using("${v5}/11.dat") clear 57 | 58 | * Merge on FIPS codes 59 | merge m:1 stater using "${v4}/state_codes_nlms_fips.dta", keepusing(statefips) nogen assert(3) 60 | 61 | * Output 62 | compress 63 | isid record 64 | sort record 65 | save13 "$root/data/derived/NLMS/nlms_v5_s11_raw.dta", replace 66 | project, creates("$root/data/derived/NLMS/nlms_v5_s11_raw.dta") 67 | 68 | 69 | -------------------------------------------------------------------------------- /code/nlms/nlms_mortality_profiles_BYrace.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | if (c(os)=="Windows") global img wmf 12 | else global img png 13 | 14 | * Create required folders 15 | cap mkdir "${root}/scratch/NLMS mortality profiles by race" 16 | cap mkdir "${root}/scratch/NLMS mortality profiles by race/data" 17 | 18 | /*** Plot NLMS log mortality-agebin profiles from binned raw data, 19 | drawing OLS fit lines through them illustrating Gompertz relationship. 20 | 21 | - Separate series for each race. 22 | - Separate panel for each gender. 
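	Under the Gompertz assumption the log mortality rate is approximately linear in age,
	so a straight OLS fit through the binned log rates should track the points closely.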
23 | ***/ 24 | 25 | 26 | ******************** 27 | *** Create panel *** 28 | ******************** 29 | 30 | * Load data 31 | project, original("${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta") 32 | use "${root}/data/derived/NLMS/nlms_v5_s11_sampleA.dta", clear 33 | stset, clear 34 | 35 | * Create panel structure 36 | expand 11 // creates 11 duplicate obs, because we observe up to 11 full years 37 | bys record: replace age = age + (_n-1) // increment age for 11 follow-up obs 38 | keep if inrange(age,startage,endage) // keep observations in risk period (income lagged 2 years and before death) 39 | 40 | * Replace age with average age in year of observation 41 | /* Someone who dies in a given year dies on average 6 months into the year */ 42 | replace age=age+0.5 43 | 44 | * Generate indicator for year of death 45 | gen byte dead=0 46 | isid record age 47 | bys record (age): replace dead=1 if _n==_N & inddea==1 48 | 49 | rename inddea r_dead 50 | label var r_dead "Death Ever Observed indicator" 51 | label var dead "Year of Death indicator" 52 | 53 | * Generate age bins, coding agebin as the center of the bin 54 | assert inrange(age,40,72) 55 | g agebin = floor(age/5)*5 + 2 56 | replace agebin = 71 if agebin==72 // center of last agebin (ages 70-72) 57 | assert inrange(agebin,40,71) 58 | 59 | 60 | ************************ 61 | *** Generate Figures *** 62 | ************************ 63 | 64 | *** Collapse to mortality rates 65 | 66 | * Define mortality rate in each Age Bin x Sex x Race x Income Quartile group G as 67 | * number of individual life-years in group G that are dead by a+1 68 | * divided by the number of individual life-years in group G 69 | collapse (mean) dead [w=wt], by(agebin gnd racegrp incq) 70 | 71 | * Income-adjust by taking unweighted average across income bins 72 | collapse (mean) dead, by(agebin gnd racegrp) 73 | 74 | 75 | *** Create binscatters 76 | * Exclude agebin 71 because it's a 3-year bin instead of a 5-year bin 77 | 78 | g l_mortrate = log(dead) 79 | 80 | foreach g in "M" "F" { 81 | 82 | * Generate scatter 83 | binscatter l_mortrate agebin if gnd=="`g'" & agebin<70, by(racegrp) /// 84 | mcolor(navy maroon) xscale(range(40 70)) /// 85 | xtit("Age Bin in Years") ytit("Log Mortality Rate") tit("") /// 86 | ylabel(-7(1)-3) yscale(range(-7 -3)) legend(ring(0) pos(4) c(1) order(2 1 3 4) bmargin(medium) /// 87 | label(1 "White") label(2 "Black") label(3 "Hispanic") label(4 "Asian")) msymbol(circle triangle square diamond) /// 88 | xlab(42 "40-44" 47 "45-49" 52 "50-54" 57 "55-59" 62 "60-64" 67 "65-69"/* 71 "70-72"*/) 89 | graph export "${root}/scratch/NLMS mortality profiles by race/NLMS_raw_mortality_profiles_wOLSfitline_`g'.${img}", replace 90 | project, creates("${root}/scratch/NLMS mortality profiles by race/NLMS_raw_mortality_profiles_wOLSfitline_`g'.${img}") preserve 91 | 92 | * Export underlying data 93 | export delim gnd racegrp agebin l_mortrate if gnd=="`g'" & agebin<70 /// 94 | using "${root}/scratch/NLMS mortality profiles by race/data/NLMS_raw_mortality_profiles_wOLSfitline_`g'.csv", replace 95 | project, creates("${root}/scratch/NLMS mortality profiles by race/data/NLMS_raw_mortality_profiles_wOLSfitline_`g'.csv") preserve 96 | 97 | 98 | } 99 | -------------------------------------------------------------------------------- /code/raceshares/Explore CZ race share maps.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root 
`r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | global img png 13 | 14 | * Create necessary folders 15 | cap mkdir "${root}/scratch/Race share maps" 16 | 17 | /*** Plot CZ maps of race shares, by: 18 | 1. Sex 19 | 2. Sex x Income Quartile 20 | ***/ 21 | 22 | project, original("$root/code/ado/compute_raceshares.ado") 23 | 24 | ******************************** 25 | *** WITHOUT income quartiles *** 26 | ******************************** 27 | 28 | * Load data 29 | project, original("$derived/raceshares/cz_racesharesBY_agebin_gnd.dta") 30 | use "$derived/raceshares/cz_racesharesBY_agebin_gnd.dta", clear 31 | 32 | * Aggregate to CZ-level, and compute raceshares 33 | collapse (sum) pop_*, by(cz) 34 | compute_raceshares, by(cz) 35 | 36 | * Convert race shares to percentages 37 | foreach var of varlist raceshare_* { 38 | replace `var'=`var'*100 39 | } 40 | 41 | * Maps 42 | foreach r in "asian" "black" "hispanic" { 43 | maptile raceshare_`r', geo(cz) legd(1) cutv(5(5)40) /// 44 | geofolder("$root/code/ado_maptile_geo") /// 45 | twopt(title("Race Shares, all Q, `r' %") subtitle("last bin is >=40%")) /// 46 | savegraph("${root}/scratch/Race share maps/raceshares_ALLQ_`r'.${img}") replace 47 | project, creates("${root}/scratch/Race share maps/raceshares_ALLQ_`r'.${img}") preserve 48 | } 49 | maptile raceshare_other, geo(cz) legd(1) cutv(60(5)95) /// 50 | geofolder("$root/code/ado_maptile_geo") /// 51 | twopt(title("Race Shares, all Q, white %") subtitle("first bin is <=60%")) /// 52 | savegraph("${root}/scratch/Race share maps/raceshares_ALLQ_white.${img}") replace 53 | project, creates("${root}/scratch/Race share maps/raceshares_ALLQ_white.${img}") preserve 54 | 55 | ***************************** 56 | *** WITH income quartiles *** 57 | ***************************** 58 | 59 | * Load data 60 | project, original("$derived/raceshares/cz_racesharesBY_agebin_gnd_hhincquartile.dta") 61 | use "$derived/raceshares/cz_racesharesBY_agebin_gnd_hhincquartile.dta", clear 62 | 63 | * Aggregate to CZ x Income Quartile level, and compute raceshares 64 | collapse (sum) pop_*, by(cz hh_inc_q) 65 | compute_raceshares, by(cz hh_inc_q) 66 | 67 | * Convert race shares to percentages 68 | foreach var of varlist raceshare_* { 69 | replace `var'=`var'*100 70 | } 71 | 72 | * Maps 73 | foreach q in 1 4 { 74 | foreach r in "asian" "black" "hispanic" { 75 | maptile raceshare_`r' if hh_inc_q==`q', geo(cz) legd(1) cutv(5(5)40) /// 76 | geofolder("$root/code/ado_maptile_geo") /// 77 | twopt(title("Race Shares, Q`q', `r' %") subtitle("last bin is >=40%")) /// 78 | savegraph("${root}/scratch/Race share maps/raceshares_Q`q'_`r'.${img}") replace 79 | project, creates("${root}/scratch/Race share maps/raceshares_Q`q'_`r'.${img}") preserve 80 | } 81 | maptile raceshare_other if hh_inc_q==`q', geo(cz) legd(1) cutv(60(5)95) /// 82 | geofolder("$root/code/ado_maptile_geo") /// 83 | twopt(title("Race Shares, Q`q', white %") subtitle("first bin is <=60%")) /// 84 | savegraph("${root}/scratch/Race share maps/raceshares_Q`q'_white.${img}") replace 85 | project, creates("${root}/scratch/Race share maps/raceshares_Q`q'_white.${img}") preserve 86 | } 87 | -------------------------------------------------------------------------------- /code/raceshares/Explore income distribution over 16 income bins in County SF3 data.do: 
-------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "$root/scratch/SF3 income distributions" 12 | 13 | ************************ 14 | 15 | ********************************************************* 16 | *** Explore income distribution across 16 income bins *** 17 | ********************************************************* 18 | 19 | * Load data 20 | project, uses("$root/data/derived/raceshares/cty_racepopBY_workingagebin_hhincbin.dta") 21 | use "$root/data/derived/raceshares/cty_racepopBY_workingagebin_hhincbin.dta", clear 22 | 23 | * Create national income distribution by agebin, collapsing over counties 24 | isid cty hh_inc_bin agebin 25 | collapse (sum) pop_total, by(hh_inc_bin agebin) 26 | 27 | * Display income distribution 28 | bys agebin: tab hh_inc_bin [w=pop_total] 29 | 30 | * Convert from population counts to relative frequencies, by agebin 31 | gen float frac_total=. 32 | foreach a in 35 45 55 { 33 | sum pop_total if agebin==`a', meanonly 34 | replace frac_total=pop_total/`r(sum)' if agebin==`a' 35 | } 36 | 37 | * Reshape agebin wide 38 | drop pop_total 39 | rename frac_total frac_agebin 40 | reshape wide frac_agebin, i(hh_inc_bin) j(agebin) 41 | 42 | * Output 43 | export delim "$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincbin.csv", replace 44 | project, creates("$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincbin.csv") preserve 45 | 46 | 47 | ****************************************************************** 48 | *** Explore manual assignment of 16 income bins into quartiles *** 49 | ****************************************************************** 50 | 51 | * Reshape agebin long 52 | reshape long frac_agebin, i(hh_inc_bin) j(agebin) 53 | 54 | * Manually assign income bins to quartiles, separately by age bin 55 | gen byte hh_inc_q = hh_inc_bin 56 | recode hh_inc_q (1/5 = 1) (6/9 = 2) (10/11 = 3) (12/16 = 4) if agebin==35 57 | recode hh_inc_q (1/6 = 1) (7/10 = 2) (11/12 = 3) (13/16 = 4) if agebin==45 58 | recode hh_inc_q (1/4 = 1) (5/8 = 2) (9/11 = 3) (12/16 = 4) if agebin==55 59 | 60 | * Collapse fractions to quartiles 61 | collapse (sum) frac_agebin, by(hh_inc_q agebin) 62 | 63 | * Reshape agebin wide 64 | reshape wide frac_agebin, i(hh_inc_q) j(agebin) 65 | 66 | * Output 67 | export delim "$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincq.csv", replace 68 | project, creates("$root/scratch/SF3 income distributions/national_popfracBY_agebin_hhincq.csv") 69 | -------------------------------------------------------------------------------- /code/raceshares/cty_cz_st_raceshareBY_year_agebin_gnd_hhincquantile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | /*** 11 | Generate County, CZ and State race shares by 12 | (Year x) 5-Year Age Bin x Gender (x Income Quartile). 
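 compute_raceshares.ado is not reproduced in this listing. Conceptually, within each by() cell it converts the four race population counts into shares of the cell total. A minimal sketch of that computation (an assumption about the ado's internals, shown only to fix ideas; it can be written per row because the collapses below leave one row per by() cell):

     egen double pop_cell = rowtotal(pop_black pop_asian pop_hispanic pop_other)
     foreach r in black asian hispanic other {
         gen double raceshare_`r' = pop_`r' / pop_cell
     }
     drop pop_cell

 The raceshare_* variables then sum to one within each cell, and are missing where the cell population is zero (as noted below for some county-level cells).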
13 | ***/ 14 | 15 | **************** 16 | 17 | 18 | project, relies_on("$root/code/ado/compute_raceshares.ado") 19 | 20 | cap program drop produce_local_raceshares 21 | program define produce_local_raceshares 22 | 23 | /*** Loads race population data that has an income quantile dimension, then 24 | computes and saves race shares: 25 | 1. by Year x Geo x Age Bin x Gender x Quantile 26 | 2. by Geo x Age Bin x Gender x Quantile 27 | 3. by Geo x Age Bin x Gender 28 | 29 | Each set of generated race shares can be individually turned on or off. 30 | ***/ 31 | 32 | syntax , geo(name) qvar(name) dta_qstub(string) [ trends levels noinc ] 33 | 34 | * Load data 35 | project, uses("$root/data/derived/raceshares/`geo'_racepopBY_year_agebin_gnd_`dta_qstub'.dta") 36 | use "$root/data/derived/raceshares/`geo'_racepopBY_year_agebin_gnd_`dta_qstub'.dta", clear 37 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 38 | 39 | *** Trends, by year *** 40 | 41 | if "`trends'"=="trends" { 42 | * Compute race shares 43 | compute_raceshares, by(`geo' year agebin gnd `qvar') 44 | 45 | * Save data 46 | save13 "$root/data/derived/raceshares/`geo'_racesharesBY_year_agebin_gnd_`dta_qstub'.dta", replace 47 | project, creates("$root/data/derived/raceshares/`geo'_racesharesBY_year_agebin_gnd_`dta_qstub'.dta") preserve 48 | } 49 | 50 | *** Levels, pooled *** 51 | 52 | * Pool populations over all years 53 | isid year `geo' agebin gnd `qvar' 54 | collapse (sum) pop_*, by(`geo' agebin gnd `qvar') 55 | 56 | if "`levels'"=="levels" { 57 | * Compute race shares 58 | compute_raceshares, by(`geo' agebin gnd `qvar') 59 | 60 | * Save data 61 | save13 "$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd_`dta_qstub'.dta", replace 62 | project, creates("$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd_`dta_qstub'.dta") preserve 63 | } 64 | 65 | *** Levels without income dim, pooled *** 66 | 67 | if "`inc'"=="noinc" { // note: Stata drops the "no" prefix for the local of options beginning with no 68 | * Pool populations over income bins 69 | isid `geo' agebin gnd `qvar' 70 | collapse (sum) pop_*, by(`geo' agebin gnd) 71 | 72 | * Compute race shares 73 | compute_raceshares, by(`geo' agebin gnd) 74 | 75 | * Save data 76 | save13 "$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd.dta", replace 77 | project, creates("$root/data/derived/raceshares/`geo'_racesharesBY_agebin_gnd.dta") 78 | } 79 | 80 | end 81 | 82 | *************************** 83 | *** Compute race shares *** 84 | *************************** 85 | 86 | * Quartile race shares by County, CZ, State 87 | foreach geo in cty cz st { 88 | produce_local_raceshares, geo(`geo') qvar(hh_inc_q) dta_qstub(hhincquartile) trends levels noinc 89 | } 90 | 91 | * Note: some county-level cells have 0 total population, and race shares will therefore be missing 92 | 93 | * Ventile race shares by CZ 94 | produce_local_raceshares, geo(cz) qvar(hh_inc_v) dta_qstub(hhincventile) levels 95 | -------------------------------------------------------------------------------- /code/raceshares/cty_racepopBY_year_agebin_gnd.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | /*** 11 | 12 | Uses the county-level Intercensal and Postcensal estimate 
tables, which contain 13 | racial populations by County x 5-Year Age Bin x Gender x Year, each July. 14 | 15 | We use these data to generate the county population of each race by 16 | Age Bin x Gender x Year (2001-2014) from the 2000s and 2010s 17 | Intercensal/Postcensal estimates. 18 | 19 | ***/ 20 | 21 | 22 | ******************************************************************** 23 | 24 | cap program drop clean_cty_censal_estimates 25 | program define clean_cty_censal_estimates 26 | 27 | * Generate 5-digit county FIPS code 28 | gen long cty = 1000*state + county 29 | 30 | * Keep only desired age bins 31 | * --> age bins are 5-year age bins, labeled as the bottom age in each age bin 32 | keep if inrange(agegrp,9,16) 33 | replace agegrp=40+(agegrp-9)*5 34 | rename agegrp agebin 35 | 36 | * Keep only required race pops 37 | keep year cty agebin tot_male tot_female h_male h_female nhba_male nhba_female nhaa_male nhaa_female 38 | 39 | * Reshape gender long 40 | reshape long tot_ nhba_ nhaa_ h_, i(year cty agebin) j(gnd) string 41 | assert inlist(gnd,"male","female") 42 | replace gnd=cond(gnd=="male","M","F") 43 | compress gnd 44 | 45 | * Rename pop vars 46 | rename tot_ pop_total 47 | rename nhba_ pop_black 48 | rename nhaa_ pop_asian 49 | rename h_ pop_hispanic 50 | 51 | * Generate 'other' population count 52 | gen long pop_other = pop_total - pop_black - pop_asian - pop_hispanic 53 | drop pop_total 54 | 55 | end 56 | 57 | *************************************** 58 | *** 2000-2010 Intercensal Estimates *** 59 | *************************************** 60 | 61 | project, relies_on("$root/data/raw/Census County Intercensal Estimates 2000-2010/source.txt") // source URL 62 | project, relies_on("$root/data/raw/Census County Intercensal Estimates 2000-2010/Download county intercensal pop data.do") // automated download code 63 | project, relies_on("$root/data/raw/Census County Intercensal Estimates 2000-2010/CO-EST00INT-ALLDATA.pdf") // data dictionary 64 | 65 | * Load intercensal population data 66 | project, original("$root/data/raw/Census County Intercensal Estimates 2000-2010/county_intercensal_2000s_allstates.dta") 67 | use "$root/data/raw/Census County Intercensal Estimates 2000-2010/county_intercensal_2000s_allstates.dta", clear 68 | 69 | * Reshape/rename raw intercensal data 70 | clean_cty_censal_estimates 71 | 72 | * Keep only annual July estimates (drop 2000 and 2010 April Census counts) 73 | * --> see data dictionary for year codes 74 | drop if inlist(year,1,12) 75 | recode year (2=2000) (3=2001) (4=2002) (5=2003) (6=2004) (7=2005) (8=2006) /// 76 | (9=2007) (10=2008) (11=2009) (13=2010) 77 | 78 | * Save 79 | keep if inrange(year,2000,2009) 80 | tempfile cty_intercensal 81 | save `cty_intercensal' 82 | 83 | ************************************** 84 | *** 2010-2014 Postcensal Estimates *** 85 | ************************************** 86 | 87 | project, relies_on("$root/data/raw/Census County Postcensal Estimates 2010-2014/source.txt") // source URL 88 | project, relies_on("$root/data/raw/Census County Postcensal Estimates 2010-2014/CC-EST2014-ALLDATA.pdf") // data dictionary 89 | 90 | * Load postcensal population data 91 | project, original("$root/data/raw/Census County Postcensal Estimates 2010-2014/CC-EST2014-ALLDATA.csv") 92 | insheet using "$root/data/raw/Census County Postcensal Estimates 2010-2014/CC-EST2014-ALLDATA.csv", clear 93 | 94 | * Reshape/rename raw postcensal data 95 | clean_cty_censal_estimates 96 | 97 | * Keep only annual July estimates (drop 2010 April Census counts) 98 
| * --> see data dictionary for year codes 99 | keep if inrange(year,3,7) 100 | recode year (3=2010) (4=2011) (5=2012) (6=2013) (7=2014) 101 | 102 | 103 | *********************************** 104 | *** Combine data from 2001-2014 *** 105 | *********************************** 106 | append using `cty_intercensal' 107 | keep if inrange(year,2001,2014) 108 | 109 | * Convert 2000-2014 Inter/Postcensal counties to 1999 counties 110 | * see https://www.census.gov/geo/reference/county-changes.html 111 | isid cty year gnd agebin 112 | drop if cty == 15005 // Kalawao, Hawaii (pop ~90) 113 | recode cty (12086 = 12025) // Miami-Dade FIPS change 114 | recode cty (8014 = 8013) // Broomfield County created from Boulder, CO 115 | recode cty (2282 = 2231) (2275 2195 = 2280) (2230 2105 = 2231) /// 116 | (2198 = 2201) (2068 = 2290) // County changes in Alaska 117 | 118 | collapse (sum) pop*, by(cty year gnd agebin) // combine counties that split in 2000-2014 119 | 120 | /* Note: Bedford City (cty==51515) appears in 2001-2009 but not 2010-2014. See: -tab year-. 121 | 122 | This is because Bedford City was merged into a larger county in 2010-2014. It therefore 123 | appears in 1999 and 2000-2009 but not afterwards. 124 | */ 125 | 126 | 127 | * Output 128 | compress 129 | order year agebin gnd cty 130 | sort year agebin gnd cty 131 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 132 | save13 "$root/data/derived/raceshares/cty_racepopBY_year_agebin_gnd.dta", replace 133 | project, creates("$root/data/derived/raceshares/cty_racepopBY_year_agebin_gnd.dta") 134 | 135 | -------------------------------------------------------------------------------- /code/raceshares/national_Explore race fraction smoothing.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "$root/scratch/Race fraction smoothing figs" 12 | 13 | /*** 1. Visualize the smoothing of racial income distributions using 14 | different lowess bandwidths. 15 | 16 | Separately by: 17 | - Race 18 | - Age 19 | 20 | 2. Verify whether the smoothed percentiles are approximately equal in size. 21 | ***/ 22 | 23 | 24 | **************** 25 | *** Programs *** 26 | **************** 27 | 28 | cap program drop smoothing_graph 29 | program define smoothing_graph 30 | /*** For a given Lowess bandwidth, create a graph of the lowess-smoothed 31 | racial income distribution and the underlying scatterpoints.
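 Example call (illustrative; the batch loops at the bottom of this script pass race(all), which makes the program call itself once per race):

     smoothing_graph, race(black) age(45) income_pctile(hh_inc_pctile) lowess_bw(0.3)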
32 | ***/ 33 | 34 | syntax , race(string) age(integer) income_pctile(varname) lowess_bw(real) [ export ] 35 | 36 | if "`race'"=="all" { 37 | foreach race in black asian hispanic other { 38 | smoothing_graph, race(`race') age(`age') income_pctile(`income_pctile') lowess_bw(`lowess_bw') `export' 39 | } 40 | } 41 | else { 42 | 43 | tw (lowess frac_of_`race' `income_pctile' if age==`age' & gnd=="M" & `income_pctile'>0, bw(`lowess_bw')) /// 44 | (lowess frac_of_`race' `income_pctile' if age==`age' & gnd=="F" & `income_pctile'>0, bw(`lowess_bw')) /// 45 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="M" & `income_pctile'>0, mc(navy) ms(o)) /// 46 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="F" & `income_pctile'>0, mc(maroon) ms(o)) /// 47 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="M" & `income_pctile'<=0, mc(navy) ms(T)) /// 48 | (scatter frac_of_`race' `income_pctile' if age==`age' & gnd=="F" & `income_pctile'<=0, mc(maroon) ms(T)) /// 49 | , /// 50 | legend(lab(1 "Male") lab(2 "Female") order(1 2)) /// 51 | ytitle("Fraction of `race' age `age' population") 52 | if ("`export'"=="export") { 53 | local exportfile ${root}/scratch/Race fraction smoothing figs/racefrac_age`age'`race'_bw0`=subinstr("`lowess_bw'",".","",.)'.png 54 | graph export `"`exportfile'"', replace 55 | project, creates(`"`exportfile'"') preserve 56 | } 57 | 58 | *lowess frac_of_`race' `income_pctile' if age==`age' & `income_pctile'>0, by(gnd) bw(`lowess_bw') 59 | *if ("`export'"=="export") graph export "$root/scratch/lowess_racefrac_age`age'`race'_BY_sex_hhincpctile.png", replace 60 | 61 | } 62 | 63 | end 64 | 65 | ************************************************ 66 | *** Visualize different smoothing bandwidths *** 67 | ************************************************ 68 | 69 | * Load data 70 | project, original("$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta") 71 | use "$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta", clear 72 | drop smoothfrac_* 73 | 74 | * Generate and save lowess figs 75 | forvalues a=40(5)60 { 76 | smoothing_graph, race(all) age(`a') income_pctile(hh_inc_pctile) lowess_bw(0.2) export 77 | smoothing_graph, race(all) age(`a') income_pctile(hh_inc_pctile) lowess_bw(0.3) export 78 | } 79 | 80 | 81 | ********************************************************* 82 | *** Check population size in smoothed percentile bins *** 83 | ********************************************************* 84 | 85 | * Load data 86 | project, original("$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta") 87 | use "$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta", clear 88 | 89 | * Collapse away income dimension 90 | isid age gnd hh_inc_pctile 91 | collapse (sum) pop_*, by(age gnd) 92 | 93 | * Generate an income bin dimension 94 | expand 102 95 | bys age gnd: gen int hh_inc_pctile = _n - 2 96 | replace hh_inc_pctile=-2 if hh_inc_pctile==-1 97 | order age gnd hh_inc_pctile 98 | 99 | * Merge back in racial income distributions 100 | merge 1:1 age gnd hh_inc_pctile using "$root/data/derived/raceshares/national_racefractionsBY_workingage_gnd_hhincpctile.dta", /// 101 | assert(3) keepusing(smoothfrac_*) nogen 102 | 103 | * Apply race fractions to adjust population counts 104 | foreach race in black asian hispanic other { 105 | replace pop_`race' = pop_`race' * smoothfrac_of_`race' 106 | } 107 | drop smoothfrac_* 108 | 109 | * Generate 
total population 110 | gen double pop_total = pop_black + pop_asian + pop_hispanic + pop_other 111 | 112 | * Analyze variation in total population across income percentiles 113 | egen max_percentile_pop=max(pop_total) if hh_inc_pctile>0, by(age gnd) 114 | egen min_percentile_pop=min(pop_total) if hh_inc_pctile>0, by(age gnd) 115 | egen mean_percentile_pop=mean(pop_total) if hh_inc_pctile>0, by(age gnd) 116 | 117 | collapse (mean) *_percentile_pop, by(age gnd) 118 | 119 | * Study relative error in population size of income percentiles 120 | gen relerr=(max_percentile_pop-min_percentile_pop)/mean_percentile_pop 121 | sum relerr, d 122 | -------------------------------------------------------------------------------- /code/raceshares/national_Race share extrapolation check - nonparametric age 51 test.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set global aliases 11 | global racedata $root/data/derived/raceshares 12 | 13 | * Create required folders 14 | cap mkdir "$root/scratch/Age 51 Retirement Age Extrapolation Test" 15 | 16 | /*** Check how well we do at predicting race shares at retirement ages, 17 | by treating 52 as the retirement age instead of 62 and seeing 18 | how well we predict race shares for ages 52-61. 19 | 20 | Explanation: 21 | 22 | We only measure contemporaneous income in the Census, so we 23 | don't know the race shares of 62-year-olds by the income 24 | they had when they were 61. We therefore use the income distribution 25 | at age 61 to impute the income dimension at all retirement ages. 26 | 27 | In practice, we can't test how well this approximation works 28 | because the true race shares for retirees by working income are 29 | unobservable. (If they were observable, we'd be using them.) 30 | 31 | We therefore do a test where we treat 52 as the retirement age, 32 | use the racial income distributions at age 51 to impute income 33 | for ages 52-61, and compare the resulting race shares to the 34 | true race shares.
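 In symbols: letting frac_r(a,p) denote race r's smoothed income fraction at age a and percentile p, the extrapolated ("51test") shares at ages a = 52,...,61 are built from frac_r(51,p), while the true shares are built from frac_r(a,p). At a = 51 the two are identical by construction, which the assert block below checks to within 10^-6.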
35 | 36 | ***/ 37 | 38 | *********************** 39 | 40 | 41 | ******************************************************************* 42 | *** Nonparametric extrapolation check with age 51 extrapolation *** 43 | ******************************************************************* 44 | 45 | * Load race shares based on age 51 extrapolation 46 | project, original("$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta") 47 | use "$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta", clear 48 | rename raceshare_* raceshare_51test_* 49 | 50 | * Merge true race shares, based on observed age 51-61 income distributions 51 | project, original("$racedata/national_racesharesBY_age_gnd_hhincpctile.dta") preserve 52 | merge 1:1 age gnd pctile using "$racedata/national_racesharesBY_age_gnd_hhincpctile.dta", /// 53 | keep(match) assert(2 3) nogen 54 | 55 | * Check that race shares match exactly at age==51, since we're using age 51 race fracs 56 | foreach r in black asian hispanic other { 57 | assert abs(raceshare_51test_`r'-raceshare_`r')<10^-6 if age==51 58 | } 59 | 60 | * Output figures 61 | foreach income_pctile in 1 10 25 50 75 90 99 { 62 | foreach race in black asian hispanic other { 63 | tw (connect raceshare_`race' age) /// 64 | (connect raceshare_51test_`race' age, lpattern(dash)) /// 65 | if pctile == `income_pctile', /// 66 | by(gnd, title(`race' p`income_pctile')) /// 67 | legend(lab(1 "Observed") lab(2 "Extrapolated")) /// 68 | xlab(51(2)61) /// 69 | xtitle(Age) ytitle(Race Share) 70 | graph export "$root/scratch/Age 51 Retirement Age Extrapolation Test/raceshare_51test_p`income_pctile'_`race'.png", replace 71 | project, creates("$root/scratch/Age 51 Retirement Age Extrapolation Test/raceshare_51test_p`income_pctile'_`race'.png") preserve 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /code/raceshares/national_racepopBY_year_age_gnd_incpctile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set global aliases 11 | global racedata $root/data/derived/raceshares 12 | 13 | /*** 14 | 15 | Add an income dimension to the national race population data. 16 | 17 | Uses race fractions derived from the 2000 Census public-use microdata 18 | to assign income bins to the race populations in the 2000-2014 19 | Intercensal/Postcensal estimates. 20 | 21 | We apply the age 61 racial income distribution to the racial population for each 22 | age 62-76. This imputes the income dimension of the last working age we see for 23 | people who are at retirement ages. The procedure is an approximation of the 24 | procedure we perform at the IRS, where we use peoples' income at age 61 at all 25 | later ages. Note that we only observe contemporary income in the Census. 26 | 27 | We verify the accuracy of this approximation using a parametric simulation and a 28 | non-parametric check where we extrapolate forward from age 51. 
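 Worked example (hypothetical numbers, for illustration only): if the smoothed age-61 distribution places 0.8% of the black male population in household income percentile 50, then for each retirement age a = 62,...,76 and each year the imputed cell is

     pop_black(year, a, M, pctile 50) = 0.008 * pop_black(year, a, M),

 i.e. the same age-61 fractions are applied to every older age's race population.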
29 | 30 | ***/ 31 | 32 | ************************ 33 | 34 | cap program drop impute_income_dimension 35 | program define impute_income_dimension 36 | 37 | /*** Loads racial population data by Year x Age x Gender, 38 | then imputes an income dimension using racial income distributions, 39 | which are Age x Gender (same distribution used in all years). 40 | 41 | Uses the income distribution at the last working age for the 42 | income dimension at all subsequent ages. 43 | ***/ 44 | 45 | syntax using/, incomevar(name) [ retirementage(integer 62) ] 46 | 47 | project, uses(`"`using'"') 48 | 49 | * Load race population data 50 | project, uses("$racedata/national_racepopBY_year_age_gnd.dta") 51 | use "$racedata/national_racepopBY_year_age_gnd.dta", clear 52 | 53 | * Generate an income bin dimension 54 | isid year age gnd 55 | expand 102 56 | bys year age gnd: gen int `incomevar' = _n - 2 57 | replace `incomevar'=-2 if `incomevar'==-1 58 | order year age gnd `incomevar' 59 | 60 | * Generate a temporary "age" variable corresponding to the age of the income distribution 61 | * we want merged in. 62 | rename age trueage 63 | assert inrange(trueage,40,76) 64 | recode trueage (`retirementage'/76 = `=`retirementage'-1'), gen(age) 65 | 66 | * Merge in racial income distributions 67 | merge m:1 age gnd `incomevar' using `"`using'"', /// 68 | assert(2 3) keep(3) keepusing(smoothfrac_*) nogen 69 | // note: _merge==2 happens when we set the retirement age < 62, so the income distributions at some working ages are not used. 70 | 71 | drop age 72 | rename trueage age 73 | 74 | * Apply race fractions to adjust counts 75 | foreach race in black asian hispanic other { 76 | replace pop_`race' = pop_`race' * smoothfrac_of_`race' 77 | } 78 | drop smoothfrac_* 79 | 80 | * Output 81 | sort year age gnd `incomevar' 82 | isid year age gnd `incomevar' 83 | compress 84 | 85 | end 86 | 87 | 88 | ************************************* 89 | *** 2000 Census, Household income *** 90 | ************************************* 91 | 92 | impute_income_dimension using "$racedata/national_racefractionsBY_workingage_gnd_hhincpctile.dta", /// 93 | incomevar(hh_inc_pctile) 94 | 95 | save13 "$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta", replace 96 | project, creates("$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta") 97 | 98 | 99 | ************************************** 100 | *** 2000 Census, Individual income *** 101 | ************************************** 102 | 103 | impute_income_dimension using "$racedata/Individual income/national_racefractionsBY_workingage_gnd_INDincpctile.dta", /// 104 | incomevar(ind_inc_pctile) 105 | 106 | save13 "$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta", replace 107 | project, creates("$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta") 108 | 109 | *************************************** 110 | *** 2008-2012 ACS, Household income *** 111 | *************************************** 112 | 113 | impute_income_dimension using "$racedata/2008-2012 ACS income distribution/national_racefractionsBY_workingage_gnd_hhincpctile_ACS.dta", /// 114 | incomevar(hh_inc_pctile) 115 | 116 | save13 "$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta", replace 117 | project, creates("$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta") 118 | 119 | 120 | ******************************************************************************** 121 | *** Age 51 Retirement 
Age Extrapolation Test (2000 Census, Household Income) *** 122 | ******************************************************************************** 123 | 124 | impute_income_dimension using "$racedata/national_racefractionsBY_workingage_gnd_hhincpctile.dta", /// 125 | incomevar(hh_inc_pctile) retirementage(52) 126 | 127 | keep if inrange(age,51,61) 128 | 129 | cap mkdir "$racedata/Age 51 Retirement Age Extrapolation Test/" 130 | save13 "$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta", replace 131 | project, creates("$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta") 132 | -------------------------------------------------------------------------------- /code/raceshares/national_raceshareBY_gnd.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | 11 | ************************ 12 | 13 | project, original("$root/code/ado/compute_raceshares.ado") 14 | 15 | * Load age 40 race population data by Gender from 2000 Census 16 | project, uses("$root/data/derived/raceshares/national_2000age40_racepopBY_gnd.dta") 17 | use "$root/data/derived/raceshares/national_2000age40_racepopBY_gnd.dta", clear 18 | 19 | * Generate race shares 20 | compute_raceshares, by(gnd) 21 | 22 | * Output 23 | isid gnd 24 | save13 "$root/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta", replace 25 | project, creates("$root/data/derived/raceshares/national_2000age40_racesharesBY_gnd.dta") 26 | -------------------------------------------------------------------------------- /code/raceshares/national_raceshareBY_year_age_gnd_incpctile.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set global aliases 11 | global racedata $root/data/derived/raceshares 12 | 13 | /*** 14 | 15 | Generate national race shares by (Year x) Age x Gender x Income Percentile. 
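 Shares are produced both by year ("trends", for the 2000 Census household-income input) and pooled over years ("levels"). Note that pooling the population counts before computing shares, as done below, weights each year by its population; it is not the same as averaging the annual shares, which would weight all years equally.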
16 | 17 | ***/ 18 | 19 | **************** 20 | *** Programs *** 21 | **************** 22 | 23 | project, original("$root/code/ado/compute_raceshares.ado") 24 | 25 | 26 | ************************************* 27 | *** 2000 Census, Household income *** 28 | ************************************* 29 | 30 | * Load data 31 | project, uses("$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta") 32 | use "$racedata/national_racepopBY_year_age_gnd_hhincpctile.dta", clear 33 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 34 | rename hh_inc_pctile pctile // match IRS collapse varname 35 | 36 | *** Trends, by year *** 37 | 38 | * Compute race shares 39 | compute_raceshares, by(year age gnd pctile) 40 | 41 | * Save data 42 | save13 "$racedata/national_racesharesBY_year_age_gnd_hhincpctile.dta", replace 43 | project, creates("$racedata/national_racesharesBY_year_age_gnd_hhincpctile.dta") preserve 44 | 45 | *** Levels, pooled *** 46 | 47 | * Pool populations over all years 48 | isid year age gnd pctile 49 | collapse (sum) pop_*, by(age gnd pctile) 50 | 51 | * Compute race shares 52 | compute_raceshares, by(age gnd pctile) 53 | 54 | * Save data 55 | save13 "$racedata/national_racesharesBY_age_gnd_hhincpctile.dta", replace 56 | project, creates("$racedata/national_racesharesBY_age_gnd_hhincpctile.dta") 57 | 58 | 59 | ************************************** 60 | *** 2000 Census, Individual income *** 61 | ************************************** 62 | 63 | * Load data 64 | project, uses("$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta") 65 | use "$racedata/Individual income/national_racepopBY_year_age_gnd_INDincpctile.dta", clear 66 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 67 | 68 | * Pool populations over all years 69 | isid year age gnd ind_inc_pctile 70 | collapse (sum) pop_*, by(age gnd ind_inc_pctile) 71 | 72 | * Compute race shares 73 | compute_raceshares, by(age gnd ind_inc_pctile) 74 | rename ind_inc_pctile indv_earn_pctile // match IRS collapse varname 75 | 76 | * Save data 77 | save13 "$racedata/Individual income/national_racesharesBY_age_gnd_INDincpctile.dta", replace 78 | project, creates("$racedata/Individual income/national_racesharesBY_age_gnd_INDincpctile.dta") 79 | 80 | 81 | *************************************** 82 | *** 2008-2012 ACS, Household income *** 83 | *************************************** 84 | 85 | * Load data 86 | project, uses("$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta") 87 | use "$racedata/2008-2012 ACS income distribution/national_racepopBY_year_age_gnd_hhincpctile_ACS.dta", clear 88 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 89 | 90 | * Pool populations over all years 91 | isid year age gnd hh_inc_pctile 92 | collapse (sum) pop_*, by(age gnd hh_inc_pctile) 93 | 94 | * Compute race shares 95 | compute_raceshares, by(age gnd hh_inc_pctile) 96 | rename hh_inc_pctile pctile // match IRS collapse varname 97 | 98 | * Save data 99 | save13 "$racedata/2008-2012 ACS income distribution/national_racesharesBY_age_gnd_hhincpctile_ACS.dta", replace 100 | project, creates("$racedata/2008-2012 ACS income distribution/national_racesharesBY_age_gnd_hhincpctile_ACS.dta") 101 | 102 | 103 | ******************************************************************************** 104 | *** Age 51 Retirement Age Extrapolation Test (2000 Census, Household Income) *** 105 | ******************************************************************************** 106 | 107 | * Load 
data 108 | project, uses("$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta") 109 | use "$racedata/Age 51 Retirement Age Extrapolation Test/national_racepopBY_year_age_gnd_hhincpctile_51test.dta", clear 110 | assert !mi(pop_black, pop_asian, pop_hispanic, pop_other) 111 | 112 | * Pool populations over all years 113 | isid year age gnd hh_inc_pctile 114 | collapse (sum) pop_*, by(age gnd hh_inc_pctile) 115 | 116 | * Compute race shares 117 | compute_raceshares, by(age gnd hh_inc_pctile) 118 | rename hh_inc_pctile pctile // match IRS collapse varname 119 | 120 | * Save data 121 | save13 "$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta", replace 122 | project, creates("$racedata/Age 51 Retirement Age Extrapolation Test/national_racesharesBY_age_gnd_hhincpctile_51test.dta") 123 | -------------------------------------------------------------------------------- /code/set_bootstrap.do: -------------------------------------------------------------------------------- 1 | global reps 1000 2 | global splitobs 625000 3 | -------------------------------------------------------------------------------- /code/set_environment.do: -------------------------------------------------------------------------------- 1 | version 13.1 2 | clear all 3 | set more off 4 | pause on 5 | 6 | global root "${mortality_root}" 7 | 8 | adopath ++ "$root/code/ado_ssc" 9 | adopath ++ "$root/code/ado" 10 | 11 | * Disable project (since running do-files directly) 12 | cap program drop project 13 | program define project 14 | di "Project is disabled, skipping project command. (To re-enable, run -{stata program drop project}-)" 15 | end 16 | -------------------------------------------------------------------------------- /code/tests/Check completeness of mortality data.do: -------------------------------------------------------------------------------- 1 | 2 | * Set $root 3 | return clear 4 | capture project, doinfo 5 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 6 | else { // running directly 7 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 8 | do "${mortality_root}/code/set_environment.do" 9 | } 10 | 11 | * Set convenient globals 12 | global raw_data "${root}/data/raw" 13 | global derived "${root}/data/derived" 14 | 15 | * Create required folders 16 | cap mkdir "${root}/data/derived/Gompertz Parameters" 17 | cap mkdir "${root}/data/derived/Gompertz Parameters/OLS" 18 | 19 | /*** Test whether all observations are included in mortality rate files 20 | loaded in estimate_irs_gompertz_parameters.do. 21 | ***/ 22 | 23 | 24 | ******************************************************************** 25 | ************** National Life Expectancy Estimates ***************** 26 | ******************************************************************** 27 | 28 | * Load mortality rates 29 | project, original("${derived}/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta") 30 | use "${derived}/Mortality Rates/national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 31 | keep if age_at_d >= 40 32 | 33 | * Assert that the correct number of observations exist per age 34 | forval age = 40/63 { // full years 2001-2014 35 | bys age_at_d: assert _N == 2*100*(2014-2001+1) if age_at_d == `age' 36 | } 37 | 38 | forval age = 64/76 { // years 2001-2013 for age 64, 2001-2012 for age 65, etc. 
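	* Each age a in 64-76 is observed in 76-a+1 calendar years (2001 through
	* 2014-(a-63)); with 2 genders x 100 income percentiles per year, the
	* expected cell count checked below is 2*100*(76-a+1) rows.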
39 | bys age_at_d: assert _N == 2*100*(76-`age'+1) if age_at_d == `age' 40 | } 41 | 42 | 43 | ******************************************************************** 44 | *********** Life Expectancy Estimates by Commuting Zone ************ 45 | ******************************************************************** 46 | 47 | * Load mortality rates 48 | project, original("${derived}/Mortality Rates/cz_mortratesBY_gnd_hhincquartile_age_year.dta") 49 | use "${derived}/Mortality Rates/cz_mortratesBY_gnd_hhincquartile_age_year.dta", clear 50 | keep if age_at_d>=40 51 | assert inrange(hh_inc_q, 1 ,4) 52 | 53 | * Check if correct number of observations exist per CZ with pop >= 25k 54 | merge m:1 cz using "$derived/final_covariates/cz_pop.dta", nogen keep(match master) 55 | keep if pop2000 >= 25000 56 | 57 | * Should be 2*14*(63-40+1) + 2*(13+12+11+10+9+8+7+6+5+4+3+2+1) = 854 observations per CZ x quartile 58 | collapse (count) cz_q_obs = mortrate, by(hh_inc_q cz) 59 | bys cz: assert _N==4 60 | count if cz_q_obs!=854 61 | di in red "Fraction of quartile x CZ groups missing observations : "`r(N)'/_N // 2.5% of CZ x quartiles are missing observations 62 | 63 | * Should be 854*4 = 3,416 observations per CZ 64 | collapse (sum) cz_obs = cz_q_obs, by(cz) 65 | count if cz_obs!=3416 66 | di in red "Fraction of CZs missing observations : "`r(N)'/_N // 9.7% of CZs are missing observations 67 | 68 | 69 | ******************************************************************** 70 | *************** Life Expectancy Estimates by County **************** 71 | ******************************************************************** 72 | 73 | * Load mortality rates 74 | project, original("${derived}/Mortality Rates/cty_mortratesBY_gnd_hhincquartile_age_year.dta") 75 | use "${derived}/Mortality Rates/cty_mortratesBY_gnd_hhincquartile_age_year.dta", clear 76 | keep if age_at_d>=40 77 | assert inrange(hh_inc_q, 1 ,4) 78 | 79 | * Check if correct number of observations exist per county with pop >= 25k 80 | merge m:1 cty using "$derived/final_covariates/cty_full_covariates.dta", nogen keep(match master) keepus(cty_pop2000) 81 | keep if cty_pop2000 >= 25000 82 | 83 | * Should be 2*14*(63-40+1) + 2*(13+12+11+10+9+8+7+6+5+4+3+2+1) = 854 observations per county x quartile 84 | collapse (count) cty_q_obs = mortrate, by(hh_inc_q cty) 85 | bys cty: assert _N==4 86 | count if cty_q_obs!=854 87 | di in red "Fraction of quartile x county groups missing observations : "`r(N)'/_N // 6.5% of county x quartiles are missing observations 88 | 89 | * Should be 854*4 = 3,416 observations per county 90 | collapse (sum) cty_obs = cty_q_obs, by(cty) 91 | count if cty_obs!=3416 92 | di in red "Fraction of counties missing observations : "`r(N)'/_N // 21.4% of counties are missing observations 93 | 94 | 95 | ******************************************************************** 96 | *************** Life Expectancy Estimates by State ***************** 97 | ******************************************************************** 98 | 99 | * Load mortality rates 100 | project, original("${derived}/Mortality Rates/st_mortratesBY_gnd_hhincquartile_age_year.dta") 101 | use "${derived}/Mortality Rates/st_mortratesBY_gnd_hhincquartile_age_year.dta", clear 102 | keep if age_at_d >= 40 103 | assert inrange(hh_inc_q, 1 ,4) 104 | 105 | * Check if correct number of observations exist per state 106 | 107 | * Should be 2*14*(63-40+1) + 2*(13+12+11+10+9+8+7+6+5+4+3+2+1) = 854 observations per state x quartile 108 | collapse (count) st_q_obs = mortrate, by(hh_inc_q st) 109 | bys 
st: assert _N==4 110 | count if st_q_obs!=854 111 | di in red "Fraction of quartile x state groups missing observations : "`r(N)'/_N // 0% of state x quartiles are missing observations 112 | 113 | * Should be 854*4 = 3,416 observations per state 114 | collapse (sum) st_obs = st_q_obs, by(st) 115 | count if st_obs!=3416 116 | di in red "Fraction of states missing observations : "`r(N)'/_N // 0% of states are missing observations 117 | -------------------------------------------------------------------------------- /code/tests/Explore runtimes for estimating race shifters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set working directory 11 | cd "$root/data/derived/NLMS" 12 | 13 | /*** Test runtime of different methods for estimating Gompertz race shifters, 14 | which is especially relevant for bootstrap runtimes. 15 | ***/ 16 | 17 | ******** 18 | 19 | 20 | *** Load data 21 | timer clear 22 | 23 | project, original("nlms_v5_s11_sampleA.dta") 24 | use "nlms_v5_s11_sampleA.dta", clear 25 | 26 | 27 | *** Method 1: Loop and store estimates 28 | timer on 1 29 | foreach g in "M" "F" { 30 | streg i.csdiv i.incq i.racegrp if gnd=="`g'", dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 31 | estimates store est`g' 32 | } 33 | timer off 1 34 | 35 | 36 | *** Method 2: statsby, saving 37 | 38 | * Specify parameters to save 39 | local save_est /// 40 | diff_gomp_int_black = _b[_t:2.racegrp] /// 41 | diff_gomp_int_hisp = _b[_t:3.racegrp] /// 42 | diff_gomp_int_asian = _b[_t:4.racegrp] /// 43 | diff_gomp_slope_black = _b[gamma:2.racegrp] /// 44 | diff_gomp_slope_hisp = _b[gamma:3.racegrp] /// 45 | diff_gomp_slope_asian = _b[gamma:4.racegrp] 46 | 47 | * Run regression 48 | timer on 2 49 | 50 | tempfile save_bysex 51 | statsby `save_est', saving(`save_bysex', `replace') /// 52 | by(gnd): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 53 | 54 | timer off 2 55 | 56 | 57 | *** Method 3: statsby, clear 58 | timer on 3 59 | 60 | statsby `save_est', clear /// 61 | by(gnd): streg i.csdiv i.incq i.racegrp, dist(gompertz) nohr ancillary(i.csdiv i.incq i.racegrp) 62 | 63 | timer off 3 64 | 65 | 66 | *** Display times 67 | timer list 68 | -------------------------------------------------------------------------------- /code/tests/Simulate various methods of deriving LE from Gompertz parameters.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Set convenient globals 11 | global derived "${root}/data/derived" 12 | 13 | * Load LE functions 14 | project, original("${root}/code/ado/generate_le_with_raceadj.ado") 15 | include "$root/code/ado/generate_le_with_raceadj.ado" 16 | 17 | * Add ado files to project 18 | project, original("${root}/code/ado/fastregby.ado") 19 | 20 | 21 | /*** Simulates various methods for integrating the survival curve based on the 22 | "discretized Gompertz parameters" and compares
their results to the true LE. 23 | ***/ 24 | 25 | ************* 26 | 27 | 28 | * Load Gompertz parameters for national percentiles 29 | project, original("${derived}/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta") 30 | use "${derived}/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta", clear 31 | isid gnd pctile 32 | keep gnd pctile gomp_int gomp_slope 33 | 34 | * Create an age dimension 35 | expand 76-40+1 36 | bys gnd pctile: gen age_at_d = 40 + _n - 1 37 | 38 | * Compute discrete-time mortality rates at age [x,x+1) 39 | gen mort_discrete = 1 - exp( 1 / gomp_slope * ( exp(gomp_int + gomp_slope*age_at_d) - exp(gomp_int + gomp_slope*(age_at_d+1)) ) ) 40 | 41 | * Estimate discretized Gompertz parameters 42 | gen log_mort_discrete = log(mort_discrete) 43 | fastregby log_mort_discrete age_at_d, by(gnd pctile) clear 44 | ren (_b_age_at_d _b_cons) (gomp_slope_estimated gomp_int_estimated) 45 | order gomp_int_estimated gomp_slope_estimated, last 46 | 47 | * Merge in true Gompertz parameters 48 | merge 1:1 gnd pctile using "${derived}/Gompertz Parameters/national_gompBY_gnd_hhincpctile.dta", /// 49 | assert(2 3) keep(3) nogen keepusing(gomp*) 50 | rename (gomp_int gomp_slope) (gomp_int_true gomp_slope_true) 51 | 52 | * Adjust Gompertz intercept to midpoint of integer bins, since "age 40" in estimation is actually age [40,41) 53 | gen gomp_int_adjusted = gomp_int_estimated - 0.5 * gomp_slope_estimated 54 | 55 | 56 | * Compute expected life years at true Gompertz parameters 57 | rename (gomp_int_true gomp_slope_true) (gomp_int gomp_slope) 58 | gen_gomp_lifeyears_cont, startage(40) endage(90) 59 | rename expectedLY_gomp expectedLY_true 60 | rename (gomp_int gomp_slope) (gomp_int_true gomp_slope_true) 61 | 62 | * Compute expected life years at estimated unadjusted Gompertz parameters 63 | rename (gomp_int_estimated gomp_slope_estimated) (gomp_int gomp_slope) 64 | gen_gomp_lifeyears_cont, startage(40) endage(90) 65 | rename expectedLY_gomp expectedLY_est 66 | rename (gomp_int gomp_slope) (gomp_int_estimated gomp_slope_estimated) 67 | 68 | * Compute expected life years at estimated adjusted Gompertz parameters 69 | rename (gomp_int_adjusted gomp_slope_estimated) (gomp_int gomp_slope) 70 | gen_gomp_lifeyears_cont, startage(40) endage(90) 71 | rename expectedLY_gomp expectedLY_adj 72 | rename (gomp_int gomp_slope) (gomp_int_adjusted gomp_slope_estimated) 73 | 74 | * Compute expected life years at estimated Gompertz parameters, DISCRETE method 75 | rename (gomp_int_estimated gomp_slope_estimated) (gomp_int gomp_slope) 76 | mata: gen_gomp_lifeyears_disc(40, 90) 77 | rename expectedLY_gomp expectedLY_estdiscrete 78 | rename (gomp_int gomp_slope) (gomp_int_estimated gomp_slope_estimated) 79 | 80 | 81 | foreach t in est adj estdiscrete { 82 | gen diff_`t' = expectedLY_`t' - expectedLY_true 83 | gen reldiff_`t' = diff_`t' / expectedLY_true 84 | } 85 | 86 | 87 | sum diff* 88 | sum reldiff* 89 | -------------------------------------------------------------------------------- /code/tests/Test Julia Gompertz matches Stata Gompertz.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do `"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Check for Julia configuration 11 | project, original("${root}/code/set_julia.do") 12 | 
confirm file "${root}/code/set_julia.do" 13 | 14 | * Create required folders 15 | cap mkdir "${root}/scratch/tests" 16 | cap mkdir "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz" 17 | 18 | * Add ado files to project 19 | project, original("${root}/code/ado/estimate_gompertz2.ado") 20 | project, original("${root}/code/ado/mle_gomp_est.ado") 21 | project, original("${root}/code/ado/fastregby_gompMLE_julia.ado") 22 | project, original("${root}/code/ado/estimate_gompertz.jl") 23 | 24 | /*** Test whether Gompertz estimates generated by Julia 25 | match those generated in Stata. 26 | ***/ 27 | 28 | 29 | ******************************************************************** 30 | ************** National Life Expectancy Estimates ***************** 31 | ******************************************************************** 32 | 33 | ******* 34 | *** National, by Gender x Income Percentile 35 | ******* 36 | 37 | * Load mortality rates 38 | project, original("${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta") 39 | use "${root}/data/derived/Mortality Rates/mskd_national_mortratesBY_gnd_hhincpctile_age_year.dta", clear 40 | keep if age_at_d >= 40 41 | 42 | * Calculate Gompertz parameters from mortality rates 43 | foreach prog in Stata Julia { 44 | 45 | if ("`prog'"=="Stata") global juliapath "" 46 | else include "${root}/code/set_julia.do" 47 | 48 | foreach vce in oim "" { 49 | 50 | preserve 51 | 52 | estimate_gompertz2 gnd pctile, age(age_at_d) mort(mortrate) n(count) /// 53 | collapsefrom(gnd pctile age_at_d yod) type(mle) vce(`vce') 54 | 55 | save "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - `prog' `vce'.dta", replace 56 | project, creates("${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - `prog' `vce'.dta") 57 | 58 | restore 59 | 60 | } 61 | 62 | } 63 | 64 | 65 | *** Perform comparisons 66 | 67 | cap program drop compare_gompest 68 | program define compare_gompest 69 | 70 | syntax, [vce_julia(name) vce_stata(name) acceptedreldif(string)] 71 | 72 | project, uses("${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Julia `vce_julia'.dta") 73 | project, uses("${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Stata `vce_stata'.dta") 74 | 75 | use "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Julia `vce_julia'.dta", clear 76 | 77 | rename gomp* Jgomp* 78 | rename A* JA* 79 | 80 | merge 1:1 gnd pctile using "${root}/scratch/tests/Julia Gompertz matches Stata Gompertz/national_gompBY_gnd_hhincpctile - Stata `vce_stata'.dta", /// 81 | assert(3) nogen 82 | 83 | foreach var of varlist gomp* A* { 84 | gen diff_`var' = reldif(J`var', `var') 85 | if ("`acceptedreldif'"!="") assert diff_`var'<`acceptedreldif' 86 | } 87 | sum diff* 88 | drop diff* 89 | 90 | end 91 | 92 | compare_gompest, vce_julia("") vce_stata("") acceptedreldif(1e-5) 93 | compare_gompest, vce_julia("oim") vce_stata("oim") acceptedreldif(1e-5) 94 | -------------------------------------------------------------------------------- /code/tests/Test gen_mortrates.do: -------------------------------------------------------------------------------- 1 | * Set $root 2 | return clear 3 | capture project, doinfo 4 | if (_rc==0 & !mi(r(pname))) global root `r(pdir)' // using -project- 5 | else { // running directly 6 | if ("${mortality_root}"=="") do 
`"`c(sysdir_personal)'profile.do"' 7 | do "${mortality_root}/code/set_environment.do" 8 | } 9 | 10 | * Create required folders 11 | cap mkdir "${root}/scratch/tests" 12 | cap mkdir "${root}/scratch/tests/Test gen_mortrates ado" 13 | 14 | * Add ado files to project 15 | project, original("${root}/code/ado/gen_mortrates2.ado") 16 | 17 | 18 | /*** Test whether gen_mortrates2.ado properly handles input data. 19 | ***/ 20 | 21 | 22 | ********************************************************* 23 | *** Generate synthetic output: mortality rate dataset *** 24 | ********************************************************* 25 | 26 | * Construct mortality rate dataset 27 | set obs 37 28 | 29 | gen grp=1 30 | gen byte age_at_d=_n+39 31 | gen lag = 2 + (age_at_d>=64) * (age_at_d-63) 32 | gen int yod = lag + 1999 33 | 34 | gen mortrate=exp(-10 + 0.1 * age_at_d) // Gompertz with int=-10 and slope=0.1 35 | gen count = 100000 in 1 36 | replace count = cond(lag==2, 100000, count[_n-1] * (1-mortrate[_n-1])) in 2/`=_N' 37 | 38 | * Reduce precision of pop counts (which are always integers in true data) 39 | replace count = round(count) 40 | 41 | * Output 42 | save13 "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta", replace 43 | project, creates("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") 44 | 45 | 46 | ************************************************ 47 | *** Generate synthetic input: deadby dataset *** 48 | ************************************************ 49 | 50 | * Construct deadby dataset 51 | clear 52 | set obs 24 53 | 54 | gen grp=1 55 | gen age=_n+37 56 | gen tax_yr=1999 57 | 58 | gen long count=100000 59 | 60 | gen byte deadby_1_Mean = 0 61 | forvalues i=2/18 { 62 | 63 | local survivors count * (1-deadby_`=`i'-1'_Mean) 64 | local mortrate exp(-10 + 0.1 * (age+`i')) 65 | local deathsini `survivors' * `mortrate' 66 | 67 | gen double deadby_`i'_Mean = (count - `survivors' + `deathsini') / count if tax_yr + `i' <= 2014 68 | } 69 | 70 | * Output 71 | save13 "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", replace 72 | project, creates("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 73 | 74 | 75 | ********************* 76 | *** Perform Tests *** 77 | ********************* 78 | 79 | *** Test with perfect input data 80 | 81 | * Generate mortrates 82 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 83 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 84 | 85 | gen_mortrates2 grp, age(age) year(tax_yr) n(count) 86 | 87 | * Reduce precision 88 | recast float mortrate count, force 89 | replace count=round(count) 90 | 91 | * Perform comparison 92 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") preserve 93 | cf _all using "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta" 94 | 95 | 96 | *** Test with a dropped deadby column 97 | 98 | * Generate mortrates 99 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 100 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 101 | 102 | drop deadby_3_Mean 103 | 104 | rcof "gen_mortrates2 grp, age(age) year(tax_yr) n(count)" == 111 105 | 106 | 107 | *** Test with a missing deadby column 108 | 109 | * Generate mortrates 110 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 111 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 112 | 113 | 
replace deadby_3_Mean=. // column exists but all values are missing 114 | 115 | rcof "gen_mortrates2 grp, age(age) year(tax_yr) n(count)" == 9 // expect r(9): assertion is false 116 | 117 | 118 | *** Test with missing LAST deadby column 119 | 120 | * Generate mortrates 121 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 122 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 123 | 124 | replace deadby_15_Mean=. // deadby_15 is the last populated column (tax_yr + 15 = 2014) 125 | 126 | gen_mortrates2 grp, age(age) year(tax_yr) n(count) 127 | 128 | * Reduce precision 129 | recast float mortrate count, force 130 | replace count=round(count) 131 | 132 | * Perform comparison 133 | isid grp age_at_d yod 134 | ds grp age_at_d yod, not 135 | foreach var in `r(varlist)' { 136 | rename `var' gen_`var' 137 | } 138 | 139 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") preserve 140 | merge 1:1 grp age_at_d yod using "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta" 141 | 142 | count if _merge==2 143 | assert r(N)==1 // only the obs that requires deadby_15 (the oldest age at death) should be absent from the generated data 144 | 145 | foreach var of varlist lag mortrate count { 146 | assert gen_`var' == `var' | mi(gen_`var') // wherever values were generated, they match the synthetic truth 147 | } 148 | 149 | 150 | *** Test with missing unused deadby data 151 | 152 | * Generate mortrates 153 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta") 154 | use "${root}/scratch/tests/Test gen_mortrates ado/synthetic_deadby.dta", clear 155 | 156 | forvalues i=3/18 { 157 | replace deadby_`i'_Mean=. if age!=61 // deadby_3+ is only used for the age-61 cohort (deaths at ages 64+), so removing it elsewhere should leave the output unchanged 158 | } 159 | 160 | gen_mortrates2 grp, age(age) year(tax_yr) n(count) 161 | 162 | * Reduce precision 163 | recast float mortrate count, force 164 | replace count=round(count) 165 | 166 | * Perform comparison 167 | project, uses("${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta") preserve 168 | cf _all using "${root}/scratch/tests/Test gen_mortrates ado/synthetic_mortrates.dta" 169 | -------------------------------------------------------------------------------- /mortality.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization *** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | project, original("`pdir'/code/ado/scheme-leap.scheme") 13 | set scheme leap 14 | 15 | cap mkdir "`pdir'/scratch" 16 | 17 | *** Load dependencies into project *** 18 | 19 | project, relies_on("`pdir'/code/set_environment.do") 20 | project, relies_on("`pdir'/code/ado/scalarout.ado") 21 | project, relies_on("`pdir'/code/ado_ssc/save13.ado") 22 | 23 | foreach dir in "ado_maptile_geo" "ado_ssc" { 24 | 25 | cd `"`pdir'/code/`dir'"' 26 | local files : dir "`c(pwd)'" files "*" 27 | 28 | foreach file in `files' { 29 | if substr("`file'",1,1)!="." 
project, relies_on("`file'") 30 | } 31 | 32 | cd `"`pdir'"' 33 | 34 | } 35 | 36 | 37 | ****************** 38 | *** Covariates *** 39 | ****************** 40 | 41 | project, do("code/covariates/Health behavior maps.do") 42 | 43 | 44 | ********************* 45 | *** Race shifters *** 46 | ********************* 47 | 48 | * Figures analyzing race shifters 49 | project, do(code/nlms/nlms_mortality_profiles_BYrace.do) 50 | project, do(code/nlms/nlms_analyze_raceshifters_BYincq_BYcsregion.do) 51 | project, do("code/nlms/Explore mortality profiles implied by race shifters.do") 52 | 53 | 54 | ******************* 55 | *** Race shares *** 56 | ******************* 57 | 58 | * National sensitivity checks 59 | project, do("code/raceshares/national_Explore race fraction smoothing.do") 60 | project, do("code/raceshares/national_Race share extrapolation check - nonparametric age 51 test.do") 61 | 62 | 63 | * Local race shares 64 | project, do("code/raceshares/Explore CZ race share maps.do") 65 | 66 | 67 | *********************** 68 | *** Mortality rates *** 69 | *********************** 70 | 71 | project, do("code/mortality/Plot observed mortality and survival profiles.do") 72 | project, do("code/mortality/Sample comparison - NCHS SSA IRS.do") 73 | project, do("code/mortality/Lag invariance.do") 74 | project, do("code/mortality/Decompose mortality into medical v external.do") 75 | 76 | project, do("code/mortality/Summary stats on number of obs and income.do") 77 | project, do("code/mortality/Output income means by national income quantile.do") 78 | 79 | ************************* 80 | *** Life Expectancies *** 81 | ************************* 82 | 83 | project, do("code/lifeexpectancy/Plot signal by local population level.do") 84 | 85 | project, do("code/lifeexpectancy/Generate Signal Standard Deviations.do") 86 | 87 | project, do("code/lifeexpectancy/Check correlations in CZ LE trends.do") 88 | 89 | project, do("code/lifeexpectancy/Plot national LE profiles by percentile.do") 90 | project, do("code/lifeexpectancy/National LE profile sensitivity checks.do") 91 | project, do("code/lifeexpectancy/Plot national LE trends by income.do") 92 | project, do("code/lifeexpectancy/Plot major city LE profiles by ventile.do") 93 | project, do(code/lifeexpectancy/le_levels_maps.do) 94 | project, do(code/lifeexpectancy/le_trends_maps.do) 95 | project, do(code/lifeexpectancy/LE ranked lists of states.do) 96 | project, do(code/lifeexpectancy/le_correlations.do) 97 | project, do("code/lifeexpectancy/State Level Inequality Correlations.do") 98 | project, do("code/lifeexpectancy/Plot international comparison of LE at age 40.do") 99 | project, do("code/lifeexpectancy/List top 10 and bottom 10 CZs.do") 100 | project, do("code/lifeexpectancy/Plot CZ LE trend scatters.do") 101 | 102 | project, do("code/lifeexpectancy/Sensitivity analyses of local LEs.do") 103 | project, do("code/lifeexpectancy/Standard errors vs population size.do") 104 | 105 | project, do("code/lifeexpectancy/Explore national trends at fixed income levels.do") 106 | 107 | project, do("code/lifeexpectancy/Compute Years of Social Security Eligibility.do") 108 | 109 | **************************************** 110 | *** Output numbered figures & tables *** 111 | **************************************** 112 | 113 | project, do(code/assign_fig_tab_numbers.do) 114 | -------------------------------------------------------------------------------- /mortality_datagen.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization 
*** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | cap mkdir "`pdir'/data/derived" 13 | cap mkdir "`pdir'/scratch" 14 | 15 | *** Load dependencies into project *** 16 | 17 | project, relies_on("`pdir'/code/set_environment.do") 18 | project, relies_on("`pdir'/code/ado_ssc/save13.ado") 19 | 20 | ****************** 21 | *** Covariates *** 22 | ****************** 23 | 24 | project, do(code/covariates/cz_names.do) 25 | 26 | project, do(code/covariates/clean_pop_labforce_change1980to2000.do) 27 | project, do(code/covariates/clean_educ.do) 28 | project, do(code/covariates/clean_uninsurance.do) 29 | project, do(code/covariates/clean_racefractions.do) 30 | project, do(code/covariates/clean_dartmouthatlas.do) 31 | project, do(code/covariates/clean_brfss.do) 32 | project, do(code/covariates/clean_hospquality.do) 33 | project, do(code/covariates/clean_causeofdeath.do) 34 | 35 | project, do(code/covariates/combine_all_covariates.do) 36 | 37 | 38 | ********************* 39 | *** Race shifters *** 40 | ********************* 41 | 42 | * Prepare NLMS data 43 | project, do(code/nlms/nlms_loaddata.do) 44 | project, do(code/nlms/nlms_createsample.do) 45 | 46 | * Generate race shifters 47 | project, do(code/nlms/nlms_generate_shifters.do) 48 | 49 | * Bootstrap race shifters 50 | project, do(code/nlms/nlms_bootstrap_shifters.do) 51 | 52 | 53 | ******************* 54 | *** Race shares *** 55 | ******************* 56 | 57 | ** National 58 | project, do(code/raceshares/national_racepopBY_year_age_gnd.do) // load population data 59 | 60 | project, do(code/raceshares/national_racefracBY_workingage_gnd_incpctile.do) // compute income distributions 61 | project, do(code/raceshares/national_racepopBY_year_age_gnd_incpctile.do) // impute income dimension in pop data 62 | 63 | project, do(code/raceshares/national_raceshareBY_year_age_gnd_incpctile.do) // pooled 2001-2014 and by-year race shares 64 | project, do(code/raceshares/national_raceshareBY_gnd.do) // reference race shares 65 | 66 | ** Local 67 | 68 | * Local race populations 69 | project, do(code/raceshares/cty_racepopBY_year_agebin_gnd.do) // county pop without income dim from Inter/Postcensal estimates 70 | 71 | * Local income distributions 72 | project, do(code/raceshares/cty_racepopBY_workingagebin_hhincbin.do) // HH counts with income bins from Census 2000 SF3 tables 73 | project, do("code/raceshares/Explore income distribution over 16 income bins in County SF3 data.do") 74 | project, do("code/raceshares/SF3 incbin weights for national income quantiles.do") 75 | project, do(code/raceshares/cty_cz_st_racefracBY_workingagebin_hhincquantile.do) 76 | project, do(code/raceshares/cty_cz_st_racepopBY_year_agebin_gnd_hhincquantile.do) 77 | 78 | * Local race shares 79 | project, do(code/raceshares/cty_cz_st_raceshareBY_year_agebin_gnd_hhincquantile.do) 80 | 81 | 82 | *********************** 83 | *** Mortality rates *** 84 | *********************** 85 | 86 | project, do(code/mortality/construct_cdc_mortrates.do) 87 | 88 | 89 | ************************* 90 | *** Life Expectancies *** 91 | ************************* 92 | 93 | project, do(code/lifeexpectancy/generate_life_expectancies.do) 94 | project, do(code/lifeexpectancy/bootstrap_le.do) 95 | project, do(code/lifeexpectancy/compute_LE_trends.do) 96 | project, do(code/lifeexpectancy/generate_bootstrap_confidence_estimates.do) 97 | 98 | project, do("code/lifeexpectancy/Estimate NCHS 
State LEs pooling income groups.do") 99 | 100 | ************************** 101 | *** Online Data Tables *** 102 | ************************** 103 | 104 | project, do(code/create_online_tables.do) 105 | -------------------------------------------------------------------------------- /mortality_init.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization *** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | cap mkdir "`pdir'/data/derived" 13 | cap mkdir "`pdir'/scratch" 14 | 15 | *** Load dependencies into project *** 16 | 17 | project, relies_on("`pdir'/code/set_environment.do") 18 | project, relies_on("`pdir'/code/ado_ssc/save13.ado") 19 | 20 | *********************** 21 | *** Mortality rates *** 22 | *********************** 23 | 24 | project, do("code/mortality/Construct multi-source death and population counts.do") 25 | project, do(code/mortality/convert_IRScollapses_to_mortrates.do) 26 | 27 | project, do(code/mortality/estimate_irs_gompertz_parameters.do) 28 | 29 | project, do("code/mortality/Calculate aggregate deaths.do") 30 | -------------------------------------------------------------------------------- /mortality_tests.do: -------------------------------------------------------------------------------- 1 | 2 | *** Initialization *** 3 | version 13.1 4 | set more off 5 | set varabbrev off 6 | 7 | project, doinfo 8 | local pdir=r(pdir) 9 | adopath ++ "`pdir'/code/ado_ssc" 10 | adopath ++ "`pdir'/code/ado" 11 | 12 | 13 | ****************** 14 | 15 | project, do("code/tests/Explore runtimes for estimating race shifters.do") 16 | 17 | project, do("code/tests/Test Julia Gompertz matches Stata Gompertz.do") 18 | project, do("code/tests/Test gen_mortrates.do") 19 | project, do("code/tests/Check completeness of mortality data.do") 20 | 21 | project, do("code/tests/Simulate various methods of deriving LE from Gompertz parameters.do") 22 | 23 | --------------------------------------------------------------------------------
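A note on the synthetic inputs built in code/tests/Test gen_mortrates.do above: the deadby_i columns follow the recursion deadby_i = deadby_{i-1} + (1 - deadby_{i-1}) * m(age + i), where m(a) = exp(-10 + 0.1*a) is the same Gompertz hazard used for the synthetic mortality rates, so deadby_i equals one minus the product of the one-year survival probabilities. The short standalone Stata sketch below (illustrative only, not part of the replication code or the -project- build; the starting age of 40 and the 2-15 horizon are arbitrary choices) checks that equivalence numerically:

* Illustrative check of the deadby recursion used in "Test gen_mortrates.do"
* (standalone sketch; age 40 and the 2-15 horizon are arbitrary)
clear
set obs 1
gen double deadby   = 0   // cumulative death probability, analogous to deadby_1_Mean = 0
gen double survival = 1   // running product of one-year survival probabilities
forvalues i = 2/15 {
	replace deadby   = deadby + (1 - deadby) * exp(-10 + 0.1*(40 + `i'))
	replace survival = survival * (1 - exp(-10 + 0.1*(40 + `i')))
}
assert reldif(deadby, 1 - survival) < 1e-12   // the two formulations agree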