├── .gitignore ├── 01_make_data_for_dep_index.R ├── 02_pca_on_dep_index_data.R ├── 03_calculate_dep_index_by_ZCTA.R ├── 2018_dep_index ├── 01_make_data_for_dep_index.R ├── 02_pca_on_dep_index_data.R ├── 03_calculate_dep_index_by_ZCTA.R ├── 04_compare_2015_and_2018_dep_index.R ├── 05_make_interactive_map.R ├── 2015_vs_2018.png ├── ACS_deprivation_index_by_census_tracts.csv ├── ACS_deprivation_index_by_census_tracts.rds ├── ACS_deprivation_index_by_zipcode.csv ├── ACS_deprivation_index_by_zipcode.rds ├── README.md └── figs │ ├── PCs_pairs_plot.jpg │ ├── acs_data_pairs_plot.jpg │ ├── acs_measure_weights_on_dep_index.jpg │ ├── dep_index_and_acs_measures_xyplots.jpg │ ├── dep_index_density.jpg │ ├── variance_of_acs_explained_by_dep_index.jpg │ └── variance_of_acs_explained_by_dep_index.md ├── ACS_deprivation_index_by_census_tracts.csv ├── ACS_deprivation_index_by_census_tracts.rds ├── ACS_deprivation_index_by_zipcode.csv ├── ACS_deprivation_index_by_zipcode.rds ├── CITATION.cff ├── README.md ├── dep_index.Rproj ├── figs ├── PCs_pairs_plot.jpg ├── acs_data_pairs_plot.jpg ├── acs_measure_weights_on_dep_index.jpg ├── dep_index_and_acs_measures_xyplots.jpg ├── dep_index_density.jpg ├── dep_index_nationwide_map.jpeg ├── deprivation_index_map_cincinnati.jpeg ├── variance_of_acs_explained_by_dep_index.jpg └── variance_of_acs_explained_by_dep_index.md ├── interactive_map_2018_dep_index.html └── weighted_avg ├── dep_index_2018_pop_under_18_weighted_avg.R ├── dep_index_pop_under_18_weighted_avg.R └── dep_index_pop_under_18_weighted_avg.html /.gitignore: -------------------------------------------------------------------------------- 1 | *.pdf 2 | data_for_dep_index.rds 3 | .Rproj.user 4 | .DS_Store 5 | /s3_downloads/ 6 | interactive_map_2018_dep_index.html 7 | -------------------------------------------------------------------------------- /01_make_data_for_dep_index.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(sf) 3 | library(tidycensus) 4 | census_api_key(Sys.getenv('CENSUS_API_KEY')) 5 | library(tigris) 6 | options(tigris_use_cache=TRUE) 7 | options(tigris_class='sf') 8 | 9 | #### get all ACS tract-level variables ----------------------------------- 10 | 11 | states_needed <- tigris::fips_codes %>% 12 | select(state_code, state_name) %>% 13 | filter(! state_name %in% c('American Samoa', 'Guam', 'Northern Mariana Islands', 14 | 'Puerto Rico', 'U.S. Minor Outlying Islands', 15 | 'U.S. Virgin Islands')) %>% 16 | unique() %>% 17 | pull(state_code) 18 | 19 | ## fraction_poverty 20 | # income in past 12 months below poverty level 21 | # B17001_001: total 22 | # B17001_002: n 23 | acs_poverty <- get_acs(geography = 'tract', 24 | variables = 'B17001_002', 25 | summary_var = 'B17001_001', 26 | endyear = 2015, 27 | state = states_needed) %>% 28 | mutate(fraction_poverty = estimate / summary_est) %>% 29 | select(GEOID, fraction_poverty) 30 | 31 | ## median_income 32 | # median household income in the past 12 months in 2015 inflation-adjusted dollars 33 | # B19013_001: est 34 | acs_income <- get_acs(geography = 'tract', 35 | variables = 'B19013_001', 36 | endyear = 2015, 37 | state = states_needed) %>% 38 | mutate(median_income = estimate) %>% 39 | select(GEOID, median_income) 40 | 41 | ## fraction_high_school_edu 42 | # population 25 and older with edu attainment of at least high school graduate (includes GED equivalency) 43 | # B15003_001: total 44 | # B15003_{017 - 025}: n 45 | acs_edu <- get_acs(geography = 'tract', 46 | variables = paste0('B15003_0',17:25), 47 | summary_var = 'B15003_001', 48 | endyear = 2015, 49 | state = states_needed) %>% 50 | group_by(GEOID) %>% 51 | summarize(high_school_edu = sum(estimate), 52 | total = unique(summary_est)) %>% 53 | mutate(fraction_high_school_edu = high_school_edu / total) %>% 54 | select(GEOID, fraction_high_school_edu) 55 | 56 | ## fraction_no_health_ins 57 | # no type of insurance coverage 58 | # B27010_001: total 59 | # B27010_{017,033,050,066}: n 60 | acs_ins <- get_acs(geography = 'tract', 61 | variables = paste0('B27010_0',c(17, 33, 50, 66)), 62 | summary_var = 'B27010_001', 63 | endyear = 2015, 64 | state = states_needed) %>% 65 | group_by(GEOID) %>% 66 | summarize(no_health_ins = sum(estimate), 67 | total = unique(summary_est)) %>% 68 | mutate(fraction_no_health_ins = no_health_ins / total) %>% 69 | select(GEOID, fraction_no_health_ins) 70 | 71 | ## fraction_assisted_income 72 | # public assistance income or food Stamps/SNAP in the past 12 months for households 73 | # B19058_001: total 74 | # B19058_002: n 75 | acs_assisted_income <- get_acs(geography = 'tract', 76 | variables = 'B19058_002', 77 | summary_var = 'B19058_001', 78 | endyear = 2015, 79 | state = states_needed) %>% 80 | group_by(GEOID) %>% 81 | mutate(fraction_assisted_income = estimate / summary_est) %>% 82 | select(GEOID, fraction_assisted_income) 83 | 84 | ## fraction_vacant_housing 85 | # vacancy status: 86 | # B25002_001: total 87 | # B25002_003: n 88 | acs_vacancy_status <- get_acs(geography = 'tract', 89 | variables = 'B25002_003', 90 | summary_var = 'B25002_001', 91 | endyear = 2015, 92 | state = states_needed) %>% 93 | group_by(GEOID) %>% 94 | mutate(fraction_vacant_housing = estimate / summary_est) %>% 95 | select(GEOID, fraction_vacant_housing) 96 | 97 | ## merge all acs variables in to data 98 | 99 | d <- reduce(.x = list(acs_assisted_income, acs_edu, acs_income, 100 | acs_ins, acs_poverty, acs_vacancy_status), 101 | .f = function(.x, .y) left_join(.x, .y, by='GEOID')) %>% 102 | rename(census_tract_fips = GEOID) 103 | 104 | saveRDS(d, 'data_for_dep_index.rds') 105 | -------------------------------------------------------------------------------- /02_pca_on_dep_index_data.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | d <- readRDS('data_for_dep_index.rds') %>% as_tibble() 4 | 5 | #### visualize data for dep index -------------------------------------- 6 | dir.create('figs') 7 | 8 | library(GGally) 9 | 10 | d %>% ungroup() %>% select(-census_tract_fips) %>% 11 | ggpairs(lower = list(continuous = wrap('points', alpha=0.1))) 12 | save_pdf('figs/acs_data_pairs_plot.pdf', width=12, height=12, jpg=TRUE) 13 | 14 | 15 | # # Hamilton county only 16 | # d_hamilton <- d %>% 17 | # filter(substr(census_tract_fips, 1, 5) == '39061') 18 | # d_hamilton %>% 19 | # ungroup() %>% 20 | # select(-census_tract_fips) %>% 21 | # ggpairs(lower = list(continuous = wrap('points', alpha=0.3))) 22 | # save_pdf('figs/acs_data_pairs_plot_Hamilton_only.pdf', width=11, height=11, jpg=TRUE) 23 | 24 | #### principal components analysis ---------------------------------------- 25 | 26 | # will be missing for 999 of 73,056 census tracts 27 | d_pca <- d %>% 28 | na.omit() %>% 29 | ungroup() %>% 30 | select(-census_tract_fips) %>% 31 | prcomp(center=TRUE, scale=TRUE) 32 | 33 | # table variance explained by component 34 | summary(d_pca)$importance %>% 35 | as_tibble() %>% 36 | mutate(measure = row.names(summary(d_pca)$importance)) %>% 37 | slice(-1) %>% 38 | select(measure, everything()) %>% 39 | knitr::kable(digits=2) %>% 40 | cat(file='figs/variance_of_acs_explained_by_dep_index.md', sep='\n') 41 | 42 | # plot variance explained by component 43 | summary(d_pca)$importance %>% 44 | as_tibble() %>% 45 | mutate(measure = row.names(summary(d_pca)$importance)) %>% 46 | gather(component, value, -measure) %>% 47 | filter(! measure == 'Standard deviation') %>% 48 | # filter(measure == 'Proportion of Variance') %>% 49 | ggplot(aes(component, value, alpha=measure)) + 50 | geom_bar(stat='identity', position=position_dodge(0)) + 51 | labs(title = 'Variance of ACS Measures Expained by Deprivation Indices') + 52 | theme(legend.title=element_blank()) + 53 | xlab('index') + ylab('variance') 54 | save_pdf('figs/variance_of_acs_explained_by_dep_index.pdf', width=10, height=4, jpg=TRUE) 55 | 56 | # inverse sign all weights so higher PC1 means more deprivation 57 | dep_weights <- d_pca$rotation %>% 58 | as_tibble() %>% 59 | mutate(measure = row.names(d_pca$rotation)) %>% 60 | gather(component, weight, -measure) %>% 61 | mutate(weight = -1 * weight) 62 | 63 | # plot loading weights 64 | dep_weights %>% 65 | ggplot(aes(measure, weight)) + 66 | geom_bar(stat='identity') + 67 | coord_flip() + 68 | facet_wrap(~ component) + 69 | labs(title='Weights of ACS Measure on Deprivation Indices') + 70 | xlab(' ') 71 | save_pdf('figs/acs_measure_weights_on_dep_index.pdf', width=10, height=6, jpg=TRUE) 72 | 73 | # visualize transformed indices for the census tracts 74 | d_pca$x %>% 75 | as_tibble() %>% 76 | ggpairs(lower = list(continuous = wrap('points', alpha=0.3))) 77 | save_pdf('figs/PCs_pairs_plot.pdf', width=12, height=12, jpg=TRUE) 78 | 79 | # take the first pc and norm to [0,1] 80 | # reverse magnitude so higher value means higher deprivation 81 | dep_index <- d_pca$x %>% 82 | as_tibble() %>% 83 | select(dep_index = PC1) %>% 84 | mutate(dep_index = -1 * dep_index) %>% 85 | mutate(dep_index = (dep_index - min(dep_index)) / diff(range(dep_index))) %>% 86 | mutate(census_tract_fips = d %>% na.omit() %>% pull(census_tract_fips)) 87 | 88 | # visualize univariate distribution of dep_index 89 | dep_index %>% 90 | ggplot(aes(dep_index)) + 91 | geom_density(fill='lightgrey') + 92 | labs(title='Distribution of Deprivation Index for All US Census Tracts') + 93 | xlab('deprivation index') 94 | save_pdf('figs/dep_index_density.pdf', width=10, height = 5, jpg=TRUE) 95 | 96 | # merge in and save 97 | d <- left_join(d, dep_index, by='census_tract_fips') 98 | saveRDS(d, 'ACS_deprivation_index_by_census_tracts.rds') 99 | rio::export(d, 'ACS_deprivation_index_by_census_tracts.csv') 100 | 101 | ## save as shapefile 102 | us_tracts <- read_sf('../us_tract_2015', 'US_tract_2015') 103 | tracts_data <- left_join(us_tracts, d, by=c('GEOID' = 'census_tract_fips')) 104 | 105 | st_write(tracts_data, '../dep_index_2015.shp') 106 | 107 | # pairs plot including indices 108 | d %>% 109 | ungroup() %>% 110 | select(-census_tract_fips) %>% 111 | gather(measure, value, -dep_index) %>% 112 | ggplot(aes(dep_index, value)) + 113 | geom_point(alpha=0.5) + 114 | facet_wrap(~ measure, scales='free') + 115 | labs(title = 'Relationship of Deprivation Index with ACS Measures') + 116 | xlab('deprivation index') + ylab('') 117 | save_pdf('figs/dep_index_and_acs_measures_xyplots.pdf', width=12, height=7, jpg=TRUE) 118 | 119 | -------------------------------------------------------------------------------- /03_calculate_dep_index_by_ZCTA.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | dep_index <- readRDS(gzcon(url('https://github.com/cole-brokamp/dep_index/raw/master/ACS_deprivation_index_by_census_tracts.rds'))) 4 | 5 | ZCTA_tract_crosswalk <- 6 | read.table("https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_tract_rel_10.txt", 7 | header = TRUE, 8 | sep = ',', 9 | colClasses = c('ZCTA5' = 'character' , 'GEOID' = 'character')) %>% 10 | select(ZCTA5, GEOID) 11 | 12 | dep_index_ZCTA <- left_join(dep_index, ZCTA_data, by = c('census_tract_fips' = 'GEOID')) 13 | 14 | # set ZCTA values as mean of all containing tract values 15 | # make value NA if result is NaN because no non-missing values are available for a ZCTA 16 | ZCTA_data <- dep_index_ZCTA %>% 17 | group_by(ZCTA5) %>% 18 | summarize_if(is.numeric, mean, na.rm = TRUE) %>% 19 | mutate_if(is.numeric, na_if, y = 'NaN') 20 | 21 | write.csv(ZCTA_data, file= 'ACS_deprivation_index_by_zipcode.csv', row.names = FALSE) 22 | saveRDS(ZCTA_data, "ACS_deprivation_index_by_zipcode.rds") 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /2018_dep_index/01_make_data_for_dep_index.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(sf) 3 | library(tidycensus) 4 | census_api_key(Sys.getenv('CENSUS_API_KEY')) 5 | library(tigris) 6 | options(tigris_use_cache=TRUE) 7 | options(tigris_class='sf') 8 | 9 | #### get all ACS tract-level variables ----------------------------------- 10 | 11 | states_needed <- tigris::fips_codes %>% 12 | select(state_code, state_name) %>% 13 | filter(! state_name %in% c('American Samoa', 'Guam', 'Northern Mariana Islands', 14 | 'Puerto Rico', 'U.S. Minor Outlying Islands', 15 | 'U.S. Virgin Islands')) %>% 16 | unique() %>% 17 | pull(state_code) 18 | 19 | ## fraction_poverty 20 | # income in past 12 months below poverty level 21 | # B17001_001: total 22 | # B17001_002: n 23 | acs_poverty <- get_acs(geography = 'tract', 24 | variables = 'B17001_002', 25 | summary_var = 'B17001_001', 26 | year = 2018, 27 | state = states_needed) %>% 28 | mutate(fraction_poverty = estimate / summary_est) %>% 29 | select(GEOID, fraction_poverty) 30 | 31 | ## median_income 32 | # median household income in the past 12 months in 2015 inflation-adjusted dollars 33 | # B19013_001: est 34 | acs_income <- get_acs(geography = 'tract', 35 | variables = 'B19013_001', 36 | year = 2018, 37 | state = states_needed) %>% 38 | mutate(median_income = estimate) %>% 39 | select(GEOID, median_income) 40 | 41 | ## fraction_high_school_edu 42 | # population 25 and older with edu attainment of at least high school graduate (includes GED equivalency) 43 | # B15003_001: total 44 | # B15003_{017 - 025}: n 45 | acs_edu <- get_acs(geography = 'tract', 46 | variables = paste0('B15003_0',17:25), 47 | summary_var = 'B15003_001', 48 | year = 2018, 49 | state = states_needed) %>% 50 | group_by(GEOID) %>% 51 | summarize(high_school_edu = sum(estimate), 52 | total = unique(summary_est)) %>% 53 | mutate(fraction_high_school_edu = high_school_edu / total) %>% 54 | select(GEOID, fraction_high_school_edu) 55 | 56 | ## fraction_no_health_ins 57 | # no type of insurance coverage 58 | # B27010_001: total 59 | # B27010_{017,033,050,066}: n 60 | acs_ins <- get_acs(geography = 'tract', 61 | variables = paste0('B27010_0',c(17, 33, 50, 66)), 62 | summary_var = 'B27010_001', 63 | year = 2018, 64 | state = states_needed) %>% 65 | group_by(GEOID) %>% 66 | summarize(no_health_ins = sum(estimate), 67 | total = unique(summary_est)) %>% 68 | mutate(fraction_no_health_ins = no_health_ins / total) %>% 69 | select(GEOID, fraction_no_health_ins) 70 | 71 | ## fraction_assisted_income 72 | # public assistance income or food Stamps/SNAP in the past 12 months for households 73 | # B19058_001: total 74 | # B19058_002: n 75 | acs_assisted_income <- get_acs(geography = 'tract', 76 | variables = 'B19058_002', 77 | summary_var = 'B19058_001', 78 | year = 2018, 79 | state = states_needed) %>% 80 | group_by(GEOID) %>% 81 | mutate(fraction_assisted_income = estimate / summary_est) %>% 82 | select(GEOID, fraction_assisted_income) 83 | 84 | ## fraction_vacant_housing 85 | # vacancy status: 86 | # B25002_001: total 87 | # B25002_003: n 88 | acs_vacancy_status <- get_acs(geography = 'tract', 89 | variables = 'B25002_003', 90 | summary_var = 'B25002_001', 91 | year = 2018, 92 | state = states_needed) %>% 93 | group_by(GEOID) %>% 94 | mutate(fraction_vacant_housing = estimate / summary_est) %>% 95 | select(GEOID, fraction_vacant_housing) 96 | 97 | ## merge all acs variables in to data 98 | 99 | d <- reduce(.x = list(acs_assisted_income, acs_edu, acs_income, 100 | acs_ins, acs_poverty, acs_vacancy_status), 101 | .f = function(.x, .y) left_join(.x, .y, by='GEOID')) %>% 102 | rename(census_tract_fips = GEOID) 103 | 104 | saveRDS(d, './2018_dep_index/data_for_dep_index.rds') 105 | -------------------------------------------------------------------------------- /2018_dep_index/02_pca_on_dep_index_data.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | d <- readRDS('./2018_dep_index/data_for_dep_index.rds') %>% as_tibble() 4 | 5 | #### visualize data for dep index -------------------------------------- 6 | dir.create('./2018_dep_index/figs') 7 | 8 | library(GGally) 9 | 10 | d %>% ungroup() %>% select(-census_tract_fips) %>% 11 | ggpairs(lower = list(continuous = wrap('points', alpha=0.1))) 12 | ggsave('./2018_dep_index/figs/acs_data_pairs_plot.jpg', width=12, height=12) 13 | # CB::save_pdf('./2018_dep_index/figs/acs_data_pairs_plot.pdf', width=12, height=12, jpg=TRUE) 14 | 15 | 16 | # # Hamilton county only 17 | # d_hamilton <- d %>% 18 | # filter(substr(census_tract_fips, 1, 5) == '39061') 19 | # d_hamilton %>% 20 | # ungroup() %>% 21 | # select(-census_tract_fips) %>% 22 | # ggpairs(lower = list(continuous = wrap('points', alpha=0.3))) 23 | # save_pdf('figs/acs_data_pairs_plot_Hamilton_only.pdf', width=11, height=11, jpg=TRUE) 24 | 25 | #### principal components analysis ---------------------------------------- 26 | 27 | # will be missing for 999 of 73,056 census tracts 28 | d_pca <- d %>% 29 | na.omit() %>% 30 | ungroup() %>% 31 | select(-census_tract_fips) %>% 32 | prcomp(center=TRUE, scale=TRUE) 33 | 34 | # table variance explained by component 35 | summary(d_pca)$importance %>% 36 | as_tibble() %>% 37 | mutate(measure = row.names(summary(d_pca)$importance)) %>% 38 | slice(-1) %>% 39 | select(measure, everything()) %>% 40 | knitr::kable(digits=2) %>% 41 | cat(file='./2018_dep_index/figs/variance_of_acs_explained_by_dep_index.md', sep='\n') 42 | 43 | # plot variance explained by component 44 | summary(d_pca)$importance %>% 45 | as_tibble() %>% 46 | mutate(measure = row.names(summary(d_pca)$importance)) %>% 47 | gather(component, value, -measure) %>% 48 | filter(! measure == 'Standard deviation') %>% 49 | # filter(measure == 'Proportion of Variance') %>% 50 | ggplot(aes(component, value, alpha=measure)) + 51 | geom_bar(stat='identity', position=position_dodge(0)) + 52 | labs(title = 'Variance of ACS Measures Expained by Deprivation Indices') + 53 | theme(legend.title=element_blank()) + 54 | xlab('index') + ylab('variance') 55 | ggsave('./2018_dep_index/figs/variance_of_acs_explained_by_dep_index.jpg', width=10, height=4) 56 | # CB::save_pdf('./2018_dep_index/figs/variance_of_acs_explained_by_dep_index.pdf', width=10, height=4, jpg=TRUE) 57 | 58 | # DO NOT inverse sign all weights so higher PC1 means more deprivation 59 | dep_weights <- d_pca$rotation %>% 60 | as_tibble() %>% 61 | mutate(measure = row.names(d_pca$rotation)) %>% 62 | gather(component, weight, -measure) 63 | 64 | # plot loading weights 65 | dep_weights %>% 66 | ggplot(aes(measure, weight)) + 67 | geom_bar(stat='identity') + 68 | coord_flip() + 69 | facet_wrap(~ component) + 70 | labs(title='Weights of ACS Measure on Deprivation Indices') + 71 | xlab(' ') 72 | ggsave('./2018_dep_index/figs/acs_measure_weights_on_dep_index.jpg', width=10, height=6) 73 | # CB::save_pdf('./2018_dep_index/figs/acs_measure_weights_on_dep_index.pdf', width=10, height=6, jpg=TRUE) 74 | 75 | # visualize transformed indices for the census tracts 76 | d_pca$x %>% 77 | as_tibble() %>% 78 | ggpairs(lower = list(continuous = wrap('points', alpha=0.3))) 79 | ggsave('./2018_dep_index/figs/PCs_pairs_plot.jpg', width=12, height=12) 80 | # CB::save_pdf('./2018_dep_index/figs/PCs_pairs_plot.pdf', width=12, height=12, jpg=TRUE) 81 | 82 | # take the first pc and norm to [0,1] 83 | # DO NOT reverse magnitude so higher value means higher deprivation 84 | dep_index <- d_pca$x %>% 85 | as_tibble() %>% 86 | select(dep_index = PC1) %>% 87 | mutate(dep_index = (dep_index - min(dep_index)) / diff(range(dep_index))) %>% 88 | mutate(census_tract_fips = d %>% na.omit() %>% pull(census_tract_fips)) 89 | 90 | # visualize univariate distribution of dep_index 91 | dep_index %>% 92 | ggplot(aes(dep_index)) + 93 | geom_density(fill='lightgrey') + 94 | labs(title='Distribution of Deprivation Index for All US Census Tracts') + 95 | xlab('deprivation index') 96 | ggsave('./2018_dep_index/figs/dep_index_density.jpg', width=10, height = 5) 97 | # CB::save_pdf('./2018_dep_index/figs/dep_index_density.pdf', width=10, height = 5, jpg=TRUE) 98 | 99 | # merge in and save 100 | d <- left_join(d, dep_index, by='census_tract_fips') 101 | saveRDS(d, './2018_dep_index/ACS_deprivation_index_by_census_tracts.rds') 102 | rio::export(d, './2018_dep_index/ACS_deprivation_index_by_census_tracts.csv') 103 | 104 | ## save as shapefile 105 | # states_needed <- tigris::fips_codes %>% 106 | # select(state_code, state_name) %>% 107 | # filter(! state_name %in% c('American Samoa', 'Guam', 'Northern Mariana Islands', 108 | # 'Puerto Rico', 'U.S. Minor Outlying Islands', 109 | # 'U.S. Virgin Islands')) %>% 110 | # unique() %>% 111 | # pull(state_code) 112 | # 113 | # us_tracts <- map(states_needed, ~tigris::tracts(state = .x, year = 2018)) 114 | # 115 | # tracts_data <- left_join(us_tracts, d, by=c('GEOID' = 'census_tract_fips')) 116 | # 117 | # st_write(tracts_data, '../dep_index_2018.shp') 118 | 119 | # pairs plot including indices 120 | d %>% 121 | ungroup() %>% 122 | select(-census_tract_fips) %>% 123 | gather(measure, value, -dep_index) %>% 124 | ggplot(aes(dep_index, value)) + 125 | geom_point(alpha=0.5) + 126 | facet_wrap(~ measure, scales='free') + 127 | labs(title = 'Relationship of Deprivation Index with ACS Measures') + 128 | xlab('deprivation index') + ylab('') 129 | ggsave('./2018_dep_index/figs/dep_index_and_acs_measures_xyplots.jpg', width=12, height=7) 130 | # CB::save_pdf('./2018_dep_index/figs/dep_index_and_acs_measures_xyplots.pdf', width=12, height=7, jpg=TRUE) 131 | 132 | -------------------------------------------------------------------------------- /2018_dep_index/03_calculate_dep_index_by_ZCTA.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | dep_index <- readRDS(gzcon(url('https://github.com/cole-brokamp/dep_index/raw/master/dep_index/ACS_deprivation_index_by_census_tracts.rds'))) 4 | 5 | ZCTA_tract_crosswalk <- 6 | read.table("https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_tract_rel_10.txt", 7 | header = TRUE, 8 | sep = ',', 9 | colClasses = c('ZCTA5' = 'character' , 'GEOID' = 'character')) %>% 10 | select(ZCTA5, GEOID) 11 | 12 | dep_index_ZCTA <- left_join(dep_index, ZCTA_tract_crosswalk, by = c('census_tract_fips' = 'GEOID')) 13 | 14 | # set ZCTA values as mean of all containing tract values 15 | # make value NA if result is NaN because no non-missing values are available for a ZCTA 16 | ZCTA_data <- dep_index_ZCTA %>% 17 | group_by(ZCTA5) %>% 18 | summarize_if(is.numeric, mean, na.rm = TRUE) %>% 19 | mutate_if(is.numeric, na_if, y = 'NaN') 20 | 21 | write_csv(ZCTA_data, './2018_dep_index/ACS_deprivation_index_by_zipcode.csv') 22 | saveRDS(ZCTA_data, "./2018_dep_index/ACS_deprivation_index_by_zipcode.rds") 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /2018_dep_index/04_compare_2015_and_2018_dep_index.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | dep_index <- 'https://github.com/cole-brokamp/dep_index/raw/master/ACS_deprivation_index_by_census_tracts.rds' %>% 4 | url() %>% 5 | gzcon() %>% 6 | readRDS() %>% 7 | as_tibble() %>% 8 | select(census_tract_fips, 9 | dep_index_2015 = dep_index) 10 | 11 | dep_index_2018 <- readRDS('./2018_dep_index/ACS_deprivation_index_by_census_tracts.rds') %>% 12 | select(census_tract_fips, 13 | dep_index_2018 = dep_index) 14 | 15 | compare_dep <- left_join(dep_index, dep_index_2018, by = 'census_tract_fips') 16 | 17 | ggplot() + 18 | geom_point(aes(x = dep_index_2015, y = dep_index_2018), 19 | data = compare_dep) + 20 | ggpubr::stat_cor(aes(x = dep_index_2015, y = dep_index_2018), 21 | data = compare_dep) 22 | ggsave('./2018_dep_index/2015_vs_2018.png') 23 | 24 | cor.test(x = compare_dep$dep_index_2015, y = compare_dep$dep_index_2018) 25 | -------------------------------------------------------------------------------- /2018_dep_index/05_make_interactive_map.R: -------------------------------------------------------------------------------- 1 | library(leaflet) 2 | library(leafgl) 3 | library(leaflet.extras) 4 | library(sf) 5 | options(viewer = NULL) 6 | 7 | dep_index <- 8 | s3::s3_get("s3://geomarker/tract_dep_index_2018.rds") |> 9 | readRDS() 10 | 11 | tracts <- tigris::tracts(year = 2019, cb = TRUE, progress_bar = FALSE) 12 | 13 | d <- 14 | dplyr::left_join(tracts, dep_index, by = c("GEOID" = "census_tract_fips")) |> 15 | dplyr::select(dep_index) |> 16 | st_cast("POLYGON") 17 | 18 | d$dep_index <- round(d$dep_index, digits = 2) 19 | d <- na.omit(d) 20 | 21 | national_map <- leaflet() |> 22 | addProviderTiles(provider = providers$CartoDB.Positron) |> 23 | addGlPolygons( 24 | data = d, 25 | fillColor = "dep_index", 26 | fillOpacity = 0.8, popup = c("dep_index"), 27 | src = TRUE 28 | ) |> 29 | setView(-93.65, 38.0285, zoom = 5) |> 30 | addScaleBar( 31 | position = "bottomright", options = 32 | scaleBarOptions(metric = TRUE) 33 | ) |> 34 | addFullscreenControl(position = "bottomleft") |> 35 | addResetMapButton() |> 36 | addControlGPS( 37 | options = 38 | gpsOptions( 39 | position = "topleft", 40 | activate = TRUE, 41 | autoCenter = TRUE, 42 | setView = TRUE 43 | ) 44 | ) 45 | 46 | mapview::mapshot(national_map, "interactive_map_2018_dep_index.html") 47 | -------------------------------------------------------------------------------- /2018_dep_index/2015_vs_2018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/2015_vs_2018.png -------------------------------------------------------------------------------- /2018_dep_index/ACS_deprivation_index_by_census_tracts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/ACS_deprivation_index_by_census_tracts.rds -------------------------------------------------------------------------------- /2018_dep_index/ACS_deprivation_index_by_zipcode.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/ACS_deprivation_index_by_zipcode.rds -------------------------------------------------------------------------------- /2018_dep_index/README.md: -------------------------------------------------------------------------------- 1 | # A Nationwide Community Deprivation Index 2 | 3 | Please see the main [README](../README.md) for methodological details. This document only contains output relevant to the update from the 2015 version of the index to the 2018 version. 4 | 5 | ## Getting the data 6 | 7 | ### Download the 2018 index CSV file 8 | 9 | The data is contained in a CSV file called [ACS_deprivation_index_by_census_tracts.csv](https://github.com/geomarker-io/dep_index/raw/master/2018_dep_index/ACS_deprivation_index_by_census_tracts.csv) which is a table of census tracts listed by their FIPS ID and corresponding deprivation index. Also included for each tract are the six individual ACS measures used to create the deprivation index. 10 | 11 | ### Import 2018 index directly into `R` 12 | 13 | Use the following code to download the deprivation index data.frame directly into R: 14 | 15 | ``` 16 | dep_index <- 'https://github.com/geomarker-io/dep_index/raw/master/2018_dep_index/ACS_deprivation_index_by_census_tracts.rds' %>% 17 | url() %>% 18 | gzcon() %>% 19 | readRDS() %>% 20 | as_tibble() 21 | ``` 22 | 23 | ## 2018 ZIP code deprivation index 24 | 25 | The deprivation index is also available by zip codes, denoted using the [ZIP Code Tabulation Area (ZCTA)](https://en.wikipedia.org/wiki/ZIP_Code_Tabulation_Area) boundaries. The value for each ZCTA is calculated as the mean of all of its intersecting census tracts. Download the 2018 file located at `2018_dep_index/ACS_deprivation_index_by_zipcode.csv` or use the above code to read it into R by replacing the RDS file name with `ACS_deprivation_index_by_zipcode.rds`. 26 | 27 | ## Updated PCA results for 2018 ACS data 28 | 29 | ### Pairs plot of ACS estimates 30 | 31 | ![](figs/acs_data_pairs_plot.jpg) 32 | 33 | ### PCA 34 | 35 | ![](figs/variance_of_acs_explained_by_dep_index.jpg) 36 | 37 | ![](figs/acs_measure_weights_on_dep_index.jpg) 38 | 39 | ### Distribution of index 40 | 41 | ![](figs/dep_index_density.jpg) 42 | 43 | ### Relationship between index and ACS measure 44 | 45 | ![](figs/dep_index_and_acs_measures_xyplots.jpg) 46 | -------------------------------------------------------------------------------- /2018_dep_index/figs/PCs_pairs_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/figs/PCs_pairs_plot.jpg -------------------------------------------------------------------------------- /2018_dep_index/figs/acs_data_pairs_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/figs/acs_data_pairs_plot.jpg -------------------------------------------------------------------------------- /2018_dep_index/figs/acs_measure_weights_on_dep_index.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/figs/acs_measure_weights_on_dep_index.jpg -------------------------------------------------------------------------------- /2018_dep_index/figs/dep_index_and_acs_measures_xyplots.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/figs/dep_index_and_acs_measures_xyplots.jpg -------------------------------------------------------------------------------- /2018_dep_index/figs/dep_index_density.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/figs/dep_index_density.jpg -------------------------------------------------------------------------------- /2018_dep_index/figs/variance_of_acs_explained_by_dep_index.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/2018_dep_index/figs/variance_of_acs_explained_by_dep_index.jpg -------------------------------------------------------------------------------- /2018_dep_index/figs/variance_of_acs_explained_by_dep_index.md: -------------------------------------------------------------------------------- 1 | |measure | PC1| PC2| PC3| PC4| PC5| PC6| 2 | |:----------------------|----:|----:|----:|----:|----:|----:| 3 | |Proportion of Variance | 0.57| 0.16| 0.12| 0.07| 0.05| 0.03| 4 | |Cumulative Proportion | 0.57| 0.74| 0.85| 0.92| 0.97| 1.00| 5 | -------------------------------------------------------------------------------- /ACS_deprivation_index_by_census_tracts.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/ACS_deprivation_index_by_census_tracts.rds -------------------------------------------------------------------------------- /ACS_deprivation_index_by_zipcode.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/ACS_deprivation_index_by_zipcode.rds -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use the deprivation index, please cite it as below." 3 | authors: 4 | - family-names: "Brokamp" 5 | given-names: "Cole" 6 | title: "Material Deprivation Index" 7 | version: 0.3 8 | date-released: 2020-08-05 9 | url: "https://geomarker.io/dep_index" 10 | preferred-citation: 11 | type: article 12 | authors: 13 | - family-names: "Brokamp" 14 | given-names: "Cole" 15 | - family-names: "Beck" 16 | given-names: "Andrew F." 17 | - family-names: "Goyal" 18 | given-names: "Neera K." 19 | - family-names: "Ryan" 20 | given-names: "Patrick" 21 | - family-names: "Greenberg" 22 | given-names: "James M." 23 | - family-names: "Hall" 24 | given-names: "Eric S." 25 | doi: "https://doi.org/10.1016/j.annepidem.2018.11.008" 26 | journal: "Annals of Epidemiology" 27 | start: 37 # First page number 28 | end: 43 # Last page number 29 | title: "Material Community Deprivation and Hospital Utilization During the First Year of Life: An Urban Population-Based Cohort Study" 30 | issue: 30 31 | year: 2019 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Nationwide Community Deprivation Index 2 | 3 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](http://www.gnu.org/licenses/gpl-3.0) 4 | 5 | **Citation for Scientific Publications:** If you use the deprivation index in a scientific publication, please cite our manuscript detailing its creation and application to health outcomes: 6 | 7 | > Cole Brokamp, Andrew F. Beck, Neera K. Goyal, Patrick Ryan, James M. Greenberg, Eric S. Hall. Material Community Deprivation and Hospital Utilization During the First Year of Life: An Urban Population-Based Cohort Study. *Annals of Epidemiology*. 30. 37-43. 2019. [Download](https://colebrokamp-website.s3.amazonaws.com/publications/Brokamp_AoE_2019.pdf). 8 | 9 | **2018 Update:** The [2018_dep_index](/2018_dep_index) folder contains an version of the same deprivation index, but updated with data from the 2018 5-yr American Community Survey data. See the [2018 README](/2018_dep_index/README.md) for details on getting and using the data at the census tract and ZIP code level. 10 | 11 | -------------------------------------------- 12 | 13 | ## Overview 14 | 15 | Socioeconomic variables from the American Community Survey (ACS) are frequently used in place of or in addition to individual confounders in observational studies. There are several census tract level measures that can be used to capture "community deprivation" and they are often highly correlated, e.g., median household income and educational attainment. Choosing only one ACS measure might not capture the entirety of community deprivation, but using more than one ACS measure can lead to problems in statistical models due to colinearity. 16 | 17 | Here, we create a deprivation index for each census tract in the United States based on a principal components analysis of six different 2015 ACS measures. The first component explains over 60% of the total variance present in the five different ACS measures and is dubbed the "Deprivation Index". Rescaling and normalizing forces the index to range from 0 to 1, with a higher index being more deprived. 18 | 19 | ![](figs/dep_index_nationwide_map.jpeg) 20 | 21 | The high resolution of the tract level is masked when using a nationwide scale. Below is an example of how the index looks across the Greater Cincinnati, Ohio area: 22 | 23 | ![](figs/deprivation_index_map_cincinnati.jpeg) 24 | 25 | ## Getting the data 26 | 27 | ### Download the CSV file 28 | 29 | The data is contained in a CSV file called [ACS_deprivation_index_by_census_tracts.csv](https://github.com/geomarker-io/dep_index/raw/master/ACS_deprivation_index_by_census_tracts.csv) which is a table of 73,056 census tracts listed by their FIPS ID and corresponding deprivation index. Also included for each tract are the six individual ACS measures used to create the deprivation index. 30 | 31 | ### Import Directly Into `R` 32 | 33 | Use the following code to download the deprivation index data.frame directly into R: 34 | 35 | ``` 36 | dep_index <- 'https://github.com/geomarker-io/dep_index/raw/master/ACS_deprivation_index_by_census_tracts.rds' %>% 37 | url() %>% 38 | gzcon() %>% 39 | readRDS() %>% 40 | as_tibble() 41 | ``` 42 | 43 | ### ZIP Code Deprivation Index 44 | 45 | The deprivation index is also available by zip codes, denoted using the [ZIP Code Tabulation Area (ZCTA)](https://en.wikipedia.org/wiki/ZIP_Code_Tabulation_Area) boundaries. The value for each ZCTA is calculated as the mean of all of its intersecting census tracts. Download the file called `ACS_deprivation_index_by_zipcode.csv` or use the above code to read it into R by replacing the RDS file name with `ACS_deprivation_index_by_zipcode.rds`. 46 | 47 | ## Details on Creating the Index 48 | 49 | The following census tract level variables were derived from the 2015 5-year American Community Survey: 50 | 51 | - `fraction_poverty`: fraction of population with income in past 12 months below poverty level 52 | - `median_income`: median household income in the past 12 months in 2015 inflation-adjusted dollars 53 | - `fraction_high_school_edu`: fraction of population 25 and older with educational attainment of at least high school graduation (includes GED equivalency) 54 | - `fraction_no_health_ins`: fraction of poulation with no health insurance coverage 55 | - `acs_assisted_income`: fraction of households receiving public assistance income or food stamps or SNAP in the past 12 months 56 | - `fraction_vacant_housing`: fraction of houses that are vacant 57 | 58 | Looking at a pairs plot of all the ACS estimates, we can see that they are mostly highly correlated with one another: 59 | 60 | ![](figs/acs_data_pairs_plot.jpg) 61 | 62 | Carrying out a principal component analysis results in six components, with the first explaining over 70% of the total variance in the six ACS measures: 63 | 64 | ![](figs/variance_of_acs_explained_by_dep_index.jpg) 65 | 66 | Looking at the loading weights used to calculate the principal components, we can see that (other than the fraction of vacant housing) the first component is somewhat equally weighted by each ACS measure, each in the expected direction such that a higher first component value is associated with higher deprivation. 67 | 68 | The first component is dubbed the "Deprivation Index"; rescaling and normalizing forces the index to range from 0 to 1, with a higher index being more deprived. 69 | 70 | ![](figs/acs_measure_weights_on_dep_index.jpg) 71 | 72 | Applying the weights to the data for all census tracts leads to a deprivation index assignment for US census tracts with non-missing ACS measures. 999 of the 73,056 total US census tracts have a missing value for the deprivation index because of a missing value for one or more of the underlying ACS measures. 73 | 74 | ![](figs/dep_index_density.jpg) 75 | 76 | We calculated the nationwide mean (0.37) and standard error (0.0006) for the deprivation index by weighting each tract-level deprivation index by its population under age 18. Details are in the [weighted_avg/dep_index_pop_under_18_weighted_avg.R](weighted_avg/dep_index_pop_under_18_weighted_avg.R) file. 77 | 78 | | Measure | 2015 Deprivation Index | 2018 Deprivation Index | 79 | | :-------- | -----: | -----------: | 80 | | Mean | 0.37 | 0.35 | 81 | | Standard Error | 0.0006 | 0.0006 | 82 | | Standard Deviation | 0.13 | 0.13 | 83 | | 25th percentile | 0.27 | 0.25 | 84 | | Median | 0.36 | 0.33 | 85 | | 75th percentile | 0.46 | 0.43 | 86 | 87 | 88 | We can verify the relationship between the deprivation index and each of the ACS measures individually with scatter plots: 89 | 90 | ![](figs/dep_index_and_acs_measures_xyplots.jpg) 91 | 92 | ## Reproducibility 93 | 94 | `01_make_data_for_dep_index.R` is used to fetch the ACS census tract level data and then `02_pca_on_dep_index_data.R` is used to carry out the principal components analysis, create the deprivation index, and create the images used in this document. `03_calculate_dep_index_by_ZCTA.R` calculates each measurement and deprivation index for each ZIP code. 95 | -------------------------------------------------------------------------------- /dep_index.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /figs/PCs_pairs_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/PCs_pairs_plot.jpg -------------------------------------------------------------------------------- /figs/acs_data_pairs_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/acs_data_pairs_plot.jpg -------------------------------------------------------------------------------- /figs/acs_measure_weights_on_dep_index.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/acs_measure_weights_on_dep_index.jpg -------------------------------------------------------------------------------- /figs/dep_index_and_acs_measures_xyplots.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/dep_index_and_acs_measures_xyplots.jpg -------------------------------------------------------------------------------- /figs/dep_index_density.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/dep_index_density.jpg -------------------------------------------------------------------------------- /figs/dep_index_nationwide_map.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/dep_index_nationwide_map.jpeg -------------------------------------------------------------------------------- /figs/deprivation_index_map_cincinnati.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/deprivation_index_map_cincinnati.jpeg -------------------------------------------------------------------------------- /figs/variance_of_acs_explained_by_dep_index.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geomarker-io/dep_index/fc262bbfd713f7f303de745c80cccebf674363c2/figs/variance_of_acs_explained_by_dep_index.jpg -------------------------------------------------------------------------------- /figs/variance_of_acs_explained_by_dep_index.md: -------------------------------------------------------------------------------- 1 | |measure | PC1| PC2| PC3| PC4| PC5| PC6| 2 | |:----------------------|---:|----:|----:|----:|----:|----:| 3 | |Proportion of Variance | 0.6| 0.16| 0.10| 0.06| 0.04| 0.03| 4 | |Cumulative Proportion | 0.6| 0.76| 0.86| 0.93| 0.97| 1.00| 5 | -------------------------------------------------------------------------------- /weighted_avg/dep_index_2018_pop_under_18_weighted_avg.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(tidyverse)) 2 | 3 | states <- tigris::states(cb = TRUE) %>% 4 | select(NAME) %>% 5 | sf::st_drop_geometry() %>% 6 | filter(! NAME %in% c('Guam', 'Commonwealth of the Northern Mariana Islands', 7 | 'United States Virgin Islands', 'American Samoa', 'Puerto Rico')) 8 | 9 | tract_pop_under_18 <- tidycensus::get_acs(geography = 'tract', 10 | variables = c(paste0('B01001_00', 1:6), paste0('B01001_0', 27:30)), 11 | state = states$NAME, 12 | year = 2018) %>% 13 | group_by(GEOID) %>% 14 | summarize(pop_under_18 = sum(estimate)) 15 | 16 | dep_index <- 'https://github.com/geomarker-io/dep_index/raw/master/2018_dep_index/ACS_deprivation_index_by_census_tracts.rds' %>% 17 | url() %>% 18 | gzcon() %>% 19 | readRDS() %>% 20 | as_tibble() 21 | 22 | dep_index <- dep_index %>% 23 | left_join(tract_pop_under_18, by = c('census_tract_fips' = 'GEOID')) 24 | 25 | ## base method 26 | wt_mean <- weighted.mean(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 27 | wt_mean 28 | ## about 1000 tracts are missing deprivation index 29 | 30 | # weighted_variance 31 | dev_sq <- (dep_index$dep_index - wt_mean)^2 32 | wt_var <- weighted.mean(x = dev_sq, w = dep_index$pop_under_18, na.rm = TRUE) 33 | 34 | # weighted sd 35 | wt_sd <- sqrt(wt_var) 36 | 37 | ## manual 38 | t <- dep_index %>% 39 | select(census_tract_fips, dep_index, pop_under_18) %>% 40 | mutate(wt_dep = dep_index * pop_under_18) %>% 41 | filter(!is.na(dep_index), !is.na(pop_under_18)) 42 | 43 | sum(t$wt_dep) / sum(t$pop_under_18) 44 | 45 | ## diagis pkg 46 | diagis::weighted_mean(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 47 | diagis::weighted_se(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 48 | 49 | ## quantiles 50 | library(spatstat) 51 | weighted.median(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 52 | weighted.quantile(x = dep_index$dep_index, w = dep_index$pop_under_18, 53 | probs = c(0.25, 0.50, 0.75), na.rm = TRUE) 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /weighted_avg/dep_index_pop_under_18_weighted_avg.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(tidyverse)) 2 | 3 | states <- tigris::states(cb = TRUE) %>% 4 | select(NAME) %>% 5 | sf::st_drop_geometry() %>% 6 | filter(! NAME %in% c('Guam', 'Commonwealth of the Northern Mariana Islands', 7 | 'United States Virgin Islands', 'American Samoa', 'Puerto Rico')) 8 | 9 | tract_pop_under_18 <- tidycensus::get_acs(geography = 'tract', 10 | variables = c(paste0('B01001_00', 1:6), paste0('B01001_0', 27:30)), 11 | state = states$NAME, 12 | year = 2015) %>% 13 | group_by(GEOID) %>% 14 | summarize(pop_under_18 = sum(estimate)) 15 | 16 | dep_index <- 'https://github.com/cole-brokamp/dep_index/raw/master/ACS_deprivation_index_by_census_tracts.rds' %>% 17 | url() %>% 18 | gzcon() %>% 19 | readRDS() %>% 20 | as_tibble() 21 | 22 | dep_index <- dep_index %>% 23 | left_join(tract_pop_under_18, by = c('census_tract_fips' = 'GEOID')) 24 | 25 | ## base method 26 | wt_mean <- weighted.mean(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 27 | wt_mean 28 | ## about 1000 tracts are missing deprivation index 29 | 30 | # weighted_variance 31 | dev_sq <- (dep_index$dep_index - wt_mean)^2 32 | wt_var <- weighted.mean(x = dev_sq, w = dep_index$pop_under_18, na.rm = TRUE) 33 | 34 | # weighted sd 35 | wt_sd <- sqrt(wt_var) 36 | 37 | ## manual 38 | t <- dep_index %>% 39 | select(census_tract_fips, dep_index, pop_under_18) %>% 40 | mutate(wt_dep = dep_index * pop_under_18) %>% 41 | filter(!is.na(dep_index), !is.na(pop_under_18)) 42 | 43 | sum(t$wt_dep) / sum(t$pop_under_18) 44 | 45 | ## diagis pkg 46 | diagis::weighted_mean(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 47 | diagis::weighted_se(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 48 | 49 | ## quantiles 50 | library(spatstat) 51 | weighted.median(x = dep_index$dep_index, w = dep_index$pop_under_18, na.rm = TRUE) 52 | weighted.quantile(x = dep_index$dep_index, w = dep_index$pop_under_18, 53 | probs = c(0.25, 0.50, 0.75), na.rm = TRUE) 54 | 55 | 56 | 57 | 58 | --------------------------------------------------------------------------------