├── .gitignore ├── Data ├── anes.rds └── recs.rds ├── DataCleaningScripts ├── ANES_DataPrep.Rmd ├── ANES_DataPrep.md ├── RECS_DataPrep.Rmd └── RECS_DataPrep.md ├── Exercises ├── CategorialExercises.R ├── CategorialExercises.Rmd ├── CategorialExercises_solutions.R ├── CategorialExercises_solutions.Rmd ├── CategorialExercises_solutions.html ├── ContinuousExercises.R ├── ContinuousExercises.Rmd ├── ContinuousExercises_solutions.R ├── ContinuousExercises_solutions.Rmd ├── ContinuousExercises_solutions.html ├── WarmUpExercises.R ├── WarmUpExercises.Rmd ├── WarmUpExercises_solutions.R ├── WarmUpExercises_solutions.Rmd └── WarmUpExercises_solutions.html ├── FinalizeMaterials.R ├── LICENSE ├── Presentation ├── Slides.R ├── Slides.Rmd ├── Slides.html ├── Slides.pdf ├── Slides.pptx ├── Slides_files │ └── figure-html │ │ └── plot_sf_elbill_disp-1.png └── xaringan-themer.css ├── README.md ├── RawData ├── ANES_2016 │ ├── anes_timeseries_2016.sav │ ├── anes_timeseries_2016_qnaire_post.pdf │ ├── anes_timeseries_2016_qnaire_pre.pdf │ └── anes_timeseries_2016_userguidecodebook.pdf └── RECS_2015 │ ├── 2020_RECS-457A.pdf │ ├── README.md │ ├── codebook_publicv4.xlsx │ ├── microdata_v3.pdf │ └── recs2015_public_v4.csv └── tidy-survey-short-course.Rproj /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | debug.log -------------------------------------------------------------------------------- /Data/anes.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Data/anes.rds -------------------------------------------------------------------------------- /Data/recs.rds: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Data/recs.rds
--------------------------------------------------------------------------------
/DataCleaningScripts/ANES_DataPrep.Rmd:
--------------------------------------------------------------------------------
---
title: "American National Election Studies (ANES) 2016 Time Series Study Data Prep"
output: github_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Data information

All data and resources were downloaded from https://electionstudies.org/data-center/2016-time-series-study/ on April 3, 2021.

American National Election Studies. 2019. ANES 2016 Time Series Study [dataset and documentation]. September 4, 2019 version. www.electionstudies.org

```{r loadpackageh, message=FALSE}
library(here) #easy relative paths
```

```{r loadpackages}
library(tidyverse) #data manipulation
library(haven) #data import
library(tidylog) #informative logging messages
```

## Import data and create derived variables

```{r derivedata}
# Import the raw ANES 2016 SPSS file from the project's RawData folder.
anes_in <- read_sav(here("RawData", "ANES_2016", "anes_timeseries_2016.sav"))

# Response options shared by the two trust items (V161215 and V161219).
trust_levels <- c(
  "Always", "Most of the time", "About half the time", "Some of the time",
  "Never"
)

# Keep only the columns needed, then derive analysis-ready variables.
# In every case_when() recode below, any code that is not matched by an
# explicit condition falls through to the final TRUE branch and becomes NA.
anes <- anes_in %>%
  select(
    # renamed below to Weight, Stratum, VarUnit; V160501 -> InterviewMode
    V160102, V160201, V160202, V160501,
    # items recoded into the derived factors below
    V161004, V161005, V161006, V161024x, V161158x, V161215, V161219,
    V161267, V161270, V161310x, V161342, V161361x,
    V162031x, V162062x,
    # kept in the output but not recoded in this script
    V162031, V162034, V162034a
  ) %>%
  # select() above lists each column once; restore the original column order
  # so the saved file and summary() output keep the same layout.
  relocate(V162031, .before = V162031x) %>%
  relocate(V162034, V162034a, .after = V162031x) %>%
  mutate(
    InterviewMode = fct_recode(as.character(V160501), FTF = "1", Web = "2"),
    Weight = V160102,
    Stratum = as.factor(V160201),
    VarUnit = as.factor(V160202),

    # Non-positive age codes are treated as missing.
    Age = if_else(V161267 > 0, as.numeric(V161267), NA_real_),
    # cut() intervals are right-closed: (17,29], (29,39], ..., (69,200].
    AgeGroup = cut(
      Age,
      c(17, 29, 39, 49, 59, 69, 200),
      labels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")
    ),

    Gender = factor(
      case_when(
        V161342 == 1 ~ "Male",
        V161342 == 2 ~ "Female",
        V161342 == 3 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Male", "Female", "Other")
    ),

    # Level order (Hispanic third) intentionally differs from the code order.
    RaceEth = factor(
      case_when(
        V161310x == 1 ~ "White",
        V161310x == 2 ~ "Black",
        V161310x == 5 ~ "Hispanic",
        V161310x == 3 ~ "Asian, NH/PI",
        V161310x == 4 ~ "AI/AN",
        V161310x == 6 ~ "Other/multiple race",
        TRUE ~ NA_character_
      ),
      levels = c(
        "White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN",
        "Other/multiple race"
      )
    ),

    PartyID = factor(
      case_when(
        V161158x == 1 ~ "Strong democrat",
        V161158x == 2 ~ "Not very strong democrat",
        V161158x == 3 ~ "Independent-democrat",
        V161158x == 4 ~ "Independent",
        V161158x == 5 ~ "Independent-republican",
        V161158x == 6 ~ "Not very strong republican",
        V161158x == 7 ~ "Strong republican",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Strong democrat", "Not very strong democrat", "Independent-democrat",
        "Independent", "Independent-republican", "Not very strong republican",
        "Strong republican"
      )
    ),

    # Conditions are evaluated in order, so each `<=` test only sees codes
    # that the earlier conditions did not already claim. Code 90 is grouped
    # with 9 as "High school"; anything above 16 that is not 90 becomes NA.
    Education = factor(
      case_when(
        V161270 <= 0 ~ NA_character_,
        V161270 <= 8 ~ "Less than HS",
        V161270 == 9 | V161270 == 90 ~ "High school",
        V161270 <= 12 ~ "Post HS",
        V161270 == 13 ~ "Bachelor's",
        V161270 <= 16 ~ "Graduate",
        TRUE ~ NA_character_
      ),
      levels = c("Less than HS", "High school", "Post HS", "Bachelor's", "Graduate")
    ),

    # Breaks c(-5, 1:28) give 28 right-closed bins, one per income code 1-28.
    # Codes at or below -5 fall outside every bin and become NA.
    # NOTE(review): the label "$12.5-15" lacks the trailing "k" used by every
    # other label. It is left unchanged here because it is part of the saved
    # data; if it is corrected, update the fct_collapse() call below as well.
    Income = cut(
      V161361x,
      c(-5, 1:28),
      labels = c(
        "Under $5k",
        "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k",
        "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k",
        "$35-40k", "$40-45k", "$45-50k", "$50-55k", "$55-60k", "$60-65k",
        "$65-70k", "$70-75k", "$75-80k", "$80-90k", "$90-100k", "$100-110k",
        "$110-125k", "$125-150k", "$150-175k", "$175-250k", "$250k or more"
      )
    ),
    # Seven-category collapse of the 28 detailed income bins.
    Income7 = fct_collapse(
      Income,
      "Under $20k" = c(
        "Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k",
        "$17.5-20k"
      ),
      "$20-40k" = c(
        "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k",
        "$35-40k"
      ),
      "$40-60k" = c("$40-45k", "$45-50k", "$50-55k", "$55-60k"),
      "$60-80k" = c("$60-65k", "$65-70k", "$70-75k", "$75-80k"),
      "$80-100k" = c("$80-90k", "$90-100k"),
      "$100-125k" = c("$100-110k", "$110-125k"),
      "$125k or more" = c("$125-150k", "$150-175k", "$175-250k", "$250k or more")
    ),

    CampaignInterest = factor(
      case_when(
        V161004 == 1 ~ "Very much interested",
        V161004 == 2 ~ "Somewhat interested",
        V161004 == 3 ~ "Not much interested",
        TRUE ~ NA_character_
      ),
      levels = c("Very much interested", "Somewhat interested", "Not much interested")
    ),

    TrustGovernment = factor(
      case_when(
        V161215 == 1 ~ "Always",
        V161215 == 2 ~ "Most of the time",
        V161215 == 3 ~ "About half the time",
        V161215 == 4 ~ "Some of the time",
        V161215 == 5 ~ "Never",
        TRUE ~ NA_character_
      ),
      levels = trust_levels
    ),
    TrustPeople = factor(
      case_when(
        V161219 == 1 ~ "Always",
        V161219 == 2 ~ "Most of the time",
        V161219 == 3 ~ "About half the time",
        V161219 == 4 ~ "Some of the time",
        V161219 == 5 ~ "Never",
        TRUE ~ NA_character_
      ),
      levels = trust_levels
    ),

    VotedPres2012 = factor(
      case_when(
        V161005 == 1 ~ "Yes",
        V161005 == 2 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    # Only codes 1, 2 and 5 are kept; every other code becomes NA.
    VotedPres2012_selection = factor(
      case_when(
        V161006 == 1 ~ "Obama",
        V161006 == 2 ~ "Romney",
        V161006 == 5 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Obama", "Romney", "Other")
    ),

    VotedPres2016 = factor(
      case_when(
        V162031x == 1 ~ "Yes",
        V162031x == 0 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    # All codes of 3 or higher are pooled into "Other".
    VotedPres2016_selection = factor(
      case_when(
        V162062x == 1 ~ "Clinton",
        V162062x == 2 ~ "Trump",
        V162062x >= 3 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Clinton", "Trump", "Other")
    ),

    # Must come after VotedPres2016, which it reads: code 4 is "Yes"; any
    # other respondent with VotedPres2016 == "Yes" is "No"; the rest are NA.
    EarlyVote2016 = factor(
      case_when(
        V161024x == 4 ~ "Yes",
        VotedPres2016 == "Yes" ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    )
  )

summary(anes)
```

## Check derived variables for correct coding

```{r checkvars}
# Cross-tabulate each derived variable against the raw item(s) it was built
# from so that every code -> label mapping can be inspected.
anes %>% count(InterviewMode, V160501)
anes %>%
  group_by(AgeGroup) %>%
  summarise(
    minAge = min(Age),
    maxAge = max(Age),
    minV = min(V161267),
    maxV = max(V161267)
  )
anes %>% count(Gender, V161342)
anes %>% count(RaceEth, V161310x)
anes %>% count(PartyID, V161158x)
anes %>% count(Education, V161270)
anes %>% count(Income, Income7, V161361x) %>% print(n = 30)
anes %>% count(CampaignInterest, V161004)
anes %>% count(TrustGovernment, V161215)
anes %>% count(TrustPeople, V161219)
anes %>% count(VotedPres2012, V161005)
anes %>% count(VotedPres2012_selection, V161006)
anes %>% count(VotedPres2016, V162031x)
anes %>% count(VotedPres2016_selection, V162062x)
anes %>% count(EarlyVote2016, V161024x, VotedPres2016)

# Total of the weight column.
anes %>%
  summarise(WtSum = sum(Weight)) %>%
  pull(WtSum)
```

## Save data

```{r savedat}
# Write the prepared data set to Data/anes.rds (gzip-compressed).
write_rds(anes, here("Data", "anes.rds"), compress = "gz")
```

--------------------------------------------------------------------------------
/DataCleaningScripts/ANES_DataPrep.md:
--------------------------------------------------------------------------------
1 | American National Election Studies (ANES) 2016 Time Series Study Data 2 | Prep 3 | ================ 4 | 5 | ## Data information 6 | 7 | All data and resources were downloaded from 8 | on 9 | April 3, 2021. 10 | 11 | American National Election Studies. 2019. 
ANES 2016 Time Series Study 12 | \[dataset and documentation\]. September 4, 2019 version. 13 | www.electionstudies.org 14 | 15 | ``` r 16 | library(here) #easy relative paths 17 | ``` 18 | 19 | ``` r 20 | library(tidyverse) #data manipulation 21 | ``` 22 | 23 | ## -- Attaching packages ----------------------------- tidyverse 1.3.0 -- 24 | 25 | ## v ggplot2 3.3.3 v purrr 0.3.4 26 | ## v tibble 3.1.0 v dplyr 1.0.5 27 | ## v tidyr 1.1.3 v stringr 1.4.0 28 | ## v readr 1.4.0 v forcats 0.5.1 29 | 30 | ## -- Conflicts -------------------------------- tidyverse_conflicts() -- 31 | ## x dplyr::filter() masks stats::filter() 32 | ## x dplyr::lag() masks stats::lag() 33 | 34 | ``` r 35 | library(haven) #data import 36 | library(tidylog) #informative logging messages 37 | ``` 38 | 39 | ## 40 | ## Attaching package: 'tidylog' 41 | 42 | ## The following objects are masked from 'package:dplyr': 43 | ## 44 | ## add_count, add_tally, anti_join, count, distinct, distinct_all, 45 | ## distinct_at, distinct_if, filter, filter_all, filter_at, filter_if, 46 | ## full_join, group_by, group_by_all, group_by_at, group_by_if, 47 | ## inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if, 48 | ## relocate, rename, rename_all, rename_at, rename_if, rename_with, 49 | ## right_join, sample_frac, sample_n, select, select_all, select_at, 50 | ## select_if, semi_join, slice, slice_head, slice_max, slice_min, 51 | ## slice_sample, slice_tail, summarise, summarise_all, summarise_at, 52 | ## summarise_if, summarize, summarize_all, summarize_at, summarize_if, 53 | ## tally, top_frac, top_n, transmute, transmute_all, transmute_at, 54 | ## transmute_if, ungroup 55 | 56 | ## The following objects are masked from 'package:tidyr': 57 | ## 58 | ## drop_na, fill, gather, pivot_longer, pivot_wider, replace_na, 59 | ## spread, uncount 60 | 61 | ## The following object is masked from 'package:stats': 62 | ## 63 | ## filter 64 | 65 | ## Import data and create derived variables 66 | 67 | ``` r 68 | 
anes_in <- read_sav(here("RawData", "ANES_2016", "anes_timeseries_2016.sav")) 69 | 70 | 71 | anes <- anes_in %>% 72 | select('V160102', 'V160201', 'V160202', 'V160501', 'V161004', 'V161005', 'V161006', 'V161024x', 'V161158x', 'V161215', 'V161219', 'V161267', 'V161267', 'V161270', 'V161310x', 'V161342', 'V161361x', 'V162031', 'V162031x', 'V162034', 'V162034a', 'V162062x', 'V162062x' 73 | ) %>% 74 | mutate( 75 | InterviewMode=fct_recode(as.character(V160501), FTF="1", Web="2"), 76 | Weight=V160102, 77 | Stratum=as.factor(V160201), 78 | VarUnit=as.factor(V160202), 79 | Age=if_else(V161267>0, as.numeric(V161267), NA_real_), 80 | AgeGroup=cut(Age, c(17, 29, 39, 49, 59, 69, 200), 81 | labels=c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")), 82 | Gender=factor( 83 | case_when( 84 | V161342==1~"Male", 85 | V161342==2~"Female", 86 | V161342==3~"Other", 87 | TRUE~NA_character_ 88 | ), 89 | levels=c("Male", "Female", "Other") 90 | ), 91 | RaceEth=factor( 92 | case_when( 93 | V161310x==1~"White", 94 | V161310x==2~"Black", 95 | V161310x==5~"Hispanic", 96 | V161310x==3~"Asian, NH/PI", 97 | near(V161310x, 4)~"AI/AN", 98 | near(V161310x, 6)~"Other/multiple race", 99 | TRUE ~ NA_character_ 100 | ), 101 | levels=c("White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN", "Other/multiple race", NA_character_) 102 | ), 103 | PartyID=factor( 104 | case_when( 105 | V161158x==1~"Strong democrat", 106 | V161158x==2~"Not very strong democrat", 107 | V161158x==3~"Independent-democrat", 108 | V161158x==4~"Independent", 109 | V161158x==5~"Independent-republican", 110 | V161158x==6~"Not very strong republican", 111 | V161158x==7~"Strong republican", 112 | TRUE ~ NA_character_ 113 | ), 114 | levels=c("Strong democrat", "Not very strong democrat", "Independent-democrat", "Independent", "Independent-republican", "Not very strong republican", "Strong republican") 115 | ), 116 | Education=factor( 117 | case_when( 118 | V161270 <=0~NA_character_, 119 | V161270 <= 8~"Less than HS", 120 | 
V161270==9|V161270==90~"High school", 121 | V161270<=12~"Post HS", 122 | V161270==13~"Bachelor's", 123 | V161270<=16~"Graduate", 124 | TRUE~NA_character_ 125 | ), 126 | levels=c("Less than HS", "High school", "Post HS", "Bachelor's", "Graduate") 127 | ), 128 | Income=cut(V161361x, c(-5, 1:28), 129 | labels=c("Under $5k", 130 | "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k", "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k", "$40-45k", "$45-50k", "$50-55k", "$55-60k", "$60-65k","$65-70k", "$70-75k", "$75-80k", "$80-90k", "$90-100k","$100-110k", "$110-125k", "$125-150k", "$150-175k", "$175-250k", "$250k or more" ) 131 | ), 132 | Income7=fct_collapse( 133 | Income, 134 | "Under $20k"=c("Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k"), 135 | "$20-40k"=c("$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k"), 136 | "$40-60k"=c( "$40-45k", "$45-50k", "$50-55k", "$55-60k"), 137 | "$60-80k"=c( "$60-65k", "$65-70k", "$70-75k", "$75-80k"), 138 | "$80-100k"=c("$80-90k", "$90-100k"), 139 | "$100-125k"=c("$100-110k", "$110-125k"), 140 | "$125k or more"=c("$125-150k", "$150-175k", "$175-250k", "$250k or more") 141 | ), 142 | CampaignInterest=factor( 143 | case_when( 144 | V161004==1~"Very much interested", 145 | V161004==2~"Somewhat interested", 146 | V161004==3~"Not much interested", 147 | TRUE~NA_character_ 148 | ), 149 | levels=c("Very much interested", "Somewhat interested", "Not much interested") 150 | ), 151 | TrustGovernment=factor( 152 | case_when( 153 | V161215==1~"Always", 154 | V161215==2~"Most of the time", 155 | V161215==3~"About half the time", 156 | V161215==4~"Some of the time", 157 | V161215==5~"Never", 158 | TRUE~NA_character_ 159 | ), 160 | levels=c("Always", "Most of the time", "About half the time", "Some of the time", "Never") 161 | ), 162 | TrustPeople=factor( 163 | case_when( 164 | V161219==1~"Always", 165 | V161219==2~"Most of the time", 166 | V161219==3~"About half the 
time", 167 | V161219==4~"Some of the time", 168 | V161219==5~"Never", 169 | TRUE ~ NA_character_ 170 | ), 171 | levels=c("Always", "Most of the time", "About half the time", "Some of the time", "Never") 172 | ), 173 | VotedPres2012=factor( 174 | case_when( 175 | V161005==1~"Yes", 176 | V161005==2~"No", 177 | TRUE~NA_character_ 178 | ), levels=c("Yes", "No") 179 | ), 180 | VotedPres2012_selection=factor( 181 | case_when( 182 | V161006==1~"Obama", 183 | V161006==2~"Romney", 184 | V161006==5~"Other", 185 | TRUE~NA_character_ 186 | ), levels=c("Obama", "Romney", "Other") 187 | ), 188 | VotedPres2016=factor( 189 | case_when( 190 | V162031x==1~"Yes", 191 | V162031x==0~"No", 192 | TRUE~NA_character_ 193 | ), levels=c("Yes", "No") 194 | ), 195 | VotedPres2016_selection=factor( 196 | case_when( 197 | V162062x==1~"Clinton", 198 | V162062x==2~"Trump", 199 | V162062x >=3 ~"Other", 200 | TRUE~NA_character_ 201 | ), levels=c("Clinton", "Trump", "Other") 202 | ), 203 | EarlyVote2016=factor( 204 | case_when( 205 | V161024x==4~"Yes", 206 | VotedPres2016=="Yes"~"No", 207 | TRUE~NA_character_ 208 | ), levels=c("Yes", "No") 209 | ) 210 | ) 211 | ``` 212 | 213 | ## select: dropped 1,821 variables (version, V160001, V160001_orig, V160101, V160101f, …) 214 | 215 | ## mutate: new variable 'InterviewMode' (factor) with 2 unique values and 0% NA 216 | 217 | ## new variable 'Weight' (double) with 2,609 unique values and 0% NA 218 | 219 | ## new variable 'Stratum' (factor) with 132 unique values and 0% NA 220 | 221 | ## new variable 'VarUnit' (factor) with 3 unique values and 0% NA 222 | 223 | ## new variable 'Age' (double) with 74 unique values and 3% NA 224 | 225 | ## new variable 'AgeGroup' (factor) with 7 unique values and 3% NA 226 | 227 | ## new variable 'Gender' (factor) with 4 unique values and 1% NA 228 | 229 | ## new variable 'RaceEth' (factor) with 7 unique values and 1% NA 230 | 231 | ## new variable 'PartyID' (factor) with 8 unique values and 1% NA 232 | 233 | ## new variable 
'Education' (factor) with 6 unique values and 1% NA 234 | 235 | ## new variable 'Income' (factor) with 29 unique values and 5% NA 236 | 237 | ## new variable 'Income7' (factor) with 8 unique values and 5% NA 238 | 239 | ## new variable 'CampaignInterest' (factor) with 3 unique values and 0% NA 240 | 241 | ## new variable 'TrustGovernment' (factor) with 6 unique values and 1% NA 242 | 243 | ## new variable 'TrustPeople' (factor) with 6 unique values and <1% NA 244 | 245 | ## new variable 'VotedPres2012' (factor) with 3 unique values and <1% NA 246 | 247 | ## new variable 'VotedPres2012_selection' (factor) with 4 unique values and 28% NA 248 | 249 | ## new variable 'VotedPres2016' (factor) with 3 unique values and 22% NA 250 | 251 | ## new variable 'VotedPres2016_selection' (factor) with 4 unique values and 34% NA 252 | 253 | ## new variable 'EarlyVote2016' (factor) with 3 unique values and 32% NA 254 | 255 | ``` r 256 | summary(anes) 257 | ``` 258 | 259 | ## V160102 V160201 V160202 V160501 260 | ## Min. :0.0000 Min. : 1.00 Min. :1.000 Min. :1.000 261 | ## 1st Qu.:0.3934 1st Qu.: 36.00 1st Qu.:1.000 1st Qu.:1.000 262 | ## Median :0.7481 Median : 71.00 Median :1.500 Median :2.000 263 | ## Mean :0.8541 Mean : 69.58 Mean :1.505 Mean :1.724 264 | ## 3rd Qu.:1.1294 3rd Qu.:105.00 3rd Qu.:2.000 3rd Qu.:2.000 265 | ## Max. :6.4445 Max. :133.00 Max. :3.000 Max. :2.000 266 | ## 267 | ## V161004 V161005 V161006 V161024x 268 | ## Min. :1.0 Min. :-9.000 Min. :-9.0000 Min. :1.000 269 | ## 1st Qu.:1.0 1st Qu.: 1.000 1st Qu.:-1.0000 1st Qu.:3.000 270 | ## Median :1.0 Median : 1.000 Median : 1.0000 Median :3.000 271 | ## Mean :1.6 Mean : 1.232 Mean : 0.6773 Mean :2.804 272 | ## 3rd Qu.:2.0 3rd Qu.: 2.000 3rd Qu.: 2.0000 3rd Qu.:3.000 273 | ## Max. :3.0 Max. : 2.000 Max. : 6.0000 Max. :4.000 274 | ## 275 | ## V161158x V161215 V161219 V161267 276 | ## Min. :-9.000 Min. :-9.00 Min. :-9.000 Min. 
:-9.00 277 | ## 1st Qu.: 2.000 1st Qu.: 3.00 1st Qu.: 2.000 1st Qu.:33.00 278 | ## Median : 4.000 Median : 4.00 Median : 3.000 Median :49.00 279 | ## Mean : 3.792 Mean : 3.49 Mean : 2.831 Mean :47.92 280 | ## 3rd Qu.: 6.000 3rd Qu.: 4.00 3rd Qu.: 4.000 3rd Qu.:63.00 281 | ## Max. : 7.000 Max. : 5.00 Max. : 5.000 Max. :90.00 282 | ## 283 | ## V161270 V161310x V161342 V161361x 284 | ## Min. :-9.00 Min. :-2.000 Min. :-9.000 Min. :-9.00 285 | ## 1st Qu.: 9.00 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 8.00 286 | ## Median :11.00 Median : 1.000 Median : 2.000 Median :15.00 287 | ## Mean :11.66 Mean : 1.787 Mean : 1.432 Mean :14.25 288 | ## 3rd Qu.:13.00 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:22.00 289 | ## Max. :95.00 Max. : 6.000 Max. : 3.000 Max. :28.00 290 | ## 291 | ## V162031 V162031x V162034 V162034a 292 | ## Min. :-8.000 Min. :-8.0000 Min. :-9.0000 Min. :-9.0000 293 | ## 1st Qu.:-1.000 1st Qu.: 0.0000 1st Qu.:-1.0000 1st Qu.:-1.0000 294 | ## Median : 4.000 Median : 1.0000 Median : 1.0000 Median : 1.0000 295 | ## Mean : 1.759 Mean : 0.2349 Mean :-0.4625 Mean :-0.1468 296 | ## 3rd Qu.: 4.000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 2.0000 297 | ## Max. : 4.000 Max. : 1.0000 Max. : 2.0000 Max. : 9.0000 298 | ## 299 | ## V162062x InterviewMode Weight Stratum VarUnit 300 | ## Min. :-9.0000 FTF:1180 Min. :0.0000 123 : 57 1:2135 301 | ## 1st Qu.:-2.0000 Web:3090 1st Qu.:0.3934 121 : 55 2:2115 302 | ## Median : 1.0000 Median :0.7481 126 : 55 3: 20 303 | ## Mean : 0.3393 Mean :0.8541 118 : 52 304 | ## 3rd Qu.: 2.0000 3rd Qu.:1.1294 108 : 50 305 | ## Max. : 5.0000 Max. :6.4445 107 : 46 306 | ## (Other):3955 307 | ## Age AgeGroup Gender RaceEth 308 | ## Min. :18.00 18-29 :651 Male :1987 White :3038 309 | ## 1st Qu.:34.00 30-39 :761 Female:2231 Black : 397 310 | ## Median :50.00 40-49 :620 Other : 11 Hispanic : 450 311 | ## Mean :49.58 50-59 :781 NA's : 41 Asian, NH/PI : 148 312 | ## 3rd Qu.:63.00 60-69 :769 AI/AN : 27 313 | ## Max. 
:90.00 70 or older:567 Other/multiple race: 177 314 | ## NA's :121 NA's :121 NA's : 33 315 | ## PartyID Education Income 316 | ## Strong democrat :890 Less than HS: 282 Under $5k: 275 317 | ## Strong republican :721 High school : 815 $80-90k : 231 318 | ## Independent :579 Post HS :1499 $30-35k : 213 319 | ## Not very strong democrat :559 Bachelor's : 955 $60-65k : 205 320 | ## Not very strong republican:508 Graduate : 680 $50-55k : 204 321 | ## (Other) :990 NA's : 39 (Other) :2940 322 | ## NA's : 23 NA's : 202 323 | ## Income7 CampaignInterest TrustGovernment 324 | ## $20-40k :773 Very much interested:2230 Always : 66 325 | ## Under $20k :703 Somewhat interested :1519 Most of the time : 429 326 | ## $40-60k :621 Not much interested : 521 About half the time:1382 327 | ## $125k or more:615 Some of the time :1826 328 | ## $60-80k :576 Never : 545 329 | ## (Other) :780 NA's : 22 330 | ## NA's :202 331 | ## TrustPeople VotedPres2012 VotedPres2012_selection VotedPres2016 332 | ## Always : 50 Yes :3117 Obama :1728 Yes :2887 333 | ## Most of the time :1765 No :1137 Romney:1268 No : 444 334 | ## About half the time:1305 NA's: 16 Other : 58 NA's: 939 335 | ## Some of the time : 947 NA's :1216 336 | ## Never : 188 337 | ## NA's : 15 338 | ## 339 | ## VotedPres2016_selection EarlyVote2016 340 | ## Clinton:1364 Yes : 156 341 | ## Trump :1245 No :2731 342 | ## Other : 202 NA's:1383 343 | ## NA's :1459 344 | ## 345 | ## 346 | ## 347 | 348 | ## Check derived variables for correct coding 349 | 350 | ``` r 351 | anes %>% count(InterviewMode, V160501) 352 | ``` 353 | 354 | ## count: now 2 rows and 3 columns, ungrouped 355 | 356 | ## # A tibble: 2 x 3 357 | ## InterviewMode V160501 n 358 | ## 359 | ## 1 FTF 1 [1. FTF /CASI] 1180 360 | ## 2 Web 2 [2. 
Web] 3090 361 | 362 | ``` r 363 | anes %>% group_by(AgeGroup) %>% summarise(minAge=min(Age), maxAge=max(Age), minV=min(V161267), maxV=max(V161267)) 364 | ``` 365 | 366 | ## group_by: one grouping variable (AgeGroup) 367 | 368 | ## summarise: now 7 rows and 5 columns, ungrouped 369 | 370 | ## # A tibble: 7 x 5 371 | ## AgeGroup minAge maxAge minV maxV 372 | ## 373 | ## 1 18-29 18 29 18 29 374 | ## 2 30-39 30 39 30 39 375 | ## 3 40-49 40 49 40 49 376 | ## 4 50-59 50 59 50 59 377 | ## 5 60-69 60 69 60 69 378 | ## 6 70 or older 70 90 70 90 [90. Age 90 or older] 379 | ## 7 NA NA NA -9 [-9. RF (year of b~ -8 [-8. DK (year of birth, F~ 380 | 381 | ``` r 382 | anes %>% count(Gender, V161342) 383 | ``` 384 | 385 | ## count: now 4 rows and 3 columns, ungrouped 386 | 387 | ## # A tibble: 4 x 3 388 | ## Gender V161342 n 389 | ## 390 | ## 1 Male 1 [1. Male] 1987 391 | ## 2 Female 2 [2. Female] 2231 392 | ## 3 Other 3 [3. Other] 11 393 | ## 4 NA -9 [-9. Refused] 41 394 | 395 | ``` r 396 | anes %>% count(RaceEth, V161310x) 397 | ``` 398 | 399 | ## count: now 7 rows and 3 columns, ungrouped 400 | 401 | ## # A tibble: 7 x 3 402 | ## RaceEth V161310x n 403 | ## 404 | ## 1 White 1 [1. White, non-Hispanic] 3038 405 | ## 2 Black 2 [2. Black, non-Hispanic] 397 406 | ## 3 Hispanic 5 [5. Hispanic] 450 407 | ## 4 Asian, NH/PI 3 [3. Asian, native Hawaiian or other Pacif Islr,non-~ 148 408 | ## 5 AI/AN 4 [4. Native American or Alaska Native, non-Hispanic] 27 409 | ## 6 Other/multiple ~ 6 [6. Other non-Hispanic incl multiple races [WEB: bl~ 177 410 | ## 7 NA -2 [-2. Missing] 33 411 | 412 | ``` r 413 | anes %>% count(PartyID, V161158x) 414 | ``` 415 | 416 | ## count: now 9 rows and 3 columns, ungrouped 417 | 418 | ## # A tibble: 9 x 3 419 | ## PartyID V161158x n 420 | ## 421 | ## 1 Strong democrat 1 [1. Strong Democrat] 890 422 | ## 2 Not very strong democ~ 2 [2. Not very strong Democract] 559 423 | ## 3 Independent-democrat 3 [3. Independent-Democrat] 490 424 | ## 4 Independent 4 [4. 
Independent] 579 425 | ## 5 Independent-republican 5 [5. Independent-Republican] 500 426 | ## 6 Not very strong repub~ 6 [6. Not very strong Republican] 508 427 | ## 7 Strong republican 7 [7. Strong Republican] 721 428 | ## 8 NA -9 [-9. RF (-9) in V161155 (FTF only) /-9 in V16~ 12 429 | ## 9 NA -8 [-8. DK (-8) in V161156 or V161157 (FTF only)] 11 430 | 431 | ``` r 432 | anes %>% count(Education, V161270) 433 | ``` 434 | 435 | ## count: now 19 rows and 3 columns, ungrouped 436 | 437 | ## # A tibble: 19 x 3 438 | ## Education V161270 n 439 | ## 440 | ## 1 Less than HS 1 [1. Less than 1st grade] 1 441 | ## 2 Less than HS 2 [2. 1st, 2nd, 3rd or 4th grade] 3 442 | ## 3 Less than HS 3 [3. 5th or 6th grade] 15 443 | ## 4 Less than HS 4 [4. 7th or 8th grade] 22 444 | ## 5 Less than HS 5 [5. 9th grade] 32 445 | ## 6 Less than HS 6 [6. 10th grade] 40 446 | ## 7 Less than HS 7 [7. 11th grade] 62 447 | ## 8 Less than HS 8 [8. 12th grade no diploma] 107 448 | ## 9 High school 9 [9. High school graduate- high school diploma or equiv~ 810 449 | ## 10 High school 90 [90. Other specify given as: high school graduate] 5 450 | ## 11 Post HS 10 [10. Some college but no degree] 898 451 | ## 12 Post HS 11 [11. Associate degree in college - occupational /vocat~ 313 452 | ## 13 Post HS 12 [12. Associate degree in college -- academic program] 288 453 | ## 14 Bachelor's 13 [13. Bachelor's degree (for example: BA, AB, BS)] 955 454 | ## 15 Graduate 14 [14. Master's degree (for example: MA, MS, MENG, MED, ~ 499 455 | ## 16 Graduate 15 [15. Professional school degree (for example: MD, DDS,~ 88 456 | ## 17 Graduate 16 [16. Doctorate degree (for example: PHD, EDD)] 93 457 | ## 18 NA -9 [-9. Refused] 15 458 | ## 19 NA 95 [95. 
Other SPECIFY] 24 459 | 460 | ``` r 461 | anes %>% count(Income, Income7, V161361x) %>% print(n=30) 462 | ``` 463 | 464 | ## count: now 30 rows and 4 columns, ungrouped 465 | 466 | ## # A tibble: 30 x 4 467 | ## Income Income7 V161361x n 468 | ## 469 | ## 1 Under $5k Under $20k 1 [01. Under $5,000] 275 470 | ## 2 $5-10k Under $20k 2 [02. $5,000-$9,999] 96 471 | ## 3 $10-12.5k Under $20k 3 [03. $10,000-$12,499] 133 472 | ## 4 $12.5-15 Under $20k 4 [04. $12,500-$14,999] 37 473 | ## 5 $15-17.5k Under $20k 5 [05. $15,000-$17,499] 110 474 | ## 6 $17.5-20k Under $20k 6 [06. $17,500-$19,999] 52 475 | ## 7 $20-22.5k $20-40k 7 [07. $20,000-$22,499] 153 476 | ## 8 $22.5-25k $20-40k 8 [08. $22,500-$24,999] 64 477 | ## 9 $25-27.5k $20-40k 9 [09. $25,000-$27,499] 143 478 | ## 10 $27.5-30k $20-40k 10 [10. $27,500-$29,999] 34 479 | ## 11 $30-35k $20-40k 11 [11. $30,000-$34,999] 213 480 | ## 12 $35-40k $20-40k 12 [12. $35,000-$39,999] 166 481 | ## 13 $40-45k $40-60k 13 [13. $40,000-$44,999] 178 482 | ## 14 $45-50k $40-60k 14 [14. $45,000-$49,999] 154 483 | ## 15 $50-55k $40-60k 15 [15. $50,000-$54,999] 204 484 | ## 16 $55-60k $40-60k 16 [16. $55,000-$59,999] 85 485 | ## 17 $60-65k $60-80k 17 [17. $60,000-$64,999] 205 486 | ## 18 $65-70k $60-80k 18 [18. $65,000-$69,999] 107 487 | ## 19 $70-75k $60-80k 19 [19. $70,000-$74,999] 138 488 | ## 20 $75-80k $60-80k 20 [20. $75,000-$79,999] 126 489 | ## 21 $80-90k $80-100k 21 [21. $80,000-$89,999] 231 490 | ## 22 $90-100k $80-100k 22 [22. $90,000-$99,999] 176 491 | ## 23 $100-110k $100-125k 23 [23. $100,000-$109,999] 191 492 | ## 24 $110-125k $100-125k 24 [24. $110,000-$124,999] 182 493 | ## 25 $125-150k $125k or more 25 [25. $125,000-$149,999] 166 494 | ## 26 $150-175k $125k or more 26 [26. $150,000-$174,999] 154 495 | ## 27 $175-250k $125k or more 27 [27. $175,000-$249,999] 154 496 | ## 28 $250k or mo~ $125k or more 28 [28. $250,000 or more] 141 497 | ## 29 NA NA -9 [-9. Refused] 190 498 | ## 30 NA NA -5 [-5. 
Interview breakoff (sufficient part~ 12 499 | 500 | ``` r 501 | anes %>% count(CampaignInterest, V161004) 502 | ``` 503 | 504 | ## count: now 3 rows and 3 columns, ungrouped 505 | 506 | ## # A tibble: 3 x 3 507 | ## CampaignInterest V161004 n 508 | ## 509 | ## 1 Very much interested 1 [1. Very much interested] 2230 510 | ## 2 Somewhat interested 2 [2. Somewhat interested] 1519 511 | ## 3 Not much interested 3 [3. Not much interested] 521 512 | 513 | ``` r 514 | anes %>% count(TrustGovernment, V161215) 515 | ``` 516 | 517 | ## count: now 7 rows and 3 columns, ungrouped 518 | 519 | ## # A tibble: 7 x 3 520 | ## TrustGovernment V161215 n 521 | ## 522 | ## 1 Always 1 [1. Always] 66 523 | ## 2 Most of the time 2 [2. Most of the time] 429 524 | ## 3 About half the time 3 [3. About half the time] 1382 525 | ## 4 Some of the time 4 [4. Some of the time] 1826 526 | ## 5 Never 5 [5. Never] 545 527 | ## 6 NA -9 [-9. Refused] 19 528 | ## 7 NA -8 [-8. Don't know (FTF only)] 3 529 | 530 | ``` r 531 | anes %>% count(TrustPeople, V161219) 532 | ``` 533 | 534 | ## count: now 7 rows and 3 columns, ungrouped 535 | 536 | ## # A tibble: 7 x 3 537 | ## TrustPeople V161219 n 538 | ## 539 | ## 1 Always 1 [1. Always] 50 540 | ## 2 Most of the time 2 [2. Most of the time] 1765 541 | ## 3 About half the time 3 [3. About half the time] 1305 542 | ## 4 Some of the time 4 [4. Some of the time] 947 543 | ## 5 Never 5 [5. Never] 188 544 | ## 6 NA -9 [-9. Refused] 14 545 | ## 7 NA -8 [-8. Don't know (FTF only)] 1 546 | 547 | ``` r 548 | anes %>% count(VotedPres2012, V161005) 549 | ``` 550 | 551 | ## count: now 4 rows and 3 columns, ungrouped 552 | 553 | ## # A tibble: 4 x 3 554 | ## VotedPres2012 V161005 n 555 | ## 556 | ## 1 Yes 1 [1. Yes, voted] 3117 557 | ## 2 No 2 [2. No, didn't vote] 1137 558 | ## 3 NA -9 [-9. Refused] 2 559 | ## 4 NA -8 [-8. 
Don't know (FTF only)] 14 560 | 561 | ``` r 562 | anes %>% count(VotedPres2012_selection, V161006) 563 | ``` 564 | 565 | ## count: now 7 rows and 3 columns, ungrouped 566 | 567 | ## # A tibble: 7 x 3 568 | ## VotedPres2012_select~ V161006 n 569 | ## 570 | ## 1 Obama 1 [1. Barack Obama] 1728 571 | ## 2 Romney 2 [2. Mitt Romney] 1268 572 | ## 3 Other 5 [5. Other SPECIFY] 58 573 | ## 4 NA -9 [-9. Refused] 47 574 | ## 5 NA -8 [-8. Don't know (FTF only)] 13 575 | ## 6 NA -1 [-1. Inap, 2,-8,-9 in V161005] 1153 576 | ## 7 NA 6 [6. Other specify - specified as: Did not vot~ 3 577 | 578 | ``` r 579 | anes %>% count(VotedPres2016, V162031x) 580 | ``` 581 | 582 | ## count: now 4 rows and 3 columns, ungrouped 583 | 584 | ## # A tibble: 4 x 3 585 | ## VotedPres2016 V162031x n 586 | ## 587 | ## 1 Yes 1 [1. Voted in 2016] 2887 588 | ## 2 No 0 [0. Did not vote in 2016] 444 589 | ## 3 NA -8 [-8. Don't know (in V162031)] 1 590 | ## 4 NA -2 [-2. Missing, 3 in V162022 /FTF: -8,-9 in V162022 /WEB~ 938 591 | 592 | ``` r 593 | anes %>% count(VotedPres2016_selection, V162062x) 594 | ``` 595 | 596 | ## count: now 8 rows and 3 columns, ungrouped 597 | 598 | ## # A tibble: 8 x 3 599 | ## VotedPres2016_select~ V162062x n 600 | ## 601 | ## 1 Clinton 1 [1. Hillary Clinton] 1364 602 | ## 2 Trump 2 [2. Donald Trump] 1245 603 | ## 3 Other 3 [3. Gary Johnson] 118 604 | ## 4 Other 4 [4. Jill Stein] 32 605 | ## 5 Other 5 [5. Other candidate SPECIFY] 52 606 | ## 6 NA -9 [-9. Refused] 31 607 | ## 7 NA -8 [-8. Don't know (FTF only)] 2 608 | ## 8 NA -2 [-2. Missing, no vote for Pres in Post /no Pos~ 1426 609 | 610 | ``` r 611 | anes %>% count(EarlyVote2016, V161024x, VotedPres2016) 612 | ``` 613 | 614 | ## count: now 10 rows and 4 columns, ungrouped 615 | 616 | ## # A tibble: 10 x 4 617 | ## EarlyVote2016 V161024x VotedPres2016 n 618 | ## 619 | ## 1 Yes 4 [4. Registered and voted early] Yes 156 620 | ## 2 No 1 [1. Not (or DK /RF if) registered, does ~ Yes 28 621 | ## 3 No 2 [2. 
Not (or DK /RF if) registered, inten~ Yes 65 622 | ## 4 No 3 [3. Registered but did not vote early (o~ Yes 2638 623 | ## 5 NA 1 [1. Not (or DK /RF if) registered, does ~ No 31 624 | ## 6 NA 1 [1. Not (or DK /RF if) registered, does ~ NA 322 625 | ## 7 NA 2 [2. Not (or DK /RF if) registered, inten~ No 46 626 | ## 8 NA 2 [2. Not (or DK /RF if) registered, inten~ NA 120 627 | ## 9 NA 3 [3. Registered but did not vote early (o~ No 367 628 | ## 10 NA 3 [3. Registered but did not vote early (o~ NA 497 629 | 630 | ``` r 631 | anes %>% 632 | summarise(WtSum=sum(Weight)) %>% 633 | pull(WtSum) 634 | ``` 635 | 636 | ## summarise: now one row and one column, ungrouped 637 | 638 | ## [1] 3646.921 639 | 640 | ## Save data 641 | 642 | ``` r 643 | write_rds(anes, here("Data", "anes.rds"), compress="gz") 644 | ``` 645 | -------------------------------------------------------------------------------- /DataCleaningScripts/RECS_DataPrep.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Residential Energy Consumption Survey (RECS) 2015 Data Prep" 3 | output: github_document 4 | --- 5 | 6 | ```{r setup, include=FALSE} 7 | knitr::opts_chunk$set(echo = TRUE) 8 | ``` 9 | 10 | ## Data information 11 | 12 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021. 
13 | 14 | ```{r loadpackageh, message=FALSE} 15 | library(here) #easy relative paths 16 | ``` 17 | 18 | ```{r loadpackages} 19 | library(tidyverse) #data manipulation 20 | library(haven) #data import 21 | library(tidylog) #informative logging messages 22 | ``` 23 | ## Import data and create derived variables 24 | 25 | ```{r derivedata} 26 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) #read the raw RECS 2015 CSV via a project-relative path 27 | 28 | recs <- recs_in %>% #keep the columns listed in select(), then derive labelled variables from the raw codes in mutate() 29 | select(DOEID, REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, YEARMADERANGE, HEATHOME, EQUIPMUSE, TEMPHOME, TEMPGONE, TEMPNITE, AIRCOND, USECENAC, TEMPHOMEAC, TEMPGONEAC, TEMPNITEAC, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, CLIMATE_REGION_PUB, IECC_CLIMATE_PUB, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD=WOODBTU, BTUPELLET=PELLETBTU ) %>% #WOODBTU and PELLETBTU are renamed to BTUWOOD and BTUPELLET 30 | mutate( 31 | Region=parse_factor( 32 | case_when( 33 | REGIONC==1~"Northeast", 34 | REGIONC==2~"Midwest", 35 | REGIONC==3~"South", 36 | REGIONC==4~"West", 37 | ), levels=c("Northeast", "Midwest", "South", "West")), 38 | Division=parse_factor( 39 | case_when( 40 | DIVISION==1~"New England", 41 | DIVISION==2~"Middle Atlantic", 42 | DIVISION==3~"East North Central", 43 | DIVISION==4~"West North Central", 44 | DIVISION==5~"South Atlantic", 45 | DIVISION==6~"East South Central", 46 | DIVISION==7~"West South Central", 47 | DIVISION==8~"Mountain North", 48 | DIVISION==9~"Mountain South", 49 | DIVISION==10~"Pacific", 50 | ), levels=c("New England", "Middle Atlantic", "East North Central", "West North Central", "South Atlantic", "East South Central", "West South Central", "Mountain North", "Mountain South", "Pacific")), 51 | MSAStatus=fct_recode(METROMICRO, "Metropolitan Statistical Area"="METRO", "Micropolitan Statistical Area"="MICRO", "None"="NONE"), 52 | Urbanicity=parse_factor( 53 | case_when( 54 | UATYP10=="U"~"Urban Area", 55 | 
UATYP10=="C"~"Urban Cluster", 56 | UATYP10=="R"~"Rural" 57 | ), 58 | levels=c("Urban Area", "Urban Cluster", "Rural") 59 | ), 60 | HousingUnitType=parse_factor( 61 | case_when( 62 | TYPEHUQ==1~"Mobile home", 63 | TYPEHUQ==2~"Single-family detached", 64 | TYPEHUQ==3~"Single-family attached", 65 | TYPEHUQ==4~"Apartment: 2-4 Units", 66 | TYPEHUQ==5~"Apartment: 5 or more units", 67 | ), levels=c("Mobile home", "Single-family detached", "Single-family attached", "Apartment: 2-4 Units", "Apartment: 5 or more units")), 68 | YearMade=parse_factor( 69 | case_when( 70 | YEARMADERANGE==1~"Before 1950", 71 | YEARMADERANGE==2~"1950-1959", 72 | YEARMADERANGE==3~"1960-1969", 73 | YEARMADERANGE==4~"1970-1979", 74 | YEARMADERANGE==5~"1980-1989", 75 | YEARMADERANGE==6~"1990-1999", 76 | YEARMADERANGE==7~"2000-2009", 77 | YEARMADERANGE==8~"2010-2015", 78 | ), 79 | levels=c("Before 1950", "1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2015"), 80 | ordered = TRUE #the only ordered factor: levels run from oldest to newest construction period 81 | ), 82 | SpaceHeatingUsed=as.logical(HEATHOME), 83 | HeatingBehavior=parse_factor( 84 | case_when( 85 | EQUIPMUSE==1~"Set one temp and leave it", 86 | EQUIPMUSE==2~"Manually adjust at night/no one home", 87 | EQUIPMUSE==3~"Program thermostat to change at certain times", 88 | EQUIPMUSE==4~"Turn on or off as needed", 89 | EQUIPMUSE==5~"No control", 90 | EQUIPMUSE==9~"Other", 91 | EQUIPMUSE %in% c(-2, -9)~NA_character_), #-2 is the negative code actually present in the data (presumably "not applicable" - confirm against the codebook); -9 kept from the original recode 92 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control", "Other"), include_na=FALSE #parse_factor() defaults to include_na=TRUE, which would store missing values as an explicit "NA" level that is.na() cannot detect 93 | ), 94 | WinterTempDay=if_else(TEMPHOME>0, TEMPHOME, NA_real_), #non-positive temperature values are treated as missing 95 | WinterTempAway=if_else(TEMPGONE>0, TEMPGONE, NA_real_), 96 | WinterTempNight=if_else(TEMPNITE>0, TEMPNITE, NA_real_), 97 | ACUsed=as.logical(AIRCOND), 98 | ACBehavior=parse_factor( 99 | case_when( 100 | USECENAC==1~"Set one temp and leave it", 101 | USECENAC==2~"Manually adjust at night/no one home", 102 | USECENAC==3~"Program 
thermostat to change at certain times", 103 | USECENAC==4~"Turn on or off as needed", 104 | USECENAC==5~"No control", 105 | USECENAC %in% c(-2, -9)~NA_character_), #-2 is the negative code actually present in the data (presumably "not applicable" - confirm against the codebook); -9 kept from the original recode 106 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control"), include_na=FALSE #keep missing values as real NA rather than an explicit "NA" factor level 107 | ), 108 | SummerTempDay=if_else(TEMPHOMEAC>0, TEMPHOMEAC, NA_real_), #non-positive temperature values are treated as missing 109 | SummerTempAway=if_else(TEMPGONEAC>0, TEMPGONEAC, NA_real_), 110 | SummerTempNight=if_else(TEMPNITEAC>0, TEMPNITEAC, NA_real_), 111 | ClimateRegion_BA=parse_factor(CLIMATE_REGION_PUB), 112 | ClimateRegion_IECC=factor(IECC_CLIMATE_PUB) 113 | 114 | ) 115 | 116 | ``` 117 | 118 | 119 | ## Check derived variables for correct coding 120 | 121 | ```{r checkvars} 122 | recs %>% count(Region, REGIONC) #each check cross-tabulates a derived variable against the raw column it was built from 123 | recs %>% count(Division, DIVISION) 124 | recs %>% count(MSAStatus, METROMICRO) 125 | recs %>% count(Urbanicity, UATYP10) 126 | recs %>% count(HousingUnitType, TYPEHUQ) 127 | recs %>% count(YearMade, YEARMADERANGE) 128 | recs %>% count(SpaceHeatingUsed, HEATHOME) 129 | recs %>% count(HeatingBehavior, EQUIPMUSE) 130 | recs %>% count(ACUsed, AIRCOND) 131 | recs %>% count(ACBehavior, USECENAC) 132 | recs %>% count(ClimateRegion_BA, CLIMATE_REGION_PUB) 133 | recs %>% count(ClimateRegion_IECC, IECC_CLIMATE_PUB) 134 | 135 | ``` 136 | ## Save data 137 | 138 | ```{r savedat} 139 | recs_out <- recs %>% #keep the derived variables and drop the raw coded columns they replace 140 | select(DOEID, Region, Division, MSAStatus, Urbanicity, HousingUnitType, YearMade, SpaceHeatingUsed, HeatingBehavior, WinterTempDay, WinterTempAway, WinterTempNight, ACUsed, ACBehavior, SummerTempDay, SummerTempAway, SummerTempNight, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, ClimateRegion_BA, ClimateRegion_IECC, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD, BTUPELLET) 141 | 142 | summary(recs_out) 143 | write_rds(recs_out, 
here("Data", "recs.rds"), compress="gz") 144 | ``` 145 | 146 | 147 | -------------------------------------------------------------------------------- /DataCleaningScripts/RECS_DataPrep.md: -------------------------------------------------------------------------------- 1 | Residential Energy Consumption Survey (RECS) 2015 Data Prep 2 | ================ 3 | 4 | ## Data information 5 | 6 | All data and resources were downloaded from 7 | https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata 8 | on March 3, 2021. 9 | 10 | ``` r 11 | library(here) #easy relative paths 12 | ``` 13 | 14 | ## Warning: package 'here' was built under R version 4.0.4 15 | 16 | ``` r 17 | library(tidyverse) #data manipulation 18 | ``` 19 | 20 | ## -- Attaching packages ------------------------------------------------------------------------------ tidyverse 1.3.0 -- 21 | 22 | ## v ggplot2 3.3.2 v purrr 0.3.4 23 | ## v tibble 3.0.3 v dplyr 1.0.2 24 | ## v tidyr 1.1.2 v stringr 1.4.0 25 | ## v readr 1.3.1 v forcats 0.5.0 26 | 27 | ## -- Conflicts --------------------------------------------------------------------------------- tidyverse_conflicts() -- 28 | ## x dplyr::filter() masks stats::filter() 29 | ## x dplyr::lag() masks stats::lag() 30 | 31 | ``` r 32 | library(haven) #data import 33 | library(tidylog) #informative logging messages 34 | ``` 35 | 36 | ## Warning: package 'tidylog' was built under R version 4.0.4 37 | 38 | ## 39 | ## Attaching package: 'tidylog' 40 | 41 | ## The following objects are masked from 'package:dplyr': 42 | ## 43 | ## add_count, add_tally, anti_join, count, distinct, distinct_all, 44 | ## distinct_at, distinct_if, filter, filter_all, filter_at, filter_if, 45 | ## full_join, group_by, group_by_all, group_by_at, group_by_if, 46 | ## inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if, 47 | ## relocate, rename, rename_all, rename_at, rename_if, rename_with, 48 | ## right_join, sample_frac, sample_n, select, select_all, select_at, 49 | ## select_if, semi_join, slice, slice_head, slice_max, slice_min, 50 | ## 
slice_sample, slice_tail, summarise, summarise_all, summarise_at, 51 | ## summarise_if, summarize, summarize_all, summarize_at, summarize_if, 52 | ## tally, top_frac, top_n, transmute, transmute_all, transmute_at, 53 | ## transmute_if, ungroup 54 | 55 | ## The following objects are masked from 'package:tidyr': 56 | ## 57 | ## drop_na, fill, gather, pivot_longer, pivot_wider, replace_na, 58 | ## spread, uncount 59 | 60 | ## The following object is masked from 'package:stats': 61 | ## 62 | ## filter 63 | 64 | ## Import data and create derived variables 65 | 66 | ``` r 67 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) 68 | ``` 69 | 70 | ## Parsed with column specification: 71 | ## cols( 72 | ## .default = col_double(), 73 | ## METROMICRO = col_character(), 74 | ## UATYP10 = col_character(), 75 | ## CLIMATE_REGION_PUB = col_character(), 76 | ## IECC_CLIMATE_PUB = col_character() 77 | ## ) 78 | 79 | ## See spec(...) for full column specifications. 80 | 81 | ``` r 82 | recs <- recs_in %>% 83 | select(DOEID, REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, YEARMADERANGE, HEATHOME, EQUIPMUSE, TEMPHOME, TEMPGONE, TEMPNITE, AIRCOND, USECENAC, TEMPHOMEAC, TEMPGONEAC, TEMPNITEAC, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, CLIMATE_REGION_PUB, IECC_CLIMATE_PUB, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD=WOODBTU, BTUPELLET=PELLETBTU ) %>% 84 | mutate( 85 | Region=parse_factor( 86 | case_when( 87 | REGIONC==1~"Northeast", 88 | REGIONC==2~"Midwest", 89 | REGIONC==3~"South", 90 | REGIONC==4~"West", 91 | ), levels=c("Northeast", "Midwest", "South", "West")), 92 | Division=parse_factor( 93 | case_when( 94 | DIVISION==1~"New England", 95 | DIVISION==2~"Middle Atlantic", 96 | DIVISION==3~"East North Central", 97 | DIVISION==4~"West North Central", 98 | DIVISION==5~"South Atlantic", 99 | DIVISION==6~"East 
South Central", 100 | DIVISION==7~"West South Central", 101 | DIVISION==8~"Mountain North", 102 | DIVISION==9~"Mountain South", 103 | DIVISION==10~"Pacific", 104 | ), levels=c("New England", "Middle Atlantic", "East North Central", "West North Central", "South Atlantic", "East South Central", "West South Central", "Mountain North", "Mountain South", "Pacific")), 105 | MSAStatus=fct_recode(METROMICRO, "Metropolitan Statistical Area"="METRO", "Micropolitan Statistical Area"="MICRO", "None"="NONE"), 106 | Urbanicity=parse_factor( 107 | case_when( 108 | UATYP10=="U"~"Urban Area", 109 | UATYP10=="C"~"Urban Cluster", 110 | UATYP10=="R"~"Rural" 111 | ), 112 | levels=c("Urban Area", "Urban Cluster", "Rural") 113 | ), 114 | HousingUnitType=parse_factor( 115 | case_when( 116 | TYPEHUQ==1~"Mobile home", 117 | TYPEHUQ==2~"Single-family detached", 118 | TYPEHUQ==3~"Single-family attached", 119 | TYPEHUQ==4~"Apartment: 2-4 Units", 120 | TYPEHUQ==5~"Apartment: 5 or more units", 121 | ), levels=c("Mobile home", "Single-family detached", "Single-family attached", "Apartment: 2-4 Units", "Apartment: 5 or more units")), 122 | YearMade=parse_factor( 123 | case_when( 124 | YEARMADERANGE==1~"Before 1950", 125 | YEARMADERANGE==2~"1950-1959", 126 | YEARMADERANGE==3~"1960-1969", 127 | YEARMADERANGE==4~"1970-1979", 128 | YEARMADERANGE==5~"1980-1989", 129 | YEARMADERANGE==6~"1990-1999", 130 | YEARMADERANGE==7~"2000-2009", 131 | YEARMADERANGE==8~"2010-2015", 132 | ), 133 | levels=c("Before 1950", "1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2015"), 134 | ordered = TRUE 135 | ), 136 | SpaceHeatingUsed=as.logical(HEATHOME), 137 | HeatingBehavior=parse_factor( 138 | case_when( 139 | EQUIPMUSE==1~"Set one temp and leave it", 140 | EQUIPMUSE==2~"Manually adjust at night/no one home", 141 | EQUIPMUSE==3~"Program thermostat to change at certain times", 142 | EQUIPMUSE==4~"Turn on or off as needed", 143 | EQUIPMUSE==5~"No control", 144 | EQUIPMUSE==9~"Other", 
145 | EQUIPMUSE==-9~NA_character_), 146 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control", "Other") 147 | ), 148 | WinterTempDay=if_else(TEMPHOME>0, TEMPHOME, NA_real_), 149 | WinterTempAway=if_else(TEMPGONE>0, TEMPGONE, NA_real_), 150 | WinterTempNight=if_else(TEMPNITE>0, TEMPNITE, NA_real_), 151 | ACUsed=as.logical(AIRCOND), 152 | ACBehavior=parse_factor( 153 | case_when( 154 | USECENAC==1~"Set one temp and leave it", 155 | USECENAC==2~"Manually adjust at night/no one home", 156 | USECENAC==3~"Program thermostat to change at certain times", 157 | USECENAC==4~"Turn on or off as needed", 158 | USECENAC==5~"No control", 159 | USECENAC==-9~NA_character_), 160 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control") 161 | ), 162 | SummerTempDay=if_else(TEMPHOMEAC>0, TEMPHOMEAC, NA_real_), 163 | SummerTempAway=if_else(TEMPGONEAC>0, TEMPGONEAC, NA_real_), 164 | SummerTempNight=if_else(TEMPNITEAC>0, TEMPNITEAC, NA_real_), 165 | ClimateRegion_BA=parse_factor(CLIMATE_REGION_PUB), 166 | ClimateRegion_IECC=factor(IECC_CLIMATE_PUB) 167 | 168 | ) 169 | ``` 170 | 171 | ## select: renamed 2 variables (BTUWOOD, BTUPELLET) and dropped 619 variables 172 | 173 | ## mutate: new variable 'Region' (factor) with 4 unique values and 0% NA 174 | 175 | ## new variable 'Division' (factor) with 10 unique values and 0% NA 176 | 177 | ## new variable 'MSAStatus' (factor) with 3 unique values and 0% NA 178 | 179 | ## new variable 'Urbanicity' (factor) with 3 unique values and 0% NA 180 | 181 | ## new variable 'HousingUnitType' (factor) with 5 unique values and 0% NA 182 | 183 | ## new variable 'YearMade' (ordered factor) with 8 unique values and 0% NA 184 | 185 | ## new variable 'SpaceHeatingUsed' (logical) with 2 unique values and 0% NA 186 | 187 | ## new variable 
'HeatingBehavior' (factor) with 7 unique values and 0% NA 188 | 189 | ## new variable 'WinterTempDay' (double) with 35 unique values and 5% NA 190 | 191 | ## new variable 'WinterTempAway' (double) with 37 unique values and 5% NA 192 | 193 | ## new variable 'WinterTempNight' (double) with 38 unique values and 5% NA 194 | 195 | ## new variable 'ACUsed' (logical) with 2 unique values and 0% NA 196 | 197 | ## new variable 'ACBehavior' (factor) with 6 unique values and 0% NA 198 | 199 | ## new variable 'SummerTempDay' (double) with 38 unique values and 13% NA 200 | 201 | ## new variable 'SummerTempAway' (double) with 35 unique values and 13% NA 202 | 203 | ## new variable 'SummerTempNight' (double) with 36 unique values and 13% NA 204 | 205 | ## new variable 'ClimateRegion_BA' (factor) with 5 unique values and 0% NA 206 | 207 | ## new variable 'ClimateRegion_IECC' (factor) with 11 unique values and 0% NA 208 | 209 | ## Check derived variables for correct coding 210 | 211 | ``` r 212 | recs %>% count(Region, REGIONC) 213 | ``` 214 | 215 | ## count: now 4 rows and 3 columns, ungrouped 216 | 217 | ## # A tibble: 4 x 3 218 | ## Region REGIONC n 219 | ## 220 | ## 1 Northeast 1 794 221 | ## 2 Midwest 2 1327 222 | ## 3 South 3 2010 223 | ## 4 West 4 1555 224 | 225 | ``` r 226 | recs %>% count(Division, DIVISION) 227 | ``` 228 | 229 | ## count: now 10 rows and 3 columns, ungrouped 230 | 231 | ## # A tibble: 10 x 3 232 | ## Division DIVISION n 233 | ## 234 | ## 1 New England 1 253 235 | ## 2 Middle Atlantic 2 541 236 | ## 3 East North Central 3 836 237 | ## 4 West North Central 4 491 238 | ## 5 South Atlantic 5 1058 239 | ## 6 East South Central 6 372 240 | ## 7 West South Central 7 580 241 | ## 8 Mountain North 8 228 242 | ## 9 Mountain South 9 242 243 | ## 10 Pacific 10 1085 244 | 245 | ``` r 246 | recs %>% count(MSAStatus, METROMICRO) 247 | ``` 248 | 249 | ## count: now 3 rows and 3 columns, ungrouped 250 | 251 | ## # A tibble: 3 x 3 252 | ## MSAStatus METROMICRO n 253 | ## 
254 | ## 1 Metropolitan Statistical Area METRO 4745 255 | ## 2 Micropolitan Statistical Area MICRO 584 256 | ## 3 None NONE 357 257 | 258 | ``` r 259 | recs %>% count(Urbanicity, UATYP10) 260 | ``` 261 | 262 | ## count: now 3 rows and 3 columns, ungrouped 263 | 264 | ## # A tibble: 3 x 3 265 | ## Urbanicity UATYP10 n 266 | ## 267 | ## 1 Urban Area U 3928 268 | ## 2 Urban Cluster C 598 269 | ## 3 Rural R 1160 270 | 271 | ``` r 272 | recs %>% count(HousingUnitType, TYPEHUQ) 273 | ``` 274 | 275 | ## count: now 5 rows and 3 columns, ungrouped 276 | 277 | ## # A tibble: 5 x 3 278 | ## HousingUnitType TYPEHUQ n 279 | ## 280 | ## 1 Mobile home 1 286 281 | ## 2 Single-family detached 2 3752 282 | ## 3 Single-family attached 3 479 283 | ## 4 Apartment: 2-4 Units 4 311 284 | ## 5 Apartment: 5 or more units 5 858 285 | 286 | ``` r 287 | recs %>% count(YearMade, YEARMADERANGE) 288 | ``` 289 | 290 | ## count: now 8 rows and 3 columns, ungrouped 291 | 292 | ## # A tibble: 8 x 3 293 | ## YearMade YEARMADERANGE n 294 | ## 295 | ## 1 Before 1950 1 858 296 | ## 2 1950-1959 2 544 297 | ## 3 1960-1969 3 565 298 | ## 4 1970-1979 4 928 299 | ## 5 1980-1989 5 874 300 | ## 6 1990-1999 6 786 301 | ## 7 2000-2009 7 901 302 | ## 8 2010-2015 8 230 303 | 304 | ``` r 305 | recs %>% count(SpaceHeatingUsed, HEATHOME) 306 | ``` 307 | 308 | ## count: now 2 rows and 3 columns, ungrouped 309 | 310 | ## # A tibble: 2 x 3 311 | ## SpaceHeatingUsed HEATHOME n 312 | ## 313 | ## 1 FALSE 0 258 314 | ## 2 TRUE 1 5428 315 | 316 | ``` r 317 | recs %>% count(HeatingBehavior, EQUIPMUSE) 318 | ``` 319 | 320 | ## count: now 7 rows and 3 columns, ungrouped 321 | 322 | ## # A tibble: 7 x 3 323 | ## HeatingBehavior EQUIPMUSE n 324 | ## 325 | ## 1 Set one temp and leave it 1 2156 326 | ## 2 Manually adjust at night/no one home 2 1414 327 | ## 3 Program thermostat to change at certain times 3 972 328 | ## 4 Turn on or off as needed 4 761 329 | ## 5 No control 5 114 330 | ## 6 Other 9 11 331 | ## 7 -2 258 332 | 333 | 
``` r 334 | recs %>% count(ACUsed, AIRCOND) 335 | ``` 336 | 337 | ## count: now 2 rows and 3 columns, ungrouped 338 | 339 | ## # A tibble: 2 x 3 340 | ## ACUsed AIRCOND n 341 | ## 342 | ## 1 FALSE 0 737 343 | ## 2 TRUE 1 4949 344 | 345 | ``` r 346 | recs %>% count(ACBehavior, USECENAC) 347 | ``` 348 | 349 | ## count: now 6 rows and 3 columns, ungrouped 350 | 351 | ## # A tibble: 6 x 3 352 | ## ACBehavior USECENAC n 353 | ## 354 | ## 1 Set one temp and leave it 1 1661 355 | ## 2 Manually adjust at night/no one home 2 984 356 | ## 3 Program thermostat to change at certain times 3 727 357 | ## 4 Turn on or off as needed 4 438 358 | ## 5 No control 5 2 359 | ## 6 -2 1874 360 | 361 | ``` r 362 | recs %>% count(ClimateRegion_BA, CLIMATE_REGION_PUB) 363 | ``` 364 | 365 | ## count: now 5 rows and 3 columns, ungrouped 366 | 367 | ## # A tibble: 5 x 3 368 | ## ClimateRegion_BA CLIMATE_REGION_PUB n 369 | ## 370 | ## 1 Hot-Dry/Mixed-Dry Hot-Dry/Mixed-Dry 750 371 | ## 2 Hot-Humid Hot-Humid 1036 372 | ## 3 Mixed-Humid Mixed-Humid 1468 373 | ## 4 Cold/Very Cold Cold/Very Cold 2008 374 | ## 5 Marine Marine 424 375 | 376 | ``` r 377 | recs %>% count(ClimateRegion_IECC, IECC_CLIMATE_PUB) 378 | ``` 379 | 380 | ## count: now 11 rows and 3 columns, ungrouped 381 | 382 | ## # A tibble: 11 x 3 383 | ## ClimateRegion_IECC IECC_CLIMATE_PUB n 384 | ## 385 | ## 1 1A-2A 1A-2A 846 386 | ## 2 2B 2B 106 387 | ## 3 3A 3A 637 388 | ## 4 3B-4B 3B-4B 644 389 | ## 5 3C 3C 209 390 | ## 6 4A 4A 1021 391 | ## 7 4C 4C 215 392 | ## 8 5A 5A 1240 393 | ## 9 5B-5C 5B-5C 332 394 | ## 10 6A-6B 6A-6B 376 395 | ## 11 7A-7B-7AK-8AK 7A-7B-7AK-8AK 60 396 | 397 | ## Save data 398 | 399 | ``` r 400 | recs_out <- recs %>% 401 | select(DOEID, Region, Division, MSAStatus, Urbanicity, HousingUnitType, YearMade, SpaceHeatingUsed, HeatingBehavior, WinterTempDay, WinterTempAway, WinterTempNight, ACUsed, ACBehavior, SummerTempDay, SummerTempAway, SummerTempNight, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, 
starts_with("BRRWT"), CDD30YR, CDD65, CDD80, ClimateRegion_BA, ClimateRegion_IECC, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD, BTUPELLET) 402 | ``` 403 | 404 | ## select: dropped 18 variables (REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, …) 405 | 406 | ``` r 407 | summary(recs_out) 408 | ``` 409 | 410 | ## DOEID Region Division 411 | ## Min. :10001 Northeast: 794 Pacific :1085 412 | ## 1st Qu.:11422 Midwest :1327 South Atlantic :1058 413 | ## Median :12844 South :2010 East North Central: 836 414 | ## Mean :12844 West :1555 West South Central: 580 415 | ## 3rd Qu.:14265 Middle Atlantic : 541 416 | ## Max. :15686 West North Central: 491 417 | ## (Other) :1095 418 | ## MSAStatus Urbanicity 419 | ## Metropolitan Statistical Area:4745 Urban Area :3928 420 | ## Micropolitan Statistical Area: 584 Urban Cluster: 598 421 | ## None : 357 Rural :1160 422 | ## 423 | ## 424 | ## 425 | ## 426 | ## HousingUnitType YearMade SpaceHeatingUsed 427 | ## Mobile home : 286 1970-1979 :928 Mode :logical 428 | ## Single-family detached :3752 2000-2009 :901 FALSE:258 429 | ## Single-family attached : 479 1980-1989 :874 TRUE :5428 430 | ## Apartment: 2-4 Units : 311 Before 1950:858 431 | ## Apartment: 5 or more units: 858 1990-1999 :786 432 | ## 1960-1969 :565 433 | ## (Other) :774 434 | ## HeatingBehavior WinterTempDay 435 | ## Set one temp and leave it :2156 Min. :50.00 436 | ## Manually adjust at night/no one home :1414 1st Qu.:68.00 437 | ## Program thermostat to change at certain times: 972 Median :70.00 438 | ## Turn on or off as needed : 761 Mean :70.06 439 | ## No control : 114 3rd Qu.:72.00 440 | ## Other : 11 Max. :90.00 441 | ## NA : 258 NA's :258 442 | ## WinterTempAway WinterTempNight ACUsed 443 | ## Min. :50.00 Min. 
:50.00 Mode :logical 444 | ## 1st Qu.:65.00 1st Qu.:65.00 FALSE:737 445 | ## Median :68.00 Median :68.00 TRUE :4949 446 | ## Mean :67.12 Mean :68.06 447 | ## 3rd Qu.:70.00 3rd Qu.:70.00 448 | ## Max. :90.00 Max. :90.00 449 | ## NA's :258 NA's :258 450 | ## ACBehavior SummerTempDay 451 | ## Set one temp and leave it :1661 Min. :50.00 452 | ## Manually adjust at night/no one home : 984 1st Qu.:70.00 453 | ## Program thermostat to change at certain times: 727 Median :72.00 454 | ## Turn on or off as needed : 438 Mean :72.66 455 | ## No control : 2 3rd Qu.:76.00 456 | ## NA :1874 Max. :90.00 457 | ## NA's :737 458 | ## SummerTempAway SummerTempNight TOTCSQFT TOTHSQFT TOTSQFT_EN 459 | ## Min. :50.00 Min. :50.00 Min. : 0.0 Min. : 0 Min. : 221 460 | ## 1st Qu.:71.00 1st Qu.:70.00 1st Qu.: 466.2 1st Qu.:1008 1st Qu.:1100 461 | ## Median :75.00 Median :72.00 Median :1218.5 Median :1559 Median :1774 462 | ## Mean :74.63 Mean :71.82 Mean :1454.5 Mean :1816 Mean :2081 463 | ## 3rd Qu.:78.00 3rd Qu.:75.00 3rd Qu.:2094.0 3rd Qu.:2400 3rd Qu.:2766 464 | ## Max. :90.00 Max. :90.00 Max. :8066.0 Max. :8066 Max. :8501 465 | ## NA's :737 NA's :737 466 | ## TOTUCSQFT TOTUSQFT NWEIGHT BRRWT1 467 | ## Min. : 0.0 Min. : 0.0 Min. : 1236 Min. : 1836 468 | ## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 13874 1st Qu.: 9859 469 | ## Median : 400.0 Median : 250.0 Median : 18510 Median : 16942 470 | ## Mean : 793.9 Mean : 432.6 Mean : 20789 Mean : 20789 471 | ## 3rd Qu.:1150.0 3rd Qu.: 569.8 3rd Qu.: 24840 3rd Qu.: 27219 472 | ## Max. :7986.0 Max. :6660.0 Max. :139307 Max. :203902 473 | ## 474 | ## BRRWT2 BRRWT3 BRRWT4 BRRWT5 475 | ## Min. : 685.9 Min. : 543.9 Min. : 699.7 Min. : 649.3 476 | ## 1st Qu.: 9733.0 1st Qu.: 9575.3 1st Qu.: 9518.5 1st Qu.: 9598.5 477 | ## Median : 16993.7 Median : 16698.7 Median : 17034.2 Median : 16487.5 478 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 479 | ## 3rd Qu.: 27825.1 3rd Qu.: 27941.8 3rd Qu.: 27931.5 3rd Qu.: 27856.7 480 | ## Max. :189788.1 Max. 
:180155.3 Max. :159902.6 Max. :141796.4 481 | ## 482 | ## BRRWT6 BRRWT7 BRRWT8 BRRWT9 483 | ## Min. : 638.7 Min. : 564.1 Min. : 591 Min. : 545.2 484 | ## 1st Qu.: 9501.7 1st Qu.: 9534.4 1st Qu.: 9653 1st Qu.: 9595.0 485 | ## Median : 16150.6 Median : 16332.5 Median : 16802 Median : 17352.7 486 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789.3 487 | ## 3rd Qu.: 28092.8 3rd Qu.: 27992.5 3rd Qu.: 27926 3rd Qu.: 27753.7 488 | ## Max. :189031.8 Max. :192311.7 Max. :195071 Max. :117167.3 489 | ## 490 | ## BRRWT10 BRRWT11 BRRWT12 BRRWT13 491 | ## Min. : 732.5 Min. : 586.1 Min. : 549.8 Min. : 668 492 | ## 1st Qu.: 9077.6 1st Qu.: 9448.5 1st Qu.: 9388.2 1st Qu.: 9757 493 | ## Median : 16601.9 Median : 16172.3 Median : 16167.4 Median : 16584 494 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789 495 | ## 3rd Qu.: 28089.9 3rd Qu.: 28022.1 3rd Qu.: 28075.4 3rd Qu.: 27455 496 | ## Max. :183073.4 Max. :195408.4 Max. :197373.3 Max. :182228 497 | ## 498 | ## BRRWT14 BRRWT15 BRRWT16 BRRWT17 499 | ## Min. : 544.5 Min. : 671.4 Min. : 603.4 Min. : 563.3 500 | ## 1st Qu.: 9491.8 1st Qu.: 9341.8 1st Qu.: 9804.6 1st Qu.: 9593.2 501 | ## Median : 17028.9 Median : 15996.8 Median : 16562.6 Median : 16750.8 502 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 503 | ## 3rd Qu.: 27975.3 3rd Qu.: 28117.5 3rd Qu.: 27322.1 3rd Qu.: 27458.0 504 | ## Max. :173341.2 Max. :179152.7 Max. :210507.2 Max. :195346.9 505 | ## 506 | ## BRRWT18 BRRWT19 BRRWT20 BRRWT21 507 | ## Min. : 517.2 Min. : 657 Min. : 682.2 Min. : 689.4 508 | ## 1st Qu.: 9839.6 1st Qu.: 9776 1st Qu.: 9569.2 1st Qu.: 9663.9 509 | ## Median : 16560.5 Median : 16779 Median : 16881.2 Median : 16503.8 510 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789.3 511 | ## 3rd Qu.: 27636.2 3rd Qu.: 27986 3rd Qu.: 27467.7 3rd Qu.: 27863.0 512 | ## Max. :158094.9 Max. :197236 Max. :146347.4 Max. :181583.8 513 | ## 514 | ## BRRWT22 BRRWT23 BRRWT24 BRRWT25 515 | ## Min. : 581.3 Min. : 658.4 Min. 
: 698.7 Min. : 541.3 516 | ## 1st Qu.: 9805.3 1st Qu.: 9597.1 1st Qu.: 9387.9 1st Qu.: 9502.9 517 | ## Median : 16711.4 Median : 16205.0 Median : 16398.2 Median : 17120.6 518 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 519 | ## 3rd Qu.: 27503.4 3rd Qu.: 27855.2 3rd Qu.: 27791.0 3rd Qu.: 28108.8 520 | ## Max. :173557.2 Max. :182366.0 Max. :170970.0 Max. :128220.6 521 | ## 522 | ## BRRWT26 BRRWT27 BRRWT28 BRRWT29 523 | ## Min. : 832.9 Min. : 1372 Min. : 764.7 Min. : 854 524 | ## 1st Qu.: 9593.2 1st Qu.: 9333 1st Qu.: 9358.0 1st Qu.: 9596 525 | ## Median : 16642.2 Median : 16671 Median : 16663.4 Median : 16336 526 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789 527 | ## 3rd Qu.: 28018.5 3rd Qu.: 27832 3rd Qu.: 28065.9 3rd Qu.: 27506 528 | ## Max. :176770.0 Max. :176453 Max. :210413.6 Max. :194434 529 | ## 530 | ## BRRWT30 BRRWT31 BRRWT32 BRRWT33 531 | ## Min. : 680.6 Min. : 868.4 Min. : 645.1 Min. : 714.2 532 | ## 1st Qu.: 9689.3 1st Qu.: 9493.1 1st Qu.: 9370.6 1st Qu.: 9530.8 533 | ## Median : 16683.8 Median : 16876.0 Median : 16594.5 Median : 16839.7 534 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 535 | ## 3rd Qu.: 27613.1 3rd Qu.: 27807.8 3rd Qu.: 28250.9 3rd Qu.: 27610.2 536 | ## Max. :118557.6 Max. :197960.8 Max. :182658.3 Max. :183414.8 537 | ## 538 | ## BRRWT34 BRRWT35 BRRWT36 BRRWT37 539 | ## Min. : 1880 Min. : 629.3 Min. : 980.2 Min. : 634.6 540 | ## 1st Qu.: 9703 1st Qu.: 9842.0 1st Qu.: 9439.6 1st Qu.: 9276.7 541 | ## Median : 16380 Median : 17204.4 Median : 16440.6 Median : 16620.9 542 | ## Mean : 20789 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 543 | ## 3rd Qu.: 27846 3rd Qu.: 27533.4 3rd Qu.: 28354.2 3rd Qu.: 27754.3 544 | ## Max. :130246 Max. :125674.9 Max. :171375.9 Max. :209103.9 545 | ## 546 | ## BRRWT38 BRRWT39 BRRWT40 BRRWT41 547 | ## Min. : 738.1 Min. : 684.5 Min. : 1531 Min. 
: 1406 548 | ## 1st Qu.: 9737.9 1st Qu.: 9389.5 1st Qu.: 9624 1st Qu.: 9776 549 | ## Median : 16862.8 Median : 16797.7 Median : 16644 Median : 16910 550 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789 551 | ## 3rd Qu.: 27710.0 3rd Qu.: 27850.3 3rd Qu.: 27858 3rd Qu.: 27616 552 | ## Max. :187208.7 Max. :136106.4 Max. :165612 Max. :145467 553 | ## 554 | ## BRRWT42 BRRWT43 BRRWT44 BRRWT45 555 | ## Min. : 943.8 Min. : 683.3 Min. : 866.4 Min. : 1105 556 | ## 1st Qu.: 9446.7 1st Qu.: 9563.6 1st Qu.: 9595.5 1st Qu.: 9563 557 | ## Median : 16177.2 Median : 16999.1 Median : 17034.6 Median : 16629 558 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789 559 | ## 3rd Qu.: 28089.3 3rd Qu.: 27724.1 3rd Qu.: 27593.8 3rd Qu.: 27773 560 | ## Max. :189726.6 Max. :192302.9 Max. :190671.5 Max. :160108 561 | ## 562 | ## BRRWT46 BRRWT47 BRRWT48 BRRWT49 563 | ## Min. : 750.7 Min. : 1230 Min. : 684.4 Min. : 627.1 564 | ## 1st Qu.: 9616.2 1st Qu.: 9362 1st Qu.: 9383.9 1st Qu.: 9489.0 565 | ## Median : 16821.6 Median : 16243 Median : 16720.3 Median : 17068.6 566 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789.3 567 | ## 3rd Qu.: 27563.3 3rd Qu.: 27547 3rd Qu.: 27965.8 3rd Qu.: 27829.1 568 | ## Max. :183963.8 Max. :196001 Max. :199079.7 Max. :203407.7 569 | ## 570 | ## BRRWT50 BRRWT51 BRRWT52 BRRWT53 571 | ## Min. : 1638 Min. : 922.9 Min. : 749.9 Min. : 871.8 572 | ## 1st Qu.: 9601 1st Qu.: 9704.7 1st Qu.: 9496.9 1st Qu.: 9489.1 573 | ## Median : 16788 Median : 16706.2 Median : 16442.9 Median : 16494.9 574 | ## Mean : 20789 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 575 | ## 3rd Qu.: 27667 3rd Qu.: 27755.8 3rd Qu.: 27621.2 3rd Qu.: 28075.0 576 | ## Max. :223546 Max. :161561.8 Max. :146056.0 Max. :143796.6 577 | ## 578 | ## BRRWT54 BRRWT55 BRRWT56 BRRWT57 579 | ## Min. : 687.9 Min. : 2056 Min. : 623.7 Min. 
: 713.4 580 | ## 1st Qu.: 9623.3 1st Qu.: 9595 1st Qu.: 9798.4 1st Qu.: 9393.8 581 | ## Median : 16662.9 Median : 16589 Median : 16624.8 Median : 17198.4 582 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789.3 583 | ## 3rd Qu.: 27612.8 3rd Qu.: 27857 3rd Qu.: 27650.0 3rd Qu.: 27964.1 584 | ## Max. :174657.5 Max. :206797 Max. :226169.8 Max. :162193.6 585 | ## 586 | ## BRRWT58 BRRWT59 BRRWT60 BRRWT61 587 | ## Min. : 905.5 Min. : 630.7 Min. : 1275 Min. : 546.4 588 | ## 1st Qu.: 9559.2 1st Qu.: 9623.7 1st Qu.: 9577 1st Qu.: 9387.4 589 | ## Median : 16540.0 Median : 16656.6 Median : 16197 Median : 16376.3 590 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789.3 591 | ## 3rd Qu.: 27780.9 3rd Qu.: 27577.8 3rd Qu.: 27781 3rd Qu.: 28016.5 592 | ## Max. :211170.6 Max. :206702.7 Max. :169387 Max. :122260.9 593 | ## 594 | ## BRRWT62 BRRWT63 BRRWT64 BRRWT65 595 | ## Min. : 739.7 Min. : 671.5 Min. : 926.4 Min. : 1144 596 | ## 1st Qu.: 9643.5 1st Qu.: 9455.3 1st Qu.: 9400.5 1st Qu.: 9597 597 | ## Median : 17067.2 Median : 16632.1 Median : 16508.1 Median : 16442 598 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789 599 | ## 3rd Qu.: 27540.6 3rd Qu.: 28020.8 3rd Qu.: 27693.9 3rd Qu.: 27348 600 | ## Max. :158200.9 Max. :196933.9 Max. :217490.7 Max. :239712 601 | ## 602 | ## BRRWT66 BRRWT67 BRRWT68 BRRWT69 603 | ## Min. : 1264 Min. : 684.8 Min. : 1053 Min. : 1676 604 | ## 1st Qu.: 9758 1st Qu.: 9588.0 1st Qu.: 9245 1st Qu.: 9371 605 | ## Median : 16565 Median : 16560.8 Median : 16464 Median : 16682 606 | ## Mean : 20789 Mean : 20789.3 Mean : 20789 Mean : 20789 607 | ## 3rd Qu.: 27884 3rd Qu.: 27838.7 3rd Qu.: 28108 3rd Qu.: 27957 608 | ## Max. :157193 Max. :179204.9 Max. :183266 Max. :193274 609 | ## 610 | ## BRRWT70 BRRWT71 BRRWT72 BRRWT73 611 | ## Min. : 758.4 Min. : 892.2 Min. : 695.5 Min. 
: 875 612 | ## 1st Qu.: 9622.5 1st Qu.: 9451.9 1st Qu.: 9516.0 1st Qu.: 9734 613 | ## Median : 16676.4 Median : 16482.8 Median : 16717.8 Median : 16930 614 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789 615 | ## 3rd Qu.: 27897.7 3rd Qu.: 27882.7 3rd Qu.: 27611.7 3rd Qu.: 27756 616 | ## Max. :146583.8 Max. :126528.3 Max. :196704.6 Max. :184412 617 | ## 618 | ## BRRWT74 BRRWT75 BRRWT76 BRRWT77 619 | ## Min. : 541.6 Min. : 669.7 Min. : 617 Min. : 560.5 620 | ## 1st Qu.: 9503.9 1st Qu.: 9835.9 1st Qu.: 9385 1st Qu.: 9673.8 621 | ## Median : 16128.6 Median : 16921.5 Median : 17000 Median : 16713.6 622 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789.3 623 | ## 3rd Qu.: 27849.9 3rd Qu.: 27352.3 3rd Qu.: 27558 3rd Qu.: 27712.8 624 | ## Max. :125833.8 Max. :194829.8 Max. :212262 Max. :234971.4 625 | ## 626 | ## BRRWT78 BRRWT79 BRRWT80 BRRWT81 627 | ## Min. : 526.7 Min. : 651.1 Min. : 675.7 Min. : 681.2 628 | ## 1st Qu.: 9744.1 1st Qu.: 9549.7 1st Qu.: 9554.4 1st Qu.: 9489.0 629 | ## Median : 17098.9 Median : 16676.0 Median : 16707.8 Median : 16769.3 630 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 631 | ## 3rd Qu.: 27459.8 3rd Qu.: 27857.9 3rd Qu.: 27688.3 3rd Qu.: 27901.5 632 | ## Max. :152055.4 Max. :180157.0 Max. :165661.6 Max. :191740.1 633 | ## 634 | ## BRRWT82 BRRWT83 BRRWT84 BRRWT85 635 | ## Min. : 563.6 Min. : 656.9 Min. : 652.7 Min. : 675.4 636 | ## 1st Qu.: 9216.4 1st Qu.: 9634.4 1st Qu.: 9432.5 1st Qu.: 9551.2 637 | ## Median : 16121.6 Median : 16516.9 Median : 16454.8 Median : 16902.2 638 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 639 | ## 3rd Qu.: 28253.1 3rd Qu.: 27725.8 3rd Qu.: 28006.4 3rd Qu.: 27325.4 640 | ## Max. :171004.8 Max. :184719.0 Max. :191550.3 Max. :198238.4 641 | ## 642 | ## BRRWT86 BRRWT87 BRRWT88 BRRWT89 643 | ## Min. : 680.3 Min. : 551.7 Min. : 704.2 Min. 
: 644.9 644 | ## 1st Qu.: 9619.8 1st Qu.: 9436.6 1st Qu.: 9393.1 1st Qu.: 9643.2 645 | ## Median : 16772.0 Median : 16799.0 Median : 16778.6 Median : 16586.1 646 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 647 | ## 3rd Qu.: 27638.1 3rd Qu.: 28046.3 3rd Qu.: 27789.9 3rd Qu.: 28075.4 648 | ## Max. :232065.5 Max. :179835.0 Max. :166866.1 Max. :144299.3 649 | ## 650 | ## BRRWT90 BRRWT91 BRRWT92 BRRWT93 651 | ## Min. : 649.2 Min. : 568.2 Min. : 591.9 Min. : 545.3 652 | ## 1st Qu.: 9467.7 1st Qu.: 9506.3 1st Qu.: 9610.6 1st Qu.: 9688.4 653 | ## Median : 16212.0 Median : 16781.5 Median : 16524.1 Median : 16258.4 654 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 655 | ## 3rd Qu.: 28020.8 3rd Qu.: 27876.1 3rd Qu.: 27915.1 3rd Qu.: 27728.8 656 | ## Max. :175279.5 Max. :205917.4 Max. :225638.4 Max. :117260.5 657 | ## 658 | ## BRRWT94 BRRWT95 BRRWT96 CDD30YR 659 | ## Min. : 716.2 Min. : 566.4 Min. : 551.1 Min. : 0 660 | ## 1st Qu.: 9561.6 1st Qu.: 9530.2 1st Qu.: 9533.2 1st Qu.: 712 661 | ## Median : 17099.7 Median : 16577.2 Median : 16358.9 Median :1150 662 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean :1451 663 | ## 3rd Qu.: 27853.9 3rd Qu.: 27441.4 3rd Qu.: 27823.1 3rd Qu.:1880 664 | ## Max. :207264.3 Max. :205015.8 Max. :171550.8 Max. :5792 665 | ## 666 | ## CDD65 CDD80 ClimateRegion_BA ClimateRegion_IECC 667 | ## Min. : 0 Min. : 0.0 Hot-Dry/Mixed-Dry: 750 5A :1240 668 | ## 1st Qu.: 793 1st Qu.: 10.0 Hot-Humid :1036 4A :1021 669 | ## Median :1378 Median : 60.0 Mixed-Humid :1468 1A-2A : 846 670 | ## Mean :1719 Mean : 174.7 Cold/Very Cold :2008 3B-4B : 644 671 | ## 3rd Qu.:2231 3rd Qu.: 208.0 Marine : 424 3A : 637 672 | ## Max. :6607 Max. :2297.0 6A-6B : 376 673 | ## (Other): 922 674 | ## HDD30YR HDD65 HDD50 GNDHDD65 675 | ## Min. : 0 Min. : 0 Min. : 0 Min. 
: 0 676 | ## 1st Qu.: 2102 1st Qu.:1881 1st Qu.: 260 1st Qu.: 1337 677 | ## Median : 4353 Median :3878 Median :1260 Median : 3704 678 | ## Mean : 4087 Mean :3708 Mean :1486 Mean : 3578 679 | ## 3rd Qu.: 5967 3rd Qu.:5467 3rd Qu.:2499 3rd Qu.: 5630 680 | ## Max. :12184 Max. :9843 Max. :4956 Max. :11851 681 | ## 682 | ## BTUEL DOLLAREL BTUNG DOLLARNG 683 | ## Min. : 201.6 Min. : 18.72 Min. : 0 Min. : 0.0 684 | ## 1st Qu.: 20221.3 1st Qu.: 815.12 1st Qu.: 0 1st Qu.: 0.0 685 | ## Median : 32582.4 Median :1253.02 Median : 17961 Median : 231.8 686 | ## Mean : 37630.7 Mean :1403.78 Mean : 33331 Mean : 346.8 687 | ## 3rd Qu.: 49670.6 3rd Qu.:1830.83 3rd Qu.: 57126 3rd Qu.: 605.1 688 | ## Max. :215695.7 Max. :8121.56 Max. :306594 Max. :2789.8 689 | ## 690 | ## BTULP DOLLARLP BTUFO DOLLARFO 691 | ## Min. : 0 Min. : 0.00 Min. : 0 Min. : 0.00 692 | ## 1st Qu.: 0 1st Qu.: 0.00 1st Qu.: 0 1st Qu.: 0.00 693 | ## Median : 0 Median : 0.00 Median : 0 Median : 0.00 694 | ## Mean : 3192 Mean : 67.72 Mean : 3569 Mean : 64.08 695 | ## 3rd Qu.: 0 3rd Qu.: 0.00 3rd Qu.: 0 3rd Qu.: 0.00 696 | ## Max. :220435 Max. :5121.27 Max. :273608 Max. :4700.03 697 | ## 698 | ## TOTALBTU TOTALDOL BTUWOOD BTUPELLET 699 | ## Min. : 201.6 Min. : 60.46 Min. : 0 Min. : 0.0 700 | ## 1st Qu.: 42655.8 1st Qu.: 1175.49 1st Qu.: 0 1st Qu.: 0.0 701 | ## Median : 68663.3 Median : 1724.60 Median : 0 Median : 0.0 702 | ## Mean : 77722.9 Mean : 1882.34 Mean : 4140 Mean : 197.4 703 | ## 3rd Qu.:103832.9 3rd Qu.: 2385.84 3rd Qu.: 0 3rd Qu.: 0.0 704 | ## Max. :490187.4 Max. :10135.99 Max. :295476 Max. 
:115500.0 705 | ## 706 | 707 | ``` r 708 | write_rds(recs_out, here("Data", "recs.rds"), compress="gz") 709 | ``` 710 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Categorical Data Analysis Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ------------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*224059005) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation 18 | 19 | anes_des <- anes %>% 20 | as_survey_design(weights = Weight, 21 | strata = Stratum, 22 | ids = VarUnit, 23 | nest = TRUE) 24 | 25 | #' 26 | #' # Part 1 27 | #' 28 | #' 1. How many females have a graduate degree? 29 | #' 30 | ## ------------------------------------------------------------------- 31 | 32 | 33 | 34 | #' 35 | #' 2. What percentage of people identify as "Strong democrat"? 36 | #' 37 | ## ------------------------------------------------------------------- 38 | 39 | 40 | #' 41 | #' 3. What percentage of people who voted in the 2016 election identify as "Strong republican"? 42 | #' 43 | ## ------------------------------------------------------------------- 44 | 45 | 46 | #' 47 | #' 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval. 48 | #' 49 | ## ------------------------------------------------------------------- 50 | 51 | 52 | #' 53 | #' 5. What is the design effect for the proportion of people who voted early? 
54 | #' 55 | ## ------------------------------------------------------------------- 56 | 57 | 58 | #' 59 | #' # Part 2 60 | #' 61 | #' 1. Is there a relationship between PartyID and when people voted in the 2016 election (on election day or early voting)? 62 | #' 63 | ## ------------------------------------------------------------------- 64 | 65 | 66 | #' 67 | #' 2. Is there a relationship between PartyID and trust in the government? 68 | #' 69 | ## ------------------------------------------------------------------- 70 | 71 | 72 | #' 73 | #' 74 | #' # Bonus 75 | #' 76 | #' 1. What percentage of people lean republican? 77 | #' 78 | ## ------------------------------------------------------------------- 79 | 80 | 81 | #' 82 | #' 2. Were people who lean democrat more likely to vote early in the 2016 election? 83 | #' 84 | ## ------------------------------------------------------------------- 85 | 86 | 87 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Categorical Data Analysis Exercises" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*224059005) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation 18 | 19 | anes_des <- anes %>% 20 | as_survey_design(weights = Weight, 21 | strata = Stratum, 22 | ids = VarUnit, 23 | nest = TRUE) 24 | ``` 25 | 26 | # Part 1 27 | 28 | 1. How many females have a graduate degree? 29 | 30 | ```{r} 31 | 32 | 33 | ``` 34 | 35 | 2. What percentage of people identify as "Strong democrat"? 36 | 37 | ```{r} 38 | 39 | ``` 40 | 41 | 3. 
What percentage of people who voted in the 2016 election identify as "Strong republican"? 42 | 43 | ```{r} 44 | 45 | ``` 46 | 47 | 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval. 48 | 49 | ```{r} 50 | 51 | ``` 52 | 53 | 5. What is the design effect for the proportion of people who voted early? 54 | 55 | ```{r} 56 | 57 | ``` 58 | 59 | # Part 2 60 | 61 | 1. Is there a relationship between PartyID and when people voted in the 2016 election (on election day or early voting)? 62 | 63 | ```{r} 64 | 65 | ``` 66 | 67 | 2. Is there a relationship between PartyID and trust in the government? 68 | 69 | ```{r} 70 | 71 | ``` 72 | 73 | 74 | # Bonus 75 | 76 | 1. What percentage of people lean republican? 77 | 78 | ```{r} 79 | 80 | ``` 81 | 82 | 2. Were people who lean democrat more likely to vote early in the 2016 election? 83 | 84 | ```{r} 85 | 86 | ``` -------------------------------------------------------------------------------- /Exercises/CategorialExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Categorical Data Analysis Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ------------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*224059005) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation 18 | 19 | anes_des <- anes %>% 20 | as_survey_design(weights = Weight, 21 | strata = Stratum, 22 | ids = VarUnit, 23 | nest = TRUE) 24 | 25 | #' 26 | #' # Part 1 27 | #' 28 | #' 1. How many females have a graduate degree? 
29 | #' 30 | ## ------------------------------------------------------------------- 31 | #Option 1: 32 | anes_des %>% 33 | filter(Gender=="Female", Education=="Graduate") %>% 34 | survey_count(name="n") 35 | #Option 2: 36 | anes_des %>% 37 | filter(Gender=="Female", Education=="Graduate") %>% 38 | summarize( 39 | N=survey_total(), .groups="drop" 40 | ) 41 | 42 | 43 | #' 44 | #' 2. What percentage of people identify as "Strong democrat"? 45 | #' 46 | ## ------------------------------------------------------------------- 47 | anes_des %>% 48 | group_by(PartyID) %>% 49 | summarize( 50 | p=survey_mean() 51 | ) %>% 52 | filter(PartyID=="Strong democrat") 53 | 54 | #' 55 | #' 3. What percentage of people who voted in the 2016 election identify as "Strong republican"? 56 | #' 57 | ## ------------------------------------------------------------------- 58 | anes_des %>% 59 | filter(VotedPres2016=="Yes") %>% 60 | group_by(PartyID) %>% 61 | summarize( 62 | p=survey_mean() 63 | ) 64 | 65 | #' 66 | #' 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval. 67 | #' 68 | ## ------------------------------------------------------------------- 69 | anes_des %>% 70 | group_by(groups=interaction(VotedPres2012, VotedPres2016)) %>% 71 | filter(!is.na(groups)) %>% 72 | summarize( 73 | VotedPres2012=VotedPres2012[1], 74 | VotedPres2016=VotedPres2016[1], 75 | p=survey_mean(vartype="ci") # vartype spelled out; var= relied on partial argument matching 76 | ) 77 | 78 | #' 79 | #' 5. What is the design effect for the proportion of people who voted early? 80 | #' 81 | ## ------------------------------------------------------------------- 82 | anes_des %>% 83 | filter(!is.na(EarlyVote2016)) %>% 84 | group_by(EarlyVote2016) %>% 85 | summarize( 86 | p=survey_mean(deff=TRUE), 87 | N=survey_total() 88 | ) 89 | 90 | #' 91 | #' # Part 2 92 | #' 93 | #' 1. Is there a relationship between PartyID and when people voted in the 2016 election (on election day or early voting)? 
94 | #' 95 | ## ------------------------------------------------------------------- 96 | anes_des %>% 97 | svychisq(design=., 98 | formula=~PartyID +EarlyVote2016) 99 | 100 | #' 101 | #' 2. Is there a relationship between PartyID and trust in the government? 102 | #' 103 | ## ------------------------------------------------------------------- 104 | anes_des %>% 105 | svychisq(design=., 106 | formula=~PartyID+TrustGovernment, 107 | statistic="Wald") 108 | 109 | #' 110 | #' 111 | #' # Bonus 112 | #' 113 | #' 1. What percentage of people lean republican? 114 | #' 115 | ## ------------------------------------------------------------------- 116 | 117 | #Solution 1: Using forcats package 118 | anes_des %>% 119 | mutate(PartyID3=fct_collapse(PartyID, 120 | LeanDem=c("Strong democrat", 121 | "Not very strong democrat", 122 | "Independent-democrat"), 123 | LeanRep=c("Strong republican", 124 | "Not very strong republican", 125 | "Independent-republican"), 126 | other_level="Other")) %>% 127 | group_by(PartyID3) %>% 128 | summarize(p=survey_mean()) 129 | 130 | #Solution 2: Using case_when 131 | anes_des %>% 132 | mutate(PartyID3=case_when(PartyID %in% c("Strong democrat", 133 | "Not very strong democrat", 134 | "Independent-democrat")~"LeanDem", 135 | PartyID %in% c("Strong republican", 136 | "Not very strong republican", 137 | "Independent-republican")~"LeanRep", 138 | is.na(PartyID)~NA_character_, 139 | TRUE~"Other")) %>% 140 | group_by(PartyID3) %>% 141 | summarize(p=survey_mean()) 142 | 143 | 144 | #' 145 | #' 2. Were people who lean democrat more likely to vote early in the 2016 election? 
146 | #' 147 | ## ------------------------------------------------------------------- 148 | 149 | earlyv_glm<-anes_des %>% 150 | mutate(PartyID3=fct_collapse(PartyID, 151 | LeanDem=c("Strong democrat", 152 | "Not very strong democrat", 153 | "Independent-democrat"), 154 | LeanRep=c("Strong republican", 155 | "Not very strong republican", 156 | "Independent-republican"), 157 | other_level="Other")) %>% 158 | svyglm(design=., 159 | formula=(EarlyVote2016=="Yes")~PartyID3, 160 | family=quasibinomial(), 161 | na.action=na.omit) 162 | 163 | summary(earlyv_glm) 164 | 165 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises_solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Categorical Data Analysis Exercise Solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*224059005) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation 18 | 19 | anes_des <- anes %>% 20 | as_survey_design(weights = Weight, 21 | strata = Stratum, 22 | ids = VarUnit, 23 | nest = TRUE) 24 | ``` 25 | 26 | # Part 1 27 | 28 | 1. How many females have a graduate degree? 29 | 30 | ```{r} 31 | #Option 1: 32 | anes_des %>% 33 | filter(Gender=="Female", Education=="Graduate") %>% 34 | survey_count(name="n") 35 | #Option 2: 36 | anes_des %>% 37 | filter(Gender=="Female", Education=="Graduate") %>% 38 | summarize( 39 | N=survey_total(), .groups="drop" 40 | ) 41 | 42 | ``` 43 | 44 | 2. What percentage of people identify as "Strong democrat"? 
45 | 46 | ```{r} 47 | anes_des %>% 48 | group_by(PartyID) %>% 49 | summarize( 50 | p=survey_mean() 51 | ) %>% 52 | filter(PartyID=="Strong democrat") 53 | ``` 54 | 55 | 3. What percentage of people who voted in the 2016 election identify as "Strong republican"? 56 | 57 | ```{r} 58 | anes_des %>% 59 | filter(VotedPres2016=="Yes") %>% 60 | group_by(PartyID) %>% 61 | summarize( 62 | p=survey_mean() 63 | ) 64 | ``` 65 | 66 | 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval. 67 | 68 | ```{r} 69 | anes_des %>% 70 | group_by(groups=interaction(VotedPres2012, VotedPres2016)) %>% 71 | filter(!is.na(groups)) %>% 72 | summarize( 73 | VotedPres2012=VotedPres2012[1], 74 | VotedPres2016=VotedPres2016[1], 75 | p=survey_mean(vartype="ci") # vartype spelled out; var= relied on partial argument matching 76 | ) 77 | ``` 78 | 79 | 5. What is the design effect for the proportion of people who voted early? 80 | 81 | ```{r} 82 | anes_des %>% 83 | filter(!is.na(EarlyVote2016)) %>% 84 | group_by(EarlyVote2016) %>% 85 | summarize( 86 | p=survey_mean(deff=TRUE), 87 | N=survey_total() 88 | ) 89 | ``` 90 | 91 | # Part 2 92 | 93 | 1. Is there a relationship between PartyID and when people voted in the 2016 election (on election day or early voting)? 94 | 95 | ```{r} 96 | anes_des %>% 97 | svychisq(design=., 98 | formula=~PartyID +EarlyVote2016) 99 | ``` 100 | 101 | 2. Is there a relationship between PartyID and trust in the government? 102 | 103 | ```{r} 104 | anes_des %>% 105 | svychisq(design=., 106 | formula=~PartyID+TrustGovernment, 107 | statistic="Wald") 108 | ``` 109 | 110 | 111 | # Bonus 112 | 113 | 1. What percentage of people lean republican? 
114 | 115 | ```{r} 116 | 117 | #Solution 1: Using forcats package 118 | anes_des %>% 119 | mutate(PartyID3=fct_collapse(PartyID, 120 | LeanDem=c("Strong democrat", 121 | "Not very strong democrat", 122 | "Independent-democrat"), 123 | LeanRep=c("Strong republican", 124 | "Not very strong republican", 125 | "Independent-republican"), 126 | other_level="Other")) %>% 127 | group_by(PartyID3) %>% 128 | summarize(p=survey_mean()) 129 | 130 | #Solution 2: Using case_when 131 | anes_des %>% 132 | mutate(PartyID3=case_when(PartyID %in% c("Strong democrat", 133 | "Not very strong democrat", 134 | "Independent-democrat")~"LeanDem", 135 | PartyID %in% c("Strong republican", 136 | "Not very strong republican", 137 | "Independent-republican")~"LeanRep", 138 | is.na(PartyID)~NA_character_, 139 | TRUE~"Other")) %>% 140 | group_by(PartyID3) %>% 141 | summarize(p=survey_mean()) 142 | 143 | ``` 144 | 145 | 2. Were people who lean democrat more likely to vote early in the 2016 election? 146 | 147 | ```{r} 148 | 149 | earlyv_glm<-anes_des %>% 150 | mutate(PartyID3=fct_collapse(PartyID, 151 | LeanDem=c("Strong democrat", 152 | "Not very strong democrat", 153 | "Independent-democrat"), 154 | LeanRep=c("Strong republican", 155 | "Not very strong republican", 156 | "Independent-republican"), 157 | other_level="Other")) %>% 158 | svyglm(design=., 159 | formula=(EarlyVote2016=="Yes")~PartyID3, 160 | family=quasibinomial(), 161 | na.action=na.omit) 162 | 163 | summary(earlyv_glm) 164 | ``` -------------------------------------------------------------------------------- /Exercises/ContinuousExercises.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Continuous Data Analysis Exercises" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ------------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | 
library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | 24 | #' 25 | #' # Part 1 26 | #' 27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | #' 29 | ## ------------------------------------------------------------------- 30 | 31 | 32 | #' 33 | #' 2. Estimate the ratio of the cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 34 | #' 35 | ## ------------------------------------------------------------------- 36 | 37 | 38 | #' 39 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 40 | #' 41 | ## ------------------------------------------------------------------- 42 | 43 | 44 | #' 45 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 46 | #' 47 | ## ------------------------------------------------------------------- 48 | 49 | 50 | #' 51 | #' # Part 2 52 | #' 53 | #' 1. Estimate the average total energy cost (TOTALDOL) by region, division, and urbanicity. 54 | #' 55 | ## ------------------------------------------------------------------- 56 | 57 | 58 | #' 59 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 60 | #' 61 | ## ------------------------------------------------------------------- 62 | 63 | 64 | #' 65 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 66 | #' 67 | ## ------------------------------------------------------------------- 68 | 69 | 70 | #' 71 | #' 4. 
Test whether average electric bill (DOLLAREL) varies by region (Region). 72 | #' 73 | ## ------------------------------------------------------------------- 74 | 75 | 76 | #' 77 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 78 | #' 79 | ## ------------------------------------------------------------------- 80 | 81 | 82 | #' 83 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Continuous Data Analysis Exercises" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | ``` 24 | 25 | # Part 1 26 | 27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | 29 | ```{r} 30 | 31 | ``` 32 | 33 | 2. Estimate the ratio of the cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 34 | 35 | ```{r} 36 | 37 | ``` 38 | 39 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 40 | 41 | ```{r} 42 | 43 | ``` 44 | 45 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 46 | 47 | ```{r} 48 | 49 | ``` 50 | 51 | # Part 2 52 | 53 | 1. Estimate the average total energy cost (TOTALDOL) by region, division, and urbanicity. 
54 | 55 | ```{r} 56 | 57 | ``` 58 | 59 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 60 | 61 | ```{r} 62 | 63 | ``` 64 | 65 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 66 | 67 | ```{r} 68 | 69 | ``` 70 | 71 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region). 72 | 73 | ```{r} 74 | 75 | ``` 76 | 77 | 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 78 | 79 | ```{r} 80 | 81 | ``` 82 | 83 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Continuous Data Analysis Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ------------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | 24 | #' 25 | #' # Part 1 26 | #' 27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | #' 29 | ## ------------------------------------------------------------------- 30 | recs_des %>% 31 | summarize( 32 | SF_HU=survey_mean(TOTSQFT_EN, 33 | vartype = "ci", 34 | level = 0.9) 35 | ) 36 | 37 | #' 38 | #' 2. Estimate the ratio of the cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 
39 | #' 40 | ## ------------------------------------------------------------------- 41 | recs_des %>% 42 | summarize( 43 | PropCooled=survey_ratio( 44 | numerator = TOTCSQFT, 45 | denominator = TOTSQFT_EN, 46 | vartype = "se") 47 | ) 48 | 49 | #' 50 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 51 | #' 52 | ## ------------------------------------------------------------------- 53 | recs_des %>% 54 | summarize( 55 | WinterNightTemp=survey_median(WinterTempNight, 56 | vartype = "se", 57 | na.rm = TRUE) 58 | ) 59 | 60 | #' 61 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 62 | #' 63 | ## ------------------------------------------------------------------- 64 | recs_des %>% 65 | summarize( 66 | WinterNightTemp=survey_quantile(WinterTempNight, # the median is the 0.5 quantile 67 | quantiles = 0.5, 68 | vartype = "se", 69 | na.rm = TRUE) 70 | ) 71 | 72 | #' 73 | #' # Part 2 74 | #' 75 | #' 1. Estimate the average total energy cost (TOTALDOL) by region, division, and urbanicity. 76 | #' 77 | ## ------------------------------------------------------------------- 78 | # option 1: cascade() also reports estimates for the totals of each grouping level 79 | recs_des %>% 80 | group_by(Region, Division, Urbanicity) %>% 81 | cascade( 82 | EnergyCost=survey_mean(TOTALDOL) 83 | ) 84 | # option 2 85 | # summarize() reports only the Region/Division/Urbanicity combinations 86 | recs_des %>% 87 | group_by(Region, Division, Urbanicity) %>% 88 | summarize( 89 | EnergyCost=survey_mean(TOTALDOL) 90 | ) 91 | 92 | #' 93 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 94 | #' 95 | ## ------------------------------------------------------------------- 96 | recs_des %>% 97 | filter(Region=="South") %>% 98 | summarize( 99 | MedElBill=survey_median(DOLLAREL, 100 | vartype="ci") 101 | ) 102 | 103 | #' 104 | #' 3. 
Test whether daytime winter and daytime summer temperatures of homes are set the same. 105 | #' 106 | ## ------------------------------------------------------------------- 107 | recs_des %>% 108 | svyttest(design=., 109 | formula = I(WinterTempDay-SummerTempDay)~0, 110 | na.rm = TRUE) 111 | 112 | #' 113 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region). 114 | #' 115 | ## ------------------------------------------------------------------- 116 | m1 <- recs_des %>% 117 | svyglm(design=., 118 | formula=DOLLAREL~Region, 119 | na.action=na.omit) 120 | summary(m1) 121 | 122 | #' 123 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 124 | #' 125 | ## ------------------------------------------------------------------- 126 | m2 <- recs_des %>% 127 | svyglm(design=., 128 | formula=TOTALDOL~TOTCSQFT, 129 | na.action=na.omit) 130 | summary(m2) 131 | 132 | #' 133 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises_solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Continuous Data Analysis Exercise Solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | ``` 24 | 25 | # Part 1 26 | 27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | 29 | ```{r} 30 | recs_des %>% 31 | summarize( 32 | SF_HU=survey_mean(TOTSQFT_EN, 33 | vartype = "ci", 34 | level = 0.9) 35 | ) 36 | ``` 37 | 38 | 2. 
Estimate the ratio of the cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 39 | 40 | ```{r} 41 | recs_des %>% 42 | summarize( 43 | PropCooled=survey_ratio( 44 | numerator = TOTCSQFT, 45 | denominator = TOTSQFT_EN, 46 | vartype = "se") 47 | ) 48 | ``` 49 | 50 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 51 | 52 | ```{r} 53 | recs_des %>% 54 | summarize( 55 | WinterNightTemp=survey_median(WinterTempNight, 56 | vartype = "se", 57 | na.rm = TRUE) 58 | ) 59 | ``` 60 | 61 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 62 | 63 | ```{r} 64 | recs_des %>% 65 | summarize( 66 | WinterNightTemp=survey_quantile(WinterTempNight, # the median is the 0.5 quantile 67 | quantiles = 0.5, 68 | vartype = "se", 69 | na.rm = TRUE) 70 | ) 71 | ``` 72 | 73 | # Part 2 74 | 75 | 1. Estimate the average total energy cost (TOTALDOL) by region, division, and urbanicity. 76 | 77 | ```{r} 78 | # option 1: cascade() also reports estimates for the totals of each grouping level 79 | recs_des %>% 80 | group_by(Region, Division, Urbanicity) %>% 81 | cascade( 82 | EnergyCost=survey_mean(TOTALDOL) 83 | ) 84 | # option 2 85 | # summarize() reports only the Region/Division/Urbanicity combinations 86 | recs_des %>% 87 | group_by(Region, Division, Urbanicity) %>% 88 | summarize( 89 | EnergyCost=survey_mean(TOTALDOL) 90 | ) 91 | ``` 92 | 93 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 94 | 95 | ```{r} 96 | recs_des %>% 97 | filter(Region=="South") %>% 98 | summarize( 99 | MedElBill=survey_median(DOLLAREL, 100 | vartype="ci") 101 | ) 102 | ``` 103 | 104 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 105 | 106 | ```{r} 107 | recs_des %>% 108 | svyttest(design=., 109 | formula = I(WinterTempDay-SummerTempDay)~0, 110 | na.rm = TRUE) 111 | ``` 112 | 113 | 4. 
Test whether average electric bill (DOLLAREL) varies by region (Region).

```{r}
# Design-based linear model of the electric bill on region;
# summary() prints the coefficient table.
m1 <- recs_des %>%
  svyglm(
    design = .,
    formula = DOLLAREL ~ Region,
    na.action = na.omit
  )
summary(m1)
```

5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).

```{r}
# Total energy spend is the response, cooled square footage the predictor.
m2 <- recs_des %>%
  svyglm(
    design = .,
    formula = TOTALDOL ~ TOTCSQFT,
    na.action = na.omit
  )
summary(m2)
```

--------------------------------------------------------------------------------
/Exercises/WarmUpExercises.R:
--------------------------------------------------------------------------------
#' ---
#' title: "Warm-up Exercises"
#' output:
#'   html_document:
#'     df_print: paged
#' ---
#'
#' # Course set-up
#' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
#'
## -------------------------------------------------------------------
# install.packages("tidyverse")
# install.packages("srvyr")
# install.packages("here")
# install.packages("palmerpenguins")
# install.packages("remotes")

library(tidyverse) # for tidyverse
library(here) # for file paths

# Installs the survey package from a pinned commit of a GitHub fork.
remotes::install_github("bschneidr/survey", ref = "c217689")
library(srvyr)
library(palmerpenguins)



#'
#' # Warm-up exercises: Play with penguin data!!!
#'
## -------------------------------------------------------------------
penguins

#'
#' How many penguins of each species are there? Hint: use `count`
## -------------------------------------------------------------------


#'
#' How many penguins of each species and sex are there? 
Hint: use `count` 40 | #' 41 | ## ------------------------------------------------------------------- 42 | 43 | 44 | #' 45 | #' What is the mean length of flipper by species? Hint: use `group_by` and `summarise` 46 | ## ------------------------------------------------------------------- 47 | 48 | 49 | #' 50 | #' 51 | #' What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise` 52 | ## ------------------------------------------------------------------- 53 | 54 | 55 | #' 56 | #' # Advanced warm-up exercises 57 | #' 58 | #' Fit a simple linear regression between body mass and flipper length. 59 | #' 60 | ## ------------------------------------------------------------------- 61 | 62 | 63 | #' 64 | #' 65 | #' Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm 66 | #' 67 | ## ------------------------------------------------------------------- 68 | 69 | 70 | #' 71 | -------------------------------------------------------------------------------- /Exercises/WarmUpExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Warm-up Exercises" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Course set-up 9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 10 | 11 | ```{r} 12 | # install.packages("tidyverse") 13 | # install.packages("srvyr") 14 | # install.packages("here") 15 | # install.packages("palmerpenguins") 16 | # install.packages("remotes") 17 | 18 | library(tidyverse) # for tidyverse 19 | library(here) # for file paths 20 | 21 | remotes::install_github("bschneidr/survey", ref = "c217689") 22 | library(srvyr) 23 | library(palmerpenguins) 24 | 25 | 26 | ``` 27 | 28 | # Warm-up exercises: Play with penguin data!!! 
29 | 30 | ```{r} 31 | penguins 32 | ``` 33 | 34 | How many penguins of each species are there? Hint: use `count` 35 | ```{r} 36 | 37 | ``` 38 | 39 | How many penguins of each species and sex are there? Hint: use `count` 40 | 41 | ```{r} 42 | 43 | ``` 44 | 45 | What is the mean length of flipper by species? Hint: use `group_by` and `summarise` 46 | ```{r} 47 | 48 | ``` 49 | 50 | 51 | What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise` 52 | ```{r} 53 | 54 | ``` 55 | 56 | # Advanced warm-up exercises 57 | 58 | Fit a simple linear regression between body mass and flipper length. 59 | 60 | ```{r} 61 | 62 | ``` 63 | 64 | 65 | Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm 66 | 67 | ```{r} 68 | 69 | ``` 70 | 71 | -------------------------------------------------------------------------------- /Exercises/WarmUpExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Warm-up Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Course set-up 9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 10 | #' 11 | ## ------------------------------------------------------------------- 12 | # install.packages("tidyverse") 13 | # install.packages("srvyr") 14 | # install.packages("here") 15 | # install.packages("palmerpenguins") 16 | # install.packages("remotes") 17 | 18 | library(tidyverse) # for tidyverse 19 | library(here) # for file paths 20 | 21 | remotes::install_github("bschneidr/survey", ref = "c217689") 22 | library(srvyr) 23 | library(palmerpenguins) 24 | 25 | 26 | 27 | #' 28 | #' # Warm-up exercises: Play with penguin data!!! 
#'
## -------------------------------------------------------------------
penguins

#'
#' How many penguins of each species are there? Hint: use `count`
## -------------------------------------------------------------------
# One row per species with its number of penguins in column `n`.
count(penguins, species)

#'
#' How many penguins of each species and sex are there? Hint: use `count`
#'
## -------------------------------------------------------------------
# One row per species/sex combination.
count(penguins, species, sex)

#'
#' What is the mean length of flipper by species? Hint: use `group_by` and `summarise`
## -------------------------------------------------------------------
# na.rm = TRUE keeps missing measurements from turning a group mean into NA.
penguins %>%
  group_by(species) %>%
  summarise(
    MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
  )

#'
#'
#' What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise`
## -------------------------------------------------------------------
# Same summary, now with one row per species/sex combination.
penguins %>%
  group_by(species, sex) %>%
  summarise(
    MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
  )

#'
#' # Advanced warm-up exercises
#'
#' Fit a simple linear regression between body mass and flipper length.
#'
## -------------------------------------------------------------------
# Body mass is the response; flipper length is the single predictor.
mod1 <- lm(
  body_mass_g ~ flipper_length_mm,
  data = penguins
)
summary(mod1)

#'
#'
#' Test whether the average flipper length is significantly different between male and female penguins. 
Use t-test, lm, or glm
#'
## -------------------------------------------------------------------
# Three ways to compare flipper length between the sexes:
# a two-sample t-test, a linear model, and a glm with its default family.
t.test(flipper_length_mm ~ sex, data = penguins)

mod3 <- lm(flipper_length_mm ~ sex, data = penguins)
summary(mod3)

mod4 <- glm(flipper_length_mm ~ sex, data = penguins)
summary(mod4)

#'
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises_solutions.Rmd:
--------------------------------------------------------------------------------
---
title: "Warm-up Exercise Solutions"
output:
  html_document:
    df_print: paged
---

# Course set-up
First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.

```{r}
# install.packages("tidyverse")
# install.packages("srvyr")
# install.packages("here")
# install.packages("palmerpenguins")
# install.packages("remotes")

library(tidyverse) # for tidyverse
library(here) # for file paths

# Installs the survey package from a pinned commit of a GitHub fork.
remotes::install_github("bschneidr/survey", ref = "c217689")
library(srvyr)
library(palmerpenguins)


```

# Warm-up exercises: Play with penguin data!!!

```{r}
penguins
```

How many penguins of each species are there? Hint: use `count`
```{r}
# One row per species with its number of penguins in column `n`.
count(penguins, species)
```

How many penguins of each species and sex are there? Hint: use `count`

```{r}
# One row per species/sex combination.
count(penguins, species, sex)
```

What is the mean length of flipper by species? Hint: use `group_by` and `summarise`
```{r}
# na.rm = TRUE keeps missing measurements from turning a group mean into NA.
penguins %>%
  group_by(species) %>%
  summarise(
    MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
  )
```


What is the mean flipper length by species and sex? 
Hint: use `group_by` and `summarise`
```{r}
# Same summary as above, now with one row per species/sex combination.
penguins %>%
  group_by(species, sex) %>%
  summarise(
    MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
  )
```

# Advanced warm-up exercises

Fit a simple linear regression between body mass and flipper length.

```{r}
# Body mass is the response; flipper length is the single predictor.
mod1 <- lm(
  body_mass_g ~ flipper_length_mm,
  data = penguins
)
summary(mod1)
```


Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm

```{r}
# Three ways to compare flipper length between the sexes:
# a two-sample t-test, a linear model, and a glm with its default family.
t.test(flipper_length_mm ~ sex, data = penguins)

mod3 <- lm(flipper_length_mm ~ sex, data = penguins)
summary(mod3)

mod4 <- glm(flipper_length_mm ~ sex, data = penguins)
summary(mod4)
```

--------------------------------------------------------------------------------
/FinalizeMaterials.R:
--------------------------------------------------------------------------------
### This program creates the R files from the Rmd files and builds the
### PDF and PPTX versions of the slides

library(knitr)
library(here)

# Extract the code of <folder>/<fn>.Rmd into <folder>/<fn>.R.
#
# folder: directory (relative to the project root) that holds the Rmd file.
# fn:     file name without extension; used for both the input and the output.
#
# documentation = 2 carries the prose along as #' comment lines, which is the
# layout of the generated .R files in this repository.
# Returns whatever purl() returns.
mypurl <- function(folder, fn) {
  rmd_path <- here(folder, stringr::str_c(fn, ".Rmd"))
  r_path <- here(folder, stringr::str_c(fn, ".R"))
  purl(rmd_path, output = r_path, documentation = 2)
}

# Exercise templates
mypurl("Exercises", "CategorialExercises")
mypurl("Exercises", "ContinuousExercises")
mypurl("Exercises", "WarmUpExercises")

# Exercise solutions
mypurl("Exercises", "CategorialExercises_solutions")
mypurl("Exercises", "ContinuousExercises_solutions")
mypurl("Exercises", "WarmUpExercises_solutions")

# Slide deck
mypurl("Presentation", "Slides")

# remotes::install_github("jhelvy/xaringanBuilder")
# remotes::install_github('rstudio/chromote')

# The PDF is built from the rendered Slides.html, so that file must exist.
xaringanBuilder::build_pdf(
  input = here("Presentation", "Slides.html"),
  output_file = here("Presentation", "Slides.pdf"),
  partial_slides = TRUE
)
# The PPTX is built from the PDF produced just above.
xaringanBuilder::build_pptx(
  input = here("Presentation", "Slides.pdf"),
31 | output_file=here("Presentation", "Slides.pptx"), 32 | partial_slides= TRUE) 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 
33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. 
To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | -------------------------------------------------------------------------------- /Presentation/Slides.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Tidy Survey Analysis in R using the srvyr Package" 3 | subtitle: "AAPOR 2021 Short Course" 4 | author: 5 | - Stephanie Zimmer, RTI International 6 | - Rebecca Powell, RTI International 7 | date: "2021-05-06" 8 | output: 9 | xaringan::moon_reader: 10 | css: xaringan-themer.css 11 | nature: 12 | slideNumberFormat: "%current%" 13 | highlightStyle: github 14 | highlightLines: true 15 | ratio: 16:9 16 | countIncrementalSlides: true 17 | --- 18 | 19 | ```{r setup, include=FALSE} 20 | knitr::opts_chunk$set(echo = TRUE, message = FALSE, tidy = FALSE) 21 | ``` 22 | 23 | 24 | ```{r xaringan-themer, include=FALSE, warning=FALSE} 25 | library(xaringanthemer) 26 | style_duo_accent( 27 | primary_color = "#1E4F96", 28 | secondary_color = "#00A3E0", 29 | inverse_header_color = "#FFFFFF" 30 | ) 31 | ``` 32 | 33 | class: inverse center middle 34 | # Introduction 35 | 36 | --- 37 | 38 | ```{css, echo = FALSE} 39 | .small .remark-code { /*Change made here*/ 40 | font-size: 80% !important; 41 | } 42 | .smaller .remark-code { /*Change made here*/ 43 | font-size: 70% !important; 44 | } 45 | ``` 46 | 47 | ## Overview 48 | 49 | - At the end of this course, you should be able to 50 | - Calculate point estimates and their standard errors with survey data 51 | - Means & Proportions 52 | - Totals 53 | - Quantiles 54 | - Perform t-tests and chi-squared tests 55 | - Fit regression models 56 | - Specify a survey design in R to create a survey object 57 | 58 | - We will not be going over the following but provide some resources at the end 59 | - Weighting (calibration, post-stratification, raking, etc.) 
60 | - Survival analysis 61 | - Nonlinear models 62 | 63 | 64 | 65 | --- 66 | ## Overview: Course Roadmap 67 | 68 | - Get familiar with RStudio Cloud with a warm-up exercise using the tidyverse 69 | 70 | - Introduce the survey data we'll be using in the course 71 | 72 | - Analysis of continuous data with time for practice 73 | 74 | - Analysis of categorical data with time for practice 75 | 76 | - Specify a survey design object in R with exercises 77 | 78 | - Resources for other survey analysis topics 79 | 80 | - Closing 81 | 82 | --- 83 | ## Logistics 84 | 85 | - We will be using RStudio Cloud today to ensure everyone has access 86 | 87 | - Sign up for a free RStudio Cloud account 88 | - Access the project and files via link in email and Zoom chat 89 | - Click "START" to open the project and get started 90 | - RStudio Cloud has the same features and appearance as RStudio for ease of use 91 | 92 | - All slides and code are available on GitHub: https://github.com/szimmer/tidy-survey-aapor-2021 93 | 94 | ??? 95 | The GitHub repo is for future reference; all material is on RStudio Cloud 96 | --- 97 | ## Intro to RStudio Cloud: Penguins!! 98 | 99 | - Using `palmerpenguins` data for warm-up exercises 100 | 101 | - Data were collected and made available by Dr. Kristen Gorman and the Palmer Station, Antarctica LTER, a member of the Long Term Ecological Research Network.
102 | 103 | - Access data through `palmerpenguins` package https://github.com/allisonhorst/palmerpenguins/ 104 | 105 | 106 | #### If you are using your own RStudio environment: 107 | - Make sure you have `tidyverse`, `here`, and `palmerpenguins` installed 108 | 109 | ```{r inst_packages, error=FALSE, warning=FALSE, eval=FALSE} 110 | # Run package installation if you don't have these packages already 111 | # As a reminder, installing takes package from internet to your computer 112 | # and only needs to be done once, not each session 113 | 114 | install.packages(c("tidyverse", "here", "palmerpenguins")) 115 | ``` 116 | 117 | --- 118 | ## Intro to RStudio Cloud: Penguins!! 119 | 120 | - Load `tidyverse`, `here`, and `palmerpenguins` 121 | 122 | - Look at the penguins dataset using `glimpse` 123 | 124 | ```{r load_pack1, error=FALSE, warning=FALSE} 125 | library(tidyverse) # for tidyverse 126 | library(here) # for file paths 127 | library(palmerpenguins) # for warm-up data 128 | glimpse(penguins) 129 | ``` 130 | 131 | --- 132 | ## Warm-up Exercises: WarmUpExercises.Rmd 133 | 134 | - Let's open RStudio Cloud and do some warm-up examples 135 | - We will do one together and then give you 5 minutes to work through other examples and get familiar with RStudio Cloud 136 | 137 | - Explore the penguins data 138 | - How many penguins of each species are there? We will do this one together 139 | - How many penguins of each species and sex are there? Hint: use `count` 140 | - What is the mean length of flipper by species? Hint: use `group_by` and `summarize` 141 | - What is the mean flipper length by species and sex? 142 | 143 | - More advanced warm-up 144 | - Fit a simple linear regression between body mass and flipper length. 145 | - Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm 146 | 147 | --- 148 | ## Ex. 1: How many penguins of each species are there?
149 | 150 | ```{r peng1} 151 | penguins %>% 152 | count(species) 153 | ``` 154 | 155 | ??? 156 | - Using `count` we see there are 3 different species and the number of penguins for each species 157 | --- 158 | ## Ex. 2: How many penguins of each species and sex are there? 159 | 160 | ```{r peng2} 161 | penguins %>% 162 | count(species, sex) 163 | ``` 164 | ??? 165 | - `count` can take more than one variable to get a cross-tabs between the two variables 166 | 167 | --- 168 | ## Ex. 3: What is the mean length of flipper by species? 169 | 170 | ```{r peng3} 171 | penguins %>% 172 | group_by(species) %>% 173 | summarize( 174 | MeanFlipperLength=mean(flipper_length_mm, 175 | na.rm=TRUE)) 176 | ``` 177 | ??? 178 | - `group_by` allows us to look at metrics by different subgroups like species 179 | - when using `group_by` follow it with `summarize` to get metrics (like average) at the group level 180 | - `na.rm=TRUE` removes missing data from the calculation 181 | - forgetting this argument will result in a value of `NA` as the function will try to average missing data 182 | 183 | --- 184 | ## Ex. 4: What is the mean flipper length by species and sex? 185 | 186 | ```{r peng4} 187 | penguins %>% 188 | group_by(species, sex) %>% 189 | summarize( 190 | MeanFlipperLength=mean(flipper_length_mm, 191 | na.rm=TRUE)) 192 | ``` 193 | ??? 194 | - As with `count`, you can `group_by` multiple variables 195 | 196 | --- 197 | ## Advanced Ex. 1: Linear regression (body mass & flipper length) 198 | .small[ 199 | ```{r pengad1} 200 | mod1 <- lm(body_mass_g ~ flipper_length_mm, data=penguins) 201 | summary(mod1) 202 | ``` 203 | ] 204 | ??? 205 | - use `lm` (linear model) function 206 | - equation is written as y-variable ~ x-variables 207 | 208 | --- 209 | ## Advanced Ex. 2: Flipper length differences by sex: t-test 210 | 211 | ```{r pengad2a} 212 | t.test(flipper_length_mm ~ sex, data=penguins) 213 | ``` 214 | ??? 
215 | - ~ also used in `t.test` to separate what we want to measure (our y) and the groups of interest (our x) 216 | --- 217 | ## Advanced Ex. 2: Flipper length differences by sex: lm 218 | .small[ 219 | ```{r pengad2b} 220 | mod3 <- lm(flipper_length_mm ~ sex, data=penguins) 221 | summary(mod3) 222 | ``` 223 | ] 224 | --- 225 | ## Advanced Ex. 2: Flipper length differences by sex: glm 226 | .small[ 227 | ```{r pengad2c} 228 | mod4 <- glm(flipper_length_mm ~ sex, data=penguins) 229 | summary(mod4) 230 | ``` 231 | ] 232 | ??? 233 | - `glm` takes the same arguments as `lm`, but is more flexible for working with non-normal data 234 | --- 235 | class: inverse center middle 236 | # Survey Datasets 237 | --- 238 | ## Residential Energy Consumption Survey (RECS) 2015 239 | 240 | - Energy consumption/expenditures collected through energy suppliers 241 | 242 | - Fielded 14 times between 1978 and 2015 243 | 244 | - Topics include appliances, electronics, heating, a/c, temperatures, water heating, lighting, energy bills, respondent demographics, and energy assistance 245 | 246 | - Funded by the Energy Information Administration 247 | 248 | - Target Population: Primary occupied housing units in the US 249 | 250 | - Mode: In-person, paper, and web interview mode 251 | 252 | - Sample Information: BRR Replicate weights included for variance estimation 253 | 254 | https://www.eia.gov/consumption/residential/index.php 255 | 256 | ???
257 | - We have subset the columns of this data and created derived variables, code in repository 258 | --- 259 | ## American National Election Studies (ANES) 2016 260 | 261 | - Pre and post election surveys 262 | 263 | - Fielded almost every 2 years since 1948 264 | 265 | - Topics include voter registration status, candidate preference, opinions on country and government, party and ideology affiliation, opinions on policy, news sources, and more 266 | 267 | - Collaboration of Stanford, University of Michigan - funding by the National Science Foundation 268 | 269 | - Target Population: US citizens, 18 and older living in US 270 | 271 | - Mode: FTF with CASI and Web 272 | 273 | - Sample Information: Pseudo-strata and pseudo-cluster included for variance estimation 274 | 275 | https://electionstudies.org/ 276 | 277 | ??? 278 | Chose not to use 2020 data because it is still preliminary 279 | 280 | --- 281 | class: inverse center middle 282 | # Continuous data analysis 283 | --- 284 | ## Overview of Survey Analysis using `srvyr` Package 285 | 286 | 1. Create a `tbl_svy` object using: `as_survey_design` or `as_survey_rep` 287 | 288 | 2. Subset data (if needed) using `filter` (subpopulations) 289 | 290 | 3. Specify domains of analysis using `group_by` 291 | 292 | 4. Within `summarize`, specify variables to calculate including means, totals, proportions, quantiles and more 293 | 294 | 295 | Note: We will be teaching this in the reverse order!!! 
296 | --- 297 | ## Set-up for Analysis 298 | - `srvyr` package uses tidy-syntax but uses the `survey` package behind it to do calculations 299 | 300 | - If using your own RStudio environment, install both packages: 301 | ```{r inst_srv, eval=FALSE} 302 | # Install survey and srvyr packages 303 | 304 | remotes::install_github("bschneidr/survey", ref = "c217689") 305 | install.packages("srvyr") 306 | ``` 307 | 308 | - First, we will set-up a design object and later talk about what it means 309 | ```{r recs_des, error=FALSE, warning=FALSE} 310 | library(survey) # for survey analysis 311 | library(srvyr) # for tidy survey analysis 312 | 313 | recs <- read_rds(here("Data", "recs.rds")) 314 | 315 | recs_des <- recs %>% 316 | as_survey_rep(weights=NWEIGHT, 317 | repweights=starts_with("BRRWT"), 318 | type="Fay", 319 | rho=0.5, 320 | mse=TRUE) 321 | 322 | ``` 323 | ??? 324 | - need to install github version of survey package if you want CIs with quantiles 325 | --- 326 | ## Weighted Analysis for Continuous Variables 327 | 328 | - Common functions for continuous summaries 329 | - survey_mean 330 | - survey_total (like sum) 331 | - survey_median 332 | - survey_quantile 333 | - survey_ratio 334 | 335 | - Always call within `summarize`/`summarise` 336 | --- 337 | ## `survey_mean` Syntax 338 | 339 | ```{r survey_mean_syn, eval=FALSE} 340 | survey_mean( 341 | x, 342 | na.rm = FALSE, 343 | vartype = c("se", "ci", "var", "cv"), 344 | level = 0.95, 345 | proportion = FALSE, 346 | deff = FALSE, 347 | df = NULL, 348 | ... 349 | ) 350 | ``` 351 | 352 | To calculate a survey mean, we use this in `summarize`/`summarise` 353 | ```{r survey_mean_syn2, eval=FALSE} 354 | survey_design_object %>% 355 | summarize( 356 | mean_varname=survey_mean(x = continuous_varname) 357 | ) 358 | ``` 359 | 360 | ??? 
361 | Only required argument is the variable 362 | 363 | --- 364 | ## `survey_mean` Example 1: Mean dollars spent on energy 365 | 366 | This is an example using the `recs_des` survey design object and `survey_mean` function defaults 367 | 368 | ```{r survey_mean_ex1} 369 | recs_des %>% 370 | summarize( 371 | TD_mean=survey_mean(x = TOTALDOL) 372 | ) 373 | ``` 374 | --- 375 | ## `survey_mean` Example 2: Mean temperature setting for summer during the day 376 | 377 | Run this code. What happens? Why? 378 | 379 | ```{r survey_mean_ex2, eval=FALSE} 380 | recs_des %>% 381 | summarize( 382 | TD_mean=survey_mean(x = SummerTempDay) 383 | ) 384 | ``` 385 | --- 386 | ## `survey_mean` Example 2: Mean temperature setting for summer during the day 387 | 388 | Run this code. What happens? Why? 389 | 390 | ```{r survey_mean_ex2_r, error=TRUE} 391 | recs_des %>% 392 | summarize( 393 | TD_mean=survey_mean(x = SummerTempDay) 394 | ) 395 | ``` 396 | 397 | How do we fix this code? 398 | 399 | ??? 400 | - missing data in temperature, need `na.rm=TRUE` 401 | --- 402 | ## `survey_mean` Example 2: Missing data solution 403 | 404 | ```{r survey_mean_ex2_sol, error=TRUE, tidy=FALSE} 405 | recs_des %>% 406 | summarize( 407 | TD_mean = survey_mean( 408 | x = SummerTempDay, 409 | na.rm = TRUE )#<< 410 | ) 411 | ``` 412 | 413 | --- 414 | ## `survey_median` Syntax 415 | 416 | ```{r survey_median_syn, eval=FALSE} 417 | survey_median( 418 | x, 419 | na.rm = FALSE, 420 | vartype = c("se", "ci"), 421 | level = 0.95, 422 | df = NULL, 423 | ... 424 | ) 425 | ``` 426 | 427 | ??? 
428 | Only required argument is the variable 429 | 430 | 431 | --- 432 | ## `survey_median` Example: Median temperature setting for summer during day 433 | 434 | Fill in the blank: 435 | 436 | ```{r survey_median_fib, eval=FALSE} 437 | recs_des %>% 438 | summarize( 439 | TD_median=survey_median(x=_________, 440 | na.rm=_________) 441 | ) 442 | ``` 443 | 444 | -- 445 | 446 | ```{r survey_median_fib_sol} 447 | recs_des %>% 448 | summarize( 449 | TD_median=survey_median(x=SummerTempDay, 450 | na.rm=TRUE) 451 | ) 452 | ``` 453 | 454 | 455 | 456 | --- 457 | ## `survey_quantile` Syntax 458 | 459 | ```{r survey_quantile_syn, eval=FALSE} 460 | survey_quantile( 461 | x, 462 | quantiles, #<< 463 | na.rm = FALSE, 464 | vartype = c("se", "ci", "var", "cv"), 465 | level = 0.95, 466 | df = NULL, 467 | ... 468 | ) 469 | ``` 470 | 471 | ??? 472 | - need both the variable and the quantiles in a vector e.g. (c(.25, .75)) 473 | --- 474 | ## `survey_quantile` Example 1: 1st and 3rd quartile of dollars spent on energy 475 | 476 | ```{r survey_quantile_ex1, error=TRUE} 477 | recs_des %>% 478 | summarize( 479 | Spent=survey_quantile( 480 | x = TOTALDOL, 481 | quantiles = c(.25, .75)) #<< 482 | ) 483 | ``` 484 | ??? 485 | - This estimates the 25th and 75th percentiles 486 | 487 | --- 488 | ## `survey_quantile` Example 2: 1st and 3rd quartile of dollars spent on energy now with confidence interval 489 | 490 | ```{r survey_quantile_ex2, error=TRUE} 491 | recs_des %>% 492 | summarize( 493 | Spent=survey_quantile(x = TOTALDOL, 494 | quantiles = c(.25, .75), 495 | vartype = "ci" #<< 496 | ) 497 | ) 498 | ``` 499 | --- 500 | ## `survey_ratio` Syntax 501 | 502 | - Note this estimates: $\sum x_i/\sum y_i$ not $\sum \frac{x_i}{y_i}$ 503 | 504 | ```{r survey_ratio_syn, eval=FALSE} 505 | survey_ratio( 506 | numerator, #<< 507 | denominator, #<< 508 | na.rm = FALSE, 509 | vartype = c("se", "ci", "var", "cv"), 510 | level = 0.95, 511 | deff = FALSE, 512 | df = NULL, 513 | ...
514 | ) 515 | ``` 516 | 517 | 518 | --- 519 | ## `survey_ratio` Example: mean dollars per BTU spent on energy 520 | 521 | ```{r survey_ratio_ex} 522 | recs_des %>% 523 | summarize( 524 | DolPerBTU=survey_ratio( 525 | numerator = TOTALDOL, #<< 526 | denominator = TOTALBTU, #<< 527 | na.rm = TRUE 528 | ) 529 | ) 530 | ``` 531 | --- 532 | ## Practice on your own 533 | 534 | - Open ContinuousExercises.Rmd and work through Part 1 535 | 536 | - We will take 15 minutes. Use this time for the exercises and a break 537 | --- 538 | ## Weighted Analysis for Continuous Variables: Domain Analysis 539 | 540 | - If we want to get estimates by another variable, we need to add a `group_by` statement before doing the analysis. 541 | 542 | - Example: Average dollars spent on electricity by whether AC is used 543 | 544 | ```{r domain_ex} 545 | recs_des %>% 546 | group_by(ACUsed) %>% #<< 547 | summarize( 548 | ElBill=survey_mean(DOLLAREL, 549 | na.rm=TRUE) 550 | ) 551 | ``` 552 | --- 553 | ## Domain Analysis: Totals 554 | 555 | - If we want the overall electric bill too, use the `cascade` function instead of `summarize` 556 | 557 | ```{r domain_ex_casc} 558 | recs_des %>% 559 | group_by(ACUsed) %>% 560 | cascade( 561 | ElBill=survey_mean(DOLLAREL, 562 | na.rm=TRUE) 563 | ) 564 | 565 | ``` 566 | 567 | ??? 568 | - Note the overall appears as NA 569 | 570 | --- 571 | ## Domain Analysis: Totals 572 | 573 | - Also can add sample and pop sizes 574 | 575 | ```{r domain_tot} 576 | recs_des %>% 577 | group_by(ACUsed) %>% 578 | cascade( 579 | ElBill=survey_mean(DOLLAREL, na.rm=TRUE), 580 | N=survey_total(!is.na(DOLLAREL)), #<< 581 | n=unweighted(sum(!is.na(DOLLAREL))) #<< 582 | ) 583 | 584 | ``` 585 | 586 | ??? 
587 | - survey_total gets a weighted total 588 | - unweighted does just that, an unweighted estimate, can also get an unweighted mean or any other stat 589 | 590 | --- 591 | ## Weighted Analysis for Specific Subpopulations 592 | 593 | - filtering (subsetting) the data should be done AFTER specifying the design to ensure accurate standard errors 594 | 595 | - Use the `filter` function after creating the survey design object and before summarizing 596 | 597 | Wrong way: 598 | ```{r filter_bad, eval = FALSE} 599 | data %>% 600 | filter(state=="NC") %>% #<< 601 | as_survey_design(...) %>% 602 | summarize(AvgAge=mean(Age)) 603 | ``` 604 | 605 | Right way: 606 | ```{r filter_good, eval=FALSE} 607 | data %>% 608 | as_survey_design(...) %>% 609 | filter(state=="NC") %>% #<< 610 | summarize(AvgAge=mean(Age)) 611 | ``` 612 | 613 | ??? 614 | - The difference in these two methods occurs when the subpopulation doesn't occur in all strata or PSUs 615 | 616 | --- 617 | ## Subpopulation Example 1: Average electric cost of single family homes 618 | 619 | ```{r subpop1} 620 | recs_des %>% 621 | filter(HousingUnitType %in% c("Single-family detached", 622 | "Single-family attached")) %>% 623 | summarize( 624 | ElBill=survey_mean(DOLLAREL, 625 | na.rm=TRUE) 626 | ) 627 | ``` 628 | 629 | --- 630 | ## Comparisons with t-tests: `svyttest` Syntax 631 | 632 | - t-tests are done in the package `survey` not `srvyr` but you can use the same design object 633 | 634 | ```{r ttest_syn, eval=FALSE} 635 | svyttest(formula, # outcome~group for two-sample, outcome~0 for one-sample 636 | design, 637 | na.rm = FALSE 638 | ....) 639 | ``` 640 | 641 | ??? 642 | - Uses standard R formula notation 643 | - will go over examples of 1-sample, 2-sample, and paired t-test 644 | 645 | --- 646 | ## `svyttest` Example 1: One-sample t-test 647 | 648 | - I keep my house at 68 degrees at night during the summer. Is this different from the national average? 
649 | 650 | ```{r ttest_ex1} 651 | recs_des %>% 652 | svyttest(design=., 653 | formula=I(SummerTempNight-68)~0, 654 | na.rm=TRUE) 655 | ``` 656 | 657 | ??? 658 | - Note the I notation, this does the arithmetic before modeling 659 | 660 | --- 661 | ## `svyttest` Example 2: Comparing two variables 662 | 663 | - Do people keep their house the same temperature at night during the summer and the winter? 664 | 665 | ```{r ttest_ex2} 666 | recs_des %>% 667 | svyttest(design=., 668 | formula=I(SummerTempNight-WinterTempNight)~0, 669 | na.rm=TRUE) 670 | ``` 671 | 672 | ??? 673 | - this is a paired t-test 674 | - testing whether the difference is 0 for each household 675 | --- 676 | ## `svyttest` Example 3: Two-sample t-test 677 | 678 | - Are electric bills different between those with and without A/C? 679 | 680 | ```{r ttest_ex3} 681 | recs_des %>% 682 | svyttest(design=., 683 | formula=DOLLAREL~ACUsed, 684 | na.rm=TRUE) 685 | ``` 686 | 687 | 688 | 689 | --- 690 | ## Linear Regression or ANOVA: `svyglm` Syntax 691 | 692 | - As with t-tests, regressions are done in the package `survey` not `srvyr` but you can use the same design object 693 | 694 | - Syntax is similar between t-test and glm 695 | 696 | ```{r glm_syn, eval=FALSE} 697 | svyglm(formula, 698 | design, 699 | na.action, #default is na.omit 700 | ....) 701 | ``` 702 | --- 703 | ## `svyglm` Example: Two-sample 704 | 705 | Same example as two-sample t-test: Are electric bills different between those with and without A/C? 706 | 707 | t-test: 708 | ```{r twosamp_ex_ttest, eval=FALSE} 709 | recs_des %>% 710 | svyttest(design=., 711 | formula=DOLLAREL~ACUsed, 712 | na.rm=TRUE) #<< 713 | ``` 714 | 715 | glm: 716 | ```{r twosamp_ex_glm, eval=FALSE} 717 | recs_des %>% 718 | svyglm(design=., 719 | formula=DOLLAREL~ACUsed, 720 | na.action=na.omit) #<< 721 | ``` 722 | 723 | ??? 
724 | - one major difference in how you specify to ignore NA values 725 | - svyttest can only have 2-levels in group variable 726 | - svyglm, the variable on right can be anything (continuous or factor) 727 | 728 | --- 729 | ## `svyglm` Example: Two-sample 730 | 731 | Are electric bills different between those with and without A/C? 732 | .small[ 733 | ```{r twosamp_ex_ttest_run} 734 | recs_des %>% 735 | svyglm(design=., 736 | formula=DOLLAREL~ACUsed, 737 | na.action=na.omit) %>% 738 | summary() 739 | ``` 740 | ] 741 | 742 | ??? 743 | - same results as t-test 744 | 745 | --- 746 | ## `svyglm` Example 1: ANOVA Test 747 | 748 | Does temperature of AC at night vary by region? 749 | .smaller[ 750 | ```{r anova_ex} 751 | recs_des %>% 752 | svyglm(design=., 753 | formula=SummerTempNight~Region, 754 | na.action=na.omit) %>% 755 | summary() 756 | 757 | ``` 758 | ] 759 | 760 | ??? 761 | - Region is a factor variable, if it is numeric - this will treat it like a linear model 762 | 763 | --- 764 | ## `svyglm` Example 2: Linear Model 765 | 766 | - Is there a relationship between square footage and electric bill? 767 | - Let's review the data first with a ggplot. Note we use the original data and do NOT use the survey design object. 768 | 769 | ```{r plot_sf_elbill} 770 | p <- recs %>% 771 | ggplot(aes(x=TOTSQFT_EN, y=DOLLAREL, weight=NWEIGHT)) + 772 | geom_hex() + 773 | theme(legend.position="right") + 774 | guides(fill=guide_legend(title="HUs")) 775 | ``` 776 | --- 777 | ## `svyglm` Example 2: Linear Model 778 | ```{r plot_sf_elbill_disp, echo=FALSE, fig.asp=9/16, fig.align="center", out.width="90%", dpi=300} 779 | p + 780 | theme_xaringan() 781 | ``` 782 | 783 | 784 | --- 785 | ## `svyglm` Example 2: Linear Model 786 | .small[ 787 | ```{r lm_ex} 788 | m_electric_sqft <- recs_des %>% 789 | svyglm(design=., 790 | formula=DOLLAREL~TOTSQFT_EN, 791 | na.action=na.omit) 792 | summary(m_electric_sqft) 793 | ``` 794 | ] 795 | 796 | ??? 
797 | - for every square foot bigger, on average 24.6c more in electric 798 | --- 799 | ## Practice on your own 800 | 801 | - Open ContinuousExercises.Rmd and work through Part 2 802 | 803 | - We will take 15 minutes. Use this time for the exercises and a break 804 | --- 805 | class: inverse center middle 806 | # Categorical data analysis 807 | --- 808 | ## Weighted Analysis for Categorical Variable 809 | 810 | - Functions to use within `summarize` after `group_by` 811 | - survey_mean 812 | - survey_total 813 | 814 | - Functions to get counts 815 | - survey_count 816 | 817 | ??? 818 | 819 | - we use the same mean and total functions as with continuous variables 820 | - `survey_count` is new 821 | - has a similar structure as the standard (non-survey) version of count 822 | 823 | --- 824 | ## Set-up ANES Data for Examples 825 | 826 | ```{r anes_des} 827 | anes <- read_rds(here("Data", "anes.rds")) %>% 828 | mutate(Weight=Weight/sum(Weight)*224059005) 829 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation 830 | anes_des <- anes %>% 831 | as_survey_design(weights = Weight, 832 | strata = Stratum, 833 | ids = VarUnit, 834 | nest = TRUE) 835 | 836 | ``` 837 | ??? 838 | 839 | - American National Election Studies 840 | - provides weights that sum to the sample, but we want to get population estimates 841 | - need to adjust the weight to get it to the population count 842 | - as we mentioned before we will cover setting up the sample design object later 843 | 844 | --- 845 | ## `survey_count` Syntax 846 | 847 | - `survey_count` functions similarly to `count` in that it is NOT called within `summarize` 848 | 849 | - Produces weighted counts and variance of your choice of those counts 850 | ```{r survey_count_syn, eval=FALSE} 851 | survey_count( 852 | x, 853 | ..., 854 | wt = NULL, 855 | sort = FALSE, 856 | name = "n", 857 | .drop = dplyr::group_by_drop_default(x), 858 | vartype = c("se", "ci", "var", "cv") 859 | ) 860 | ``` 861 | ??? 
862 | - similar to count in that it takes one or many variables 863 | - can change the variance type as we have seen in the other survey functions 864 | 865 | --- 866 | ## `survey_count` Example 867 | 868 | - Cross-tab of population in each age group and gender 869 | ```{r survey_count_ex} 870 | anes_des %>% 871 | survey_count(AgeGroup, Gender, name="n") 872 | 873 | ``` 874 | ??? 875 | - `survey_count` is placed on its own like `count` 876 | - it does NOT go in a `summarize` function 877 | - can take multiple variables 878 | - can change the output count name, `n` is the default 879 | 880 | --- 881 | ## `survey_mean` and `survey_total` Examples 882 | 883 | - `survey_mean` used with no x (variable) calculates a proportion of groups specified in `group_by` 884 | - `survey_total` used with no x (variable) calculates a population count estimate for the groups specified in `group_by` 885 | 886 | Cross-tab of population who voted in 2016 887 | ```{r survey_p_ex1} 888 | anes_des %>% 889 | filter(!is.na(VotedPres2016)) %>% 890 | group_by(VotedPres2016) %>% 891 | summarize( 892 | p=survey_mean(), 893 | N=survey_total(), 894 | n=unweighted(n()), .groups="drop" 895 | ) 896 | ``` 897 | ??? 898 | - to get proportions we use `group_by` and `survey_mean` 899 | - also use `survey_total` to get a population count estimate as before 900 | --- 901 | ## Conditional proportions with more than one group 902 | 903 | - Specifying more than one group calculates conditional proportions 904 | - Example: people voting in 2012 and 2016 905 | 906 | ```{r survey_p_cond} 907 | anes_des %>% 908 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>% 909 | group_by(VotedPres2012, VotedPres2016) %>% 910 | summarize( 911 | p=survey_mean(), 912 | N=survey_total(), 913 | n=unweighted(n()), .groups="drop" 914 | ) 915 | ``` 916 | ??? 917 | - Note that this is the proportion of voting in 2016 by whether people voted in 2012 918 | - What if we don't want conditional proportions? 
919 | 920 | --- 921 | ## Joint proportions with more than one group 922 | 923 | - Specify an interaction to get joint distribution 924 | - Example: people voting in 2012 and 2016 925 | 926 | ```{r survey_p_joint} 927 | anes_des %>% 928 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>% 929 | group_by(groups = interaction(VotedPres2016, VotedPres2012)) %>% #<< 930 | summarize( 931 | p=survey_mean(), 932 | N=survey_total(), 933 | .groups="drop" 934 | ) 935 | ``` 936 | ??? 937 | - We add an interaction for the groups 938 | - This outputs the joint distribution, but the `groups` variable is hard to interpret 939 | 940 | --- 941 | ## Joint proportions with more than one group 942 | 943 | - Specify an interaction to get joint distribution 944 | - Example: people voting in 2012 and 2016 945 | 946 | ```{r survey_p_joint2} 947 | anes_des %>% 948 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>% 949 | group_by(groups = interaction(VotedPres2016, VotedPres2012)) %>% 950 | summarize( 951 | VotedPres2012=VotedPres2012[1], #<< 952 | VotedPres2016=VotedPres2016[1], #<< 953 | p=survey_mean(), 954 | N=survey_total(), 955 | .groups="drop" 956 | ) 957 | ``` 958 | ??? 959 | - We can add in two variables one for `VotedPres2012` and `VotedPres2016` 960 | - using the 1 in brackets pulls out the labels for these two variables so we see the "Yes" and "No" labels 961 | 962 | --- 963 | ## Proportions with Design Effects 964 | 965 | ```{r survey_p_deff} 966 | anes_des %>% 967 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>% 968 | group_by(VotedPres2012, VotedPres2016) %>% 969 | summarize( 970 | p=survey_mean(deff=TRUE), 971 | N=survey_total() 972 | ) 973 | ``` 974 | ??? 
975 | - Use `deff=TRUE` option in the `survey_mean` function 976 | 977 | --- 978 | ## `svychisq` Syntax 979 | 980 | - As with testing on continuous variables, `svychisq` comes from the `survey` package 981 | 982 | ```{r svychisq_syn, eval=FALSE} 983 | svychisq(formula, 984 | design, 985 | statistic = c("F", "Chisq", "Wald", "adjWald", "lincom", "saddlepoint"), 986 | na.rm=TRUE, 987 | ...) 988 | 989 | ``` 990 | ??? 991 | - when we want to test categorical distributions we use `svychisq` 992 | - it takes a formula, and the survey design data 993 | 994 | --- 995 | ## `svychisq` Example 1: Function Defaults 996 | 997 | - How often can you trust the federal gov't to do what is right? 998 | - How often can you trust other people? 999 | 1000 | ```{r svychisq_ex1} 1001 | anes_des %>% 1002 | svychisq(design=., 1003 | formula=~TrustPeople +TrustGovernment) 1004 | 1005 | 1006 | ``` 1007 | ??? 1008 | - We want to compare the distributions of these two questions 1009 | 1010 | --- 1011 | ## `svychisq` Example 2: Wald Statistic 1012 | 1013 | - How often can you trust the federal gov't to do what is right? 1014 | - Who did you vote for? Clinton, Trump, or Other 1015 | 1016 | ```{r svychisq_ex2} 1017 | anes_des %>% 1018 | svychisq(design=., 1019 | formula=~TrustGovernment +VotedPres2016_selection, 1020 | statistic="Wald") 1021 | 1022 | 1023 | ``` 1024 | ??? 1025 | - Can use different statistics 1026 | --- 1027 | ## Practice on your own 1028 | 1029 | - Open CategoricalExercises.Rmd and work through the exercises 1030 | 1031 | - We will take 10 minutes. 
Use this time for the exercises and a break 1032 | --- 1033 | class: inverse center middle 1034 | # Sample design object 1035 | --- 1036 | ## `tbl_svy` Object: Taylor Series Linearization 1037 | 1038 | - `tbl_svy` object defines the sampling design or replicate weights 1039 | 1040 | - Key information is usually found in documentation of a public use file 1041 | 1042 | ```{r sd_tsl_syn, eval=FALSE} 1043 | as_survey_design( 1044 | .data, 1045 | ids = NULL,#cluster IDs/PSUs 1046 | strata = NULL,#strata variables 1047 | variables = NULL,#defaults to all in .data 1048 | fpc = NULL,#variables defining the fpc 1049 | nest = FALSE,#TRUE/FALSE - relabel clusters to nest within strata 1050 | check_strata = !nest, #check that clusters are nested in strata 1051 | weights = NULL,# weight variable 1052 | ... 1053 | ) 1054 | ``` 1055 | 1056 | ??? 1057 | - Discussing Taylor series linearization (TSL) first 1058 | --- 1059 | ## `tbl_svy` for Common Designs 1060 | 1061 | ```{r sd_tsl_gen_ex, eval=FALSE} 1062 | # simple random sample (SRS) 1063 | apisrs %>% as_survey_design(fpc = fpc) 1064 | 1065 | # stratified sample 1066 | apistrat %>% as_survey_design(strata = stype, weights = pw) 1067 | 1068 | # one-stage cluster sample 1069 | apiclus1 %>% as_survey_design(ids = dnum, weights = pw, fpc = fpc) 1070 | 1071 | # two-stage cluster sample, weights computed from pop size 1072 | apiclus2 %>% as_survey_design(ids = c(dnum, snum), fpc = c(fpc1, fpc2)) 1073 | 1074 | # stratified, cluster design 1075 | apistrat %>% as_survey_design(ids = dnum, strata = stype, weights =pw, nest = TRUE) 1076 | 1077 | ``` 1078 | 1079 | - examples from `srvyr` help documentation 1080 | 1081 | --- 1082 | ## ANES Design Object 1083 | 1084 | .smaller[ 1085 | ```{r sd_anes, eval=TRUE} 1086 | anes_des <- anes %>% 1087 | as_survey_design(weights = Weight, 1088 | strata = Stratum, 1089 | ids = VarUnit, 1090 | nest = TRUE) 1091 | summary(anes_des) 1092 | ``` 1093 | ] 1094 | 1095 | ???
1096 | - Pseudo-strata (Stratum) and pseudo-clusters (VarUnit) are included for variance estimation 1097 | - We renamed these variables to make their purpose more obvious; the original documentation uses different variable names 1098 | 1099 | --- 1100 | ## `tbl_svy` Objects with Supplied Replicate Weights 1101 | 1102 | - Key information is usually found in documentation of a public use file 1103 | 1104 | ```{r sd_rep_syn, eval=FALSE} 1105 | as_survey_rep( 1106 | .data, 1107 | variables = NULL,#defaults to all in .data 1108 | repweights = NULL,#variables specifying replicate weights 1109 | weights = NULL,#variable for analysis weight 1110 | type = c("BRR", "Fay", "JK1", "JKn", "bootstrap", "other"), 1111 | rho = NULL,#shrinkage factor for Fay's method, 1112 | mse = getOption("survey.replicates.mse"), # if TRUE, compute variances based on 1113 | # sum of squares around the point estimate, rather than the mean of the replicates 1114 | scale = NULL, # overall multiplier for squared deviations 1115 | ... 1116 | ) 1117 | 1118 | ``` 1119 | --- 1120 | ## RECS Design Object 1121 | 1122 | .smaller[ 1123 | ```{r sd_recs, eval=TRUE} 1124 | recs_des <- recs %>% 1125 | as_survey_rep(weights=NWEIGHT, 1126 | repweights=starts_with("BRRWT"), 1127 | type="Fay", 1128 | rho=0.5, 1129 | mse=TRUE) 1130 | summary(recs_des) 1131 | ``` 1132 | ] 1133 | 1134 | ???
1135 | - Fay's method of BRR weight with $\epsilon=0.5$ 1136 | - RECS documentation includes syntax for creating survey design object 1137 | - https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf 1138 | 1139 | --- 1140 | ## Create Replicate Weights: jackknife 1141 | 1142 | - You can also start with a design object specified by the design and create replicate weights 1143 | .smaller[ 1144 | ```{r sd_create_rep} 1145 | data(api) 1146 | dclus1 <- apiclus1 %>% as_survey_design(ids = dnum, weights = pw, fpc = fpc) 1147 | rclus1 <- as_survey_rep(dclus1) 1148 | summary(rclus1) 1149 | 1150 | ``` 1151 | ] 1152 | --- 1153 | ## Create Replicate Weights: bootstrap 1154 | 1155 | - You can also start with a design object specified by the design and create replicate weights 1156 | .small[ 1157 | ```{r sd_create_boot} 1158 | bclus1 <- as_survey_rep(dclus1, type="bootstrap", replicates=100) 1159 | summary(bclus1) 1160 | 1161 | ``` 1162 | ] 1163 | --- 1164 | ## Create Survey Design Object for ACS 1165 | 1166 | Fill in the blanks 1167 | - Analysis weight: PWGTP 1168 | - replicate weights: PWGTP1-PWGTP80 1169 | - jackknife with scale adjustment of 4/80 1170 | ```{r sd_acs_fib, eval=FALSE} 1171 | acs_des <- acs_pums %>% 1172 | as_survey_rep( 1173 | weights=___________, 1174 | repweights=___________, 1175 | type=___________, 1176 | scale=_________ 1177 | ) 1178 | ``` 1179 | -- 1180 | 1181 | ```{r sd_acs_fib_sol, eval=FALSE} 1182 | acs_des <- acs_pums %>% 1183 | as_survey_rep( 1184 | weights=PWGTP, 1185 | repweights=stringr::str_c("PWGTP", 1:80), 1186 | type="JK1", 1187 | scale=4/80 1188 | ) 1189 | 1190 | ``` 1191 | --- 1192 | ## Create Survey Design Object for CPS 2011 Supplement 1193 | 1194 | Fill in the blanks 1195 | - Analysis weight: wtsupp 1196 | - replicate weights: repwtp1-repwtp160 1197 | - BRR 1198 | ```{r sd_cps_fib, eval=FALSE} 1199 | cps_des <- cps %>% 1200 | as_survey_rep( 1201 | weights=___________, 1202 | repweights=___________, 1203 |
type=___________ 1204 | ) 1205 | ``` 1206 | -- 1207 | ```{r sd_cps_fib_sol, eval=FALSE} 1208 | cps_des <- cps %>% 1209 | as_survey_rep( 1210 | weights=wtsupp, 1211 | repweights=starts_with("repwtp"), 1212 | type="BRR" 1213 | ) 1214 | ``` 1215 | --- 1216 | ## Create Survey Design Object for NHANES 1217 | 1218 | Fill in the blanks 1219 | - Analysis weight: WTINT2YR 1220 | - Variance Stratum: SDMVSTRA 1221 | - Variance Primary Sampling Unit: VPSU 1222 | ```{r sd_nhanes_fib, eval=FALSE} 1223 | nhanes_des <- nhanes %>% 1224 | as_survey_design( 1225 | weights=___________, 1226 | ids=___________, 1227 | strata=___________, 1228 | fpc=___________ 1229 | ) 1230 | ``` 1231 | -- 1232 | ```{r sd_nhanes_fib_sol, eval=FALSE} 1233 | nhanes_des <- nhanes %>% 1234 | as_survey_design( 1235 | weights=WTINT2YR, 1236 | ids=VPSU, 1237 | strata=SDMVSTRA, 1238 | fpc=NULL 1239 | ) 1240 | ``` 1241 | --- 1242 | ## Create Survey Design Object for LEMAS 2016 1243 | 1244 | Fill in the blanks 1245 | - Analysis weight: ANALYSISWEIGHT 1246 | - Variance Stratum: STRATA 1247 | - FPC: FRAMESIZE 1248 | ```{r sd_lemas_fib, eval=FALSE} 1249 | lemas_des <- lemas %>% 1250 | as_survey_design( 1251 | weights=___________, 1252 | ids=___________, 1253 | strata=___________, 1254 | fpc=___________ 1255 | ) 1256 | ``` 1257 | -- 1258 | 1259 | ```{r sd_lemas_fib_sol, eval=FALSE} 1260 | lemas_des <- lemas %>% 1261 | as_survey_design( 1262 | weights=ANALYSISWEIGHT, 1263 | ids=1, 1264 | strata=STRATA, 1265 | fpc=FRAMESIZE 1266 | ) 1267 | ``` 1268 | 1269 | --- 1270 | class: inverse center middle 1271 | # Closing 1272 | --- 1273 | ## Resources for more learning 1274 | 1275 | - https://cran.r-project.org/web/packages/srvyr/vignettes/srvyr-vs-survey.html 1276 | 1277 | - https://r-survey.r-forge.r-project.org/survey/ 1278 | - Includes more advanced modeling 1279 | 1280 | 1281 | --- 1282 | ## Thank You! 1283 | 1284 | ### We hope you learned a lot in this short course! 
1285 | 1286 | Please let us know if you have any feedback on this course. You will receive an email from AAPOR asking you to fill out a survey about this course. All feedback is welcome! 1287 | 1288 | 1289 | ## Questions? 1290 | 1291 | --- 1292 | ## Sources 1293 | 1294 | - The American National Election Studies (https://electionstudies.org/). These materials are based on work supported by the National Science Foundation under grant numbers SES 1444721, 2014-2017, the University of Michigan, and Stanford University. 1295 | 1296 | - *Residential Energy Consumption Survey: Using the 2015 Microdata File to Compute Estimates and Standard Errors.* U.S. Department of Energy (2017) https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf 1297 | 1298 | - Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0. https://allisonhorst.github.io/palmerpenguins/ 1299 | 1300 | - T. Lumley (2020) "survey: analysis of complex survey samples". R package version 4.0. https://r-survey.r-forge.r-project.org/survey/ 1301 | 1302 | - Greg Freedman Ellis and Ben Schneider (2020). srvyr: 'dplyr'-Like Syntax for Summary Statistics of Survey Data. R package version 1.0.0. https://CRAN.R-project.org/package=srvyr 1303 | 1304 | - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.5. 
https://CRAN.R-project.org/package=dplyr 1305 | -------------------------------------------------------------------------------- /Presentation/Slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Presentation/Slides.pdf -------------------------------------------------------------------------------- /Presentation/Slides.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Presentation/Slides.pptx -------------------------------------------------------------------------------- /Presentation/Slides_files/figure-html/plot_sf_elbill_disp-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Presentation/Slides_files/figure-html/plot_sf_elbill_disp-1.png -------------------------------------------------------------------------------- /Presentation/xaringan-themer.css: -------------------------------------------------------------------------------- 1 | /* ------------------------------------------------------- 2 | * 3 | * !! This file was generated by xaringanthemer !! 4 | * 5 | * Changes made to this file directly will be overwritten 6 | * if you used xaringanthemer in your xaringan slides Rmd 7 | * 8 | * Issues or likes? 9 | * - https://github.com/gadenbuie/xaringanthemer 10 | * - https://www.garrickadenbuie.com 11 | * 12 | * Need help? 
Try: 13 | * - vignette(package = "xaringanthemer") 14 | * - ?xaringanthemer::style_xaringan 15 | * - xaringan wiki: https://github.com/yihui/xaringan/wiki 16 | * - remarkjs wiki: https://github.com/gnab/remark/wiki 17 | * 18 | * Version: 0.3.3 19 | * 20 | * ------------------------------------------------------- */ 21 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap); 22 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap); 23 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap); 24 | 25 | 26 | :root { 27 | /* Fonts */ 28 | --text-font-family: 'Noto Sans'; 29 | --text-font-is-google: 1; 30 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial; 31 | --text-font-base: sans-serif; 32 | --header-font-family: Cabin; 33 | --header-font-is-google: 1; 34 | --header-font-family-fallback: Georgia, serif; 35 | --code-font-family: 'Source Code Pro'; 36 | --code-font-is-google: 1; 37 | --base-font-size: 20px; 38 | --text-font-size: 1rem; 39 | --code-font-size: 0.9rem; 40 | --code-inline-font-size: 1em; 41 | --header-h1-font-size: 2.75rem; 42 | --header-h2-font-size: 2.25rem; 43 | --header-h3-font-size: 1.75rem; 44 | 45 | /* Colors */ 46 | --text-color: #000000; 47 | --header-color: #1E4F96; 48 | --background-color: #FFFFFF; 49 | --link-color: #1E4F96; 50 | --text-bold-color: #1E4F96; 51 | --code-highlight-color: rgba(255,255,0,0.5); 52 | --inverse-text-color: #000000; 53 | --inverse-background-color: #00A3E0; 54 | --inverse-header-color: #FFFFFF; 55 | --inverse-link-color: #1E4F96; 56 | --title-slide-background-color: #1E4F96; 57 | --title-slide-text-color: #FFFFFF; 58 | --header-background-color: #1E4F96; 59 | --header-background-text-color: #FFFFFF; 60 | --primary: #1E4F96; 61 | --secondary: #00A3E0; 62 | --white: #FFFFFF; 63 | --black: #000000; 64 | } 65 | 66 | html 
{ 67 | font-size: var(--base-font-size); 68 | } 69 | 70 | body { 71 | font-family: var(--text-font-family), var(--text-font-family-fallback), var(--text-font-base); 72 | font-weight: normal; 73 | color: var(--text-color); 74 | } 75 | h1, h2, h3 { 76 | font-family: var(--header-font-family), var(--header-font-family-fallback); 77 | font-weight: 600; 78 | color: var(--header-color); 79 | } 80 | .remark-slide-content { 81 | background-color: var(--background-color); 82 | font-size: 1rem; 83 | padding: 16px 64px 16px 64px; 84 | width: 100%; 85 | height: 100%; 86 | } 87 | .remark-slide-content h1 { 88 | font-size: var(--header-h1-font-size); 89 | } 90 | .remark-slide-content h2 { 91 | font-size: var(--header-h2-font-size); 92 | } 93 | .remark-slide-content h3 { 94 | font-size: var(--header-h3-font-size); 95 | } 96 | .remark-code, .remark-inline-code { 97 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; 98 | } 99 | .remark-code { 100 | font-size: var(--code-font-size); 101 | } 102 | .remark-inline-code { 103 | font-size: var(--code-inline-font-size); 104 | color: #1E4F96; 105 | } 106 | .remark-slide-number { 107 | color: #1E4F96; 108 | opacity: 1; 109 | font-size: 0.9em; 110 | } 111 | strong { color: var(--text-bold-color); } 112 | a, a > code { 113 | color: var(--link-color); 114 | text-decoration: none; 115 | } 116 | .footnote { 117 | position: absolute; 118 | bottom: 60px; 119 | padding-right: 4em; 120 | font-size: 0.9em; 121 | } 122 | .remark-code-line-highlighted { 123 | background-color: var(--code-highlight-color); 124 | } 125 | .inverse { 126 | background-color: var(--inverse-background-color); 127 | color: var(--inverse-text-color); 128 | 129 | } 130 | .inverse h1, .inverse h2, .inverse h3 { 131 | color: var(--inverse-header-color); 132 | } 133 | .inverse a, .inverse a > code { 134 | color: var(--inverse-link-color); 135 | } 136 | .title-slide, .title-slide h1, .title-slide h2, .title-slide h3 { 137 | 
color: var(--title-slide-text-color); 138 | } 139 | .title-slide { 140 | background-color: var(--title-slide-background-color); 141 | } 142 | .title-slide .remark-slide-number { 143 | display: none; 144 | } 145 | /* Two-column layout */ 146 | .left-column { 147 | width: 20%; 148 | height: 92%; 149 | float: left; 150 | } 151 | .left-column h2, .left-column h3 { 152 | color: #1E4F9699; 153 | } 154 | .left-column h2:last-of-type, .left-column h3:last-child { 155 | color: #1E4F96; 156 | } 157 | .right-column { 158 | width: 75%; 159 | float: right; 160 | padding-top: 1em; 161 | } 162 | .pull-left { 163 | float: left; 164 | width: 47%; 165 | } 166 | .pull-right { 167 | float: right; 168 | width: 47%; 169 | } 170 | .pull-right + * { 171 | clear: both; 172 | } 173 | img, video, iframe { 174 | max-width: 100%; 175 | } 176 | blockquote { 177 | border-left: solid 5px #00A3E080; 178 | padding-left: 1em; 179 | } 180 | .remark-slide table { 181 | margin: auto; 182 | border-top: 1px solid #666; 183 | border-bottom: 1px solid #666; 184 | } 185 | .remark-slide table thead th { 186 | border-bottom: 1px solid #ddd; 187 | } 188 | th, td { 189 | padding: 5px; 190 | } 191 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { 192 | background: #CCECF8; 193 | } 194 | table.dataTable tbody { 195 | background-color: var(--background-color); 196 | color: var(--text-color); 197 | } 198 | table.dataTable.display tbody tr.odd { 199 | background-color: var(--background-color); 200 | } 201 | table.dataTable.display tbody tr.even { 202 | background-color: #CCECF8; 203 | } 204 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover { 205 | background-color: rgba(255, 255, 255, 0.5); 206 | } 207 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate { 208 | color: var(--text-color); 209 | } 210 | 
.dataTables_wrapper .dataTables_paginate .paginate_button { 211 | color: var(--text-color) !important; 212 | } 213 | 214 | /* Slide Header Background for h1 elements */ 215 | .remark-slide-content.header_background > h1 { 216 | display: block; 217 | position: absolute; 218 | top: 0; 219 | left: 0; 220 | width: 100%; 221 | background: var(--header-background-color); 222 | color: var(--header-background-text-color); 223 | padding: 2rem 64px 1.5rem 64px; 224 | margin-top: 0; 225 | box-sizing: border-box; 226 | } 227 | .remark-slide-content.header_background { 228 | padding-top: 7rem; 229 | } 230 | 231 | @page { margin: 0; } 232 | @media print { 233 | .remark-slide-scaler { 234 | width: 100% !important; 235 | height: 100% !important; 236 | transform: scale(1) !important; 237 | top: 0 !important; 238 | left: 0 !important; 239 | } 240 | } 241 | 242 | .primary { 243 | color: var(--primary); 244 | } 245 | .bg-primary { 246 | background-color: var(--primary); 247 | } 248 | .secondary { 249 | color: var(--secondary); 250 | } 251 | .bg-secondary { 252 | background-color: var(--secondary); 253 | } 254 | .white { 255 | color: var(--white); 256 | } 257 | .bg-white { 258 | background-color: var(--white); 259 | } 260 | .black { 261 | color: var(--black); 262 | } 263 | .bg-black { 264 | background-color: var(--black); 265 | } 266 | 267 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A new version of this course is at: https://github.com/tidy-survey-r/tidy-survey-short-course 2 | 3 | # Tidy Survey Analysis in R using the srvyr Package 4 | Materials for [AAPOR short course](https://www.aapor.org/Conference-Events/Annual-Meeting/Short-Courses.aspx) on Tidy Survey Analysis in R using the `srvyr` Package in May 2021 5 | 6 | - **RawData** folder contains public use file data along with any documentation 7 | - American National Election Studies, 2016 8 | - 
Residential Energy Consumption Survey, 2015 9 | - **DataCleaningScripts** folder contains scripts for making public use files analysis ready 10 | - Create derived variables 11 | - Renames some variables 12 | - Selects fewer variables just for examples 13 | - **Data** folder contains data files ready for analysis in presentation and examples 14 | - **Presentation** folder contains the slides for the course 15 | - Includes Rmd to create slides 16 | - Slides are available in html, pptx, R, and PDF 17 | - **Exercises** contains RMD and R files with exercises and solutions to practice concepts 18 | 19 | ## Sources 20 | 21 | - The American National Election Studies (https://electionstudies.org/). These materials are based on work supported by the National Science Foundation under grant numbers SES 1444721, 2014-2017, the University of Michigan, and Stanford University. 22 | 23 | - *Residential Energy Consumption Survey: Using the 2015 Microdata File to Compute Estimates and Standard Errors.* U.S. Department of Energy (2017) https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf 24 | 25 | - Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0. https://allisonhorst.github.io/palmerpenguins/ 26 | 27 | - T. Lumley (2020) "survey: analysis of complex survey samples". R package version 4.0. https://r-survey.r-forge.r-project.org/survey/ 28 | 29 | - Greg Freedman Ellis and Ben Schneider (2020). srvyr: 'dplyr'-Like Syntax for Summary Statistics of Survey Data. R package version 1.0.0. https://CRAN.R-project.org/package=srvyr 30 | 31 | - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.5. 
https://CRAN.R-project.org/package=dplyr 32 | -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016.sav -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf -------------------------------------------------------------------------------- /RawData/RECS_2015/2020_RECS-457A.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/RECS_2015/2020_RECS-457A.pdf -------------------------------------------------------------------------------- 
/RawData/RECS_2015/README.md: -------------------------------------------------------------------------------- 1 | # Residential Energy Consumption Survey (RECS) 2015 2 | 3 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021. -------------------------------------------------------------------------------- /RawData/RECS_2015/codebook_publicv4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/RECS_2015/codebook_publicv4.xlsx -------------------------------------------------------------------------------- /RawData/RECS_2015/microdata_v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/RECS_2015/microdata_v3.pdf -------------------------------------------------------------------------------- /tidy-survey-short-course.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 3 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | --------------------------------------------------------------------------------