├── .gitignore ├── Codebooks ├── ANES 2020 Derived Variable Codebook.Rmd ├── ANES-2020-Derived-Variable-Codebook.html ├── ANES-2020-Derived-Variable-Codebook.md ├── RECS 2015 Codebook.Rmd ├── RECS-2015-Codebook.html ├── RECS-2015-Codebook.md └── style.css ├── Data ├── anes.rds ├── anes_2020.rds └── recs.rds ├── DataCleaningScripts ├── ANES_DataPrep.Rmd ├── ANES_DataPrep.md ├── ANES_DataPrep_2020.Rmd ├── ANES_DataPrep_2020.md ├── RECS_DataPrep.Rmd ├── RECS_DataPrep.md ├── TargetPopulation.Rmd └── TargetPopulation.md ├── Exercises ├── CategorialExercises.R ├── CategorialExercises.Rmd ├── CategorialExercises_solutions.R ├── CategorialExercises_solutions.Rmd ├── CategorialExercises_solutions.html ├── ContinuousExercises.R ├── ContinuousExercises.Rmd ├── ContinuousExercises_solutions.R ├── ContinuousExercises_solutions.Rmd ├── ContinuousExercises_solutions.html ├── DesignDerivedVariablesExercises.R ├── DesignDerivedVariablesExercises.Rmd ├── DesignDerivedVariablesExercises_cache │ └── html │ │ ├── __packages │ │ ├── datin_95c553bea5c677086a0878157dbd740f.RData │ │ ├── datin_95c553bea5c677086a0878157dbd740f.rdb │ │ └── datin_95c553bea5c677086a0878157dbd740f.rdx ├── DesignDerivedVariablesExercises_solutions.R ├── DesignDerivedVariablesExercises_solutions.Rmd ├── DesignDerivedVariablesExercises_solutions.html ├── DesignDerivedVariablesExercises_solutions_cache │ └── html │ │ ├── __packages │ │ ├── datin_71bc85b99d78d2975dbdaf1205650ccd.RData │ │ ├── datin_71bc85b99d78d2975dbdaf1205650ccd.rdb │ │ └── datin_71bc85b99d78d2975dbdaf1205650ccd.rdx ├── WarmUpExercises.R ├── WarmUpExercises.Rmd ├── WarmUpExercises_solutions.R ├── WarmUpExercises_solutions.Rmd └── WarmUpExercises_solutions.html ├── FinalizeMaterials.R ├── LICENSE ├── Presentation ├── Images │ ├── IsabellaVelasquez_Headshot.jpeg │ ├── MAPOR-Logo1.png │ ├── Project-Contents.png │ ├── RebeccaPowell_Headshot.jpeg │ └── StephanieZimmer_Headshot.jpeg ├── Slides-day-1.R ├── Slides-day-1.Rmd ├── Slides-day-1.html ├── 
Slides-day-1.pdf ├── Slides-day-1.pptx ├── Slides-day-1_files │ ├── crosstalk-1.2.0 │ │ ├── css │ │ │ └── crosstalk.min.css │ │ ├── js │ │ │ ├── crosstalk.js │ │ │ ├── crosstalk.js.map │ │ │ ├── crosstalk.min.js │ │ │ └── crosstalk.min.js.map │ │ └── scss │ │ │ └── crosstalk.scss │ ├── datatables-binding-0.20 │ │ └── datatables.js │ ├── datatables-binding-0.21 │ │ └── datatables.js │ ├── datatables-binding-0.22 │ │ └── datatables.js │ ├── datatables-css-0.0.0 │ │ └── datatables-crosstalk.css │ ├── dt-core-1.11.3 │ │ ├── css │ │ │ ├── jquery.dataTables.extra.css │ │ │ └── jquery.dataTables.min.css │ │ └── js │ │ │ └── jquery.dataTables.min.js │ ├── header-attrs-2.11.22 │ │ └── header-attrs.js │ ├── header-attrs-2.11 │ │ └── header-attrs.js │ ├── header-attrs-2.13 │ │ └── header-attrs.js │ ├── htmlwidgets-1.5.4 │ │ └── htmlwidgets.js │ └── jquery-3.6.0 │ │ ├── jquery-3.6.0.js │ │ ├── jquery-3.6.0.min.js │ │ └── jquery-3.6.0.min.map ├── Slides-day-2.R ├── Slides-day-2.Rmd ├── Slides-day-2.html ├── Slides-day-2.pdf ├── Slides-day-2.pptx ├── Slides-day-2_files │ ├── figure-html │ │ └── plot_sf_elbill_disp-1.png │ ├── header-attrs-2.11.22 │ │ └── header-attrs.js │ └── header-attrs-2.13 │ │ └── header-attrs.js ├── Slides-day-3.R ├── Slides-day-3.Rmd ├── Slides-day-3.html ├── Slides-day-3.pdf ├── Slides-day-3.pptx ├── Slides-day-3_cache │ └── html │ │ ├── __packages │ │ ├── acsin_2028e4f3797786227f2217d2d50f2b92.RData │ │ ├── acsin_2028e4f3797786227f2217d2d50f2b92.rdb │ │ └── acsin_2028e4f3797786227f2217d2d50f2b92.rdx ├── Slides-day-3_files │ ├── figure-html │ │ └── der3c-1.png │ ├── header-attrs-2.11.22 │ │ └── header-attrs.js │ └── header-attrs-2.13 │ │ └── header-attrs.js └── xaringan-themer-mod.css ├── README.md ├── RawData ├── ANES_2016 │ ├── anes_timeseries_2016.sav │ ├── anes_timeseries_2016_qnaire_post.pdf │ ├── anes_timeseries_2016_qnaire_pre.pdf │ └── anes_timeseries_2016_userguidecodebook.pdf ├── ANES_2020 │ ├── 
anes_timeseries_2020_methodology_userguidecodebook_20210719.pdf │ ├── anes_timeseries_2020_questionnaire_20210719.pdf │ ├── anes_timeseries_2020_questionnaire_screener_20210719.pdf │ ├── anes_timeseries_2020_spss_20220210.sav │ └── anes_timeseries_2020_userguidecodebook_20220210.pdf └── RECS_2015 │ ├── 2020_RECS-457A.pdf │ ├── README.md │ ├── codebook_publicv4.xlsx │ ├── microdata_v3.pdf │ └── recs2015_public_v4.csv ├── tidy-survey-short-course.Rproj └── xaringan-themer.css /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | debug.log 6 | .DS_Store 7 | -------------------------------------------------------------------------------- /Codebooks/ANES 2020 Derived Variable Codebook.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "ANES 2020 Derived Variable Codebook" 3 | output: 4 | html_document: 5 | css: "style.css" 6 | toc: true 7 | toc_depth: 4 8 | toc_float: true 9 | self_contained: true 10 | github_document: 11 | toc: true 12 | always_allow_html: true 13 | --- 14 | 15 | ```{r setup, include = FALSE} 16 | knitr::opts_chunk$set(echo = FALSE) 17 | library(dplyr) 18 | library(janitor) 19 | library(kableExtra) 20 | library(knitr) 21 | 22 | anes_2020 <- readRDS(here::here("Data", "anes_2020.rds")) %>% 23 | haven::zap_labels() 24 | ``` 25 | 26 | The full codebook with the original variables is available at electionstudies.org. 27 | 28 | ## Weighting variables 29 | 30 | ### V200010b 31 | 32 | Full sample post-election weight. 33 | 34 | ### V200010d 35 | 36 | Full sample variance stratum. 37 | 38 | ### V200010c 39 | 40 | Full sample variance unit. 
41 | 42 | ## InterviewMode 43 | 44 | MODE OF INTERVIEW: PRE-ELECTION INTERVIEW 45 | 46 | ```{r variable-InterviewMode} 47 | anes_2020 %>% 48 | count(V200002, InterviewMode) %>% 49 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 50 | janitor::adorn_totals() %>% 51 | kbl() %>% 52 | kable_minimal() 53 | ``` 54 | Variables used: V200002 55 | 56 | ## Age 57 | 58 | RESPONDENT AGE 59 | 60 | ```{r variable-Age} 61 | anes_2020 %>% 62 | count(AgeGroup) %>% 63 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 64 | kbl() %>% 65 | kable_minimal() 66 | ``` 67 | 68 | Variables used: V201507x 69 | 70 | ## Gender 71 | 72 | PRE: WHAT IS YOUR (R) SEX? [REVISED] 73 | 74 | What is your sex? 75 | 76 | ```{r variable-Gender} 77 | anes_2020 %>% 78 | count(V201600, Gender) %>% 79 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 80 | kbl() %>% 81 | kable_minimal() 82 | ``` 83 | 84 | Variables used: V201600 85 | 86 | ## RaceEth 87 | 88 | PRE: SUMMARY: R SELF-IDENTIFIED RACE/ETHNICITY 89 | 90 | ```{r variable-RaceEth} 91 | anes_2020 %>% 92 | count(V201549x, RaceEth) %>% 93 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 94 | kbl() %>% 95 | kable_minimal() 96 | ``` 97 | 98 | Variables used: V201549x 99 | 100 | ## PartyID 101 | 102 | PRE: SUMMARY: PARTY ID 103 | 104 | ```{r variable-PartyID} 105 | anes_2020 %>% 106 | count(V201231x, PartyID) %>% 107 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 108 | kbl() %>% 109 | kable_minimal() 110 | ``` 111 | 112 | Variables used: V201231x 113 | 114 | ## Education 115 | 116 | What is the highest level of school you have completed or the 117 | highest degree you have received? 
118 | 119 | ```{r variable-Education} 120 | anes_2020 %>% 121 | count(V201510, Education) %>% 122 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 123 | kbl() %>% 124 | kable_minimal() 125 | ``` 126 | 127 | Variables used: V201510 128 | 129 | ## Income 130 | 131 | PRE: SUMMARY: TOTAL (FAMILY) INCOME 132 | 133 | ```{r variable-Income} 134 | anes_2020 %>% 135 | count(V201617x, Income) %>% 136 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 137 | kbl() %>% 138 | kable_minimal() 139 | ``` 140 | 141 | ```{r variable-Income7} 142 | anes_2020 %>% 143 | count(V201617x, Income7) %>% 144 | group_by(Income7) %>% 145 | summarise(n = sum(n)) %>% 146 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 147 | kbl() %>% 148 | kable_minimal() 149 | ``` 150 | 151 | Variables used: V201617x 152 | 153 | ## CampaignInterest 154 | 155 | PRE: HOW INTERESTED IN FOLLOWING CAMPAIGNS 156 | 157 | Some people don’t pay much attention to political campaigns. How about you? Would you say that you have been very much interested, somewhat interested or not much interested in the political campaigns so far this year? 158 | 159 | ```{r variable-CampaignInterest} 160 | anes_2020 %>% 161 | count(V201006, CampaignInterest) %>% 162 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 163 | kbl() %>% 164 | kable_minimal() 165 | ``` 166 | 167 | Variables used: V201006 168 | 169 | ## TrustGovernment 170 | 171 | PRE: HOW OFTEN TRUST GOVERNMENT IN WASHINGTON TO DO WHAT IS RIGHT [REVISED] 172 | 173 | How often can you trust the federal government in Washington to do what is right? 174 | 175 | ```{r variable-TrustGovernment} 176 | anes_2020 %>% 177 | count(V201233, TrustGovernment) %>% 178 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 179 | kbl() %>% 180 | kable_minimal() 181 | ``` 182 | 183 | Variables used: V201233 184 | 185 | ## TrustPeople 186 | 187 | PRE: HOW OFTEN CAN PEOPLE BE TRUSTED 188 | 189 | Generally speaking, how often can you trust other people? 
190 | 191 | ```{r variable-TrustPeople} 192 | anes_2020 %>% 193 | count(V201237, TrustPeople) %>% 194 | rename(Label = V201237) %>% 195 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 196 | kbl() %>% 197 | kable_minimal() 198 | ``` 199 | 200 | Variables used: V201237 201 | 202 | ## VotedPres2016 203 | 204 | PRE: DID R VOTE FOR PRESIDENT IN 2016 205 | 206 | Four years ago, in 2016, Hillary Clinton ran on the Democratic ticket against Donald Trump for the Republicans. Do you remember for sure whether or not you voted in that election? 207 | 208 | *Revised:* Four years ago, in 2016, Hillary Clinton ran on the Democratic ticket against Donald Trump for the Republicans. We talk to many people who tell us they did not vote. And we talk to a few people who tell us they did vote, who really did not. We can tell they did not vote by checking with official government records. What about you? If we check the official government voter records, will they show that you voted in the 2016 presidential election, or that you did not vote in that election? 209 | 210 | ```{r variable-VotedPres2016} 211 | anes_2020 %>% 212 | count(V201101, V201102, VotedPres2016) %>% 213 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 214 | kbl() %>% 215 | kable_minimal() 216 | ``` 217 | 218 | Variables used: V201101, V201102 219 | 220 | ## VotedPres2016_selection 221 | 222 | PRE: RECALL OF LAST (2016) PRESIDENTIAL VOTE CHOICE 223 | 224 | Which one did you vote for? 
225 | 226 | ```{r variable-VotedPres2016_selection} 227 | anes_2020 %>% 228 | count(V201103, VotedPres2016_selection) %>% 229 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 230 | kbl() %>% 231 | kable_minimal() 232 | ``` 233 | 234 | Variables used: V201103 235 | 236 | ## VotedPres2020 237 | 238 | PRE-POST: SUMMARY: VOTER TURNOUT IN 2020 239 | 240 | ```{r variable-VotedPres2020} 241 | anes_2020 %>% 242 | count(V202109x, VotedPres2020) %>% 243 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 244 | kbl() %>% 245 | kable_minimal() 246 | ``` 247 | 248 | Variables used: V202109x 249 | 250 | ## VotedPres2020_selection 251 | 252 | POST: FOR WHOM DID R VOTE FOR PRESIDENT 253 | 254 | ```{r variable-VotedPres2020_selection} 255 | anes_2020 %>% 256 | count(V202073, VotedPres2020_selection) %>% 257 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 258 | kbl() %>% 259 | kable_minimal() 260 | ``` 261 | 262 | Variables used: V202073 263 | 264 | ## EarlyVote2020 / VotedPres2020 265 | 266 | PRE: SUMMARY: REGISTRATION AND EARLY VOTE STATUS 267 | 268 | ```{r variable-EarlyVote2020} 269 | anes_2020 %>% 270 | count(V201025x, V202109x, VotedPres2020, EarlyVote2020) %>% 271 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 272 | kbl() %>% 273 | kable_minimal() 274 | ``` 275 | 276 | Variables used: V201025x, V202109x -------------------------------------------------------------------------------- /Codebooks/RECS 2015 Codebook.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "RECS 2015 Codebook" 3 | output: 4 | html_document: 5 | css: "style.css" 6 | toc: true 7 | toc_depth: 4 8 | toc_float: true 9 | self_contained: true 10 | github_document: 11 | toc: true 12 | always_allow_html: true 13 | --- 14 | 15 | ```{r setup, include = FALSE} 16 | knitr::opts_chunk$set(echo = FALSE) 17 | library(dplyr) 18 | library(janitor) 19 | library(kableExtra) 20 | library(knitr) 21 | 22 | recs <- readRDS(here::here("Data", 
"recs.rds")) %>% 23 | haven::zap_labels() 24 | ``` 25 | 26 | The full codebook with the original variables is available at eia.gov. 27 | 28 | ## Weighting variables 29 | 30 | ### DOEID 31 | 32 | Unique identifier for each respondent 33 | 34 | ### NWEIGHT 35 | 36 | Final sample weight 37 | 38 | ### BRRWT 39 | 40 | Replicate weights 41 | 42 | ## Categorical 43 | 44 | ### Region 45 | 46 | ```{r variable-Region} 47 | recs %>% 48 | count(Region) %>% 49 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 50 | kbl() %>% 51 | kable_minimal() 52 | ``` 53 | Variables used: REGIONC 54 | 55 | ### Division 56 | 57 | ```{r variable-Division} 58 | recs %>% 59 | count(Division) %>% 60 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 61 | kbl() %>% 62 | kable_minimal() 63 | ``` 64 | Variables used: DIVISION 65 | 66 | ### MSAStatus 67 | 68 | ```{r variable-MSAStatus} 69 | recs %>% 70 | count(MSAStatus) %>% 71 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 72 | kbl() %>% 73 | kable_minimal() 74 | ``` 75 | Variables used: METROMICRO 76 | 77 | ### Urbanicity 78 | 79 | ```{r variable-Urbanicity} 80 | recs %>% 81 | count(Urbanicity) %>% 82 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 83 | kbl() %>% 84 | kable_minimal() 85 | ``` 86 | 87 | Variables used: UATYP10 88 | 89 | ### HousingUnitType 90 | 91 | ```{r variable-HousingUnitType} 92 | recs %>% 93 | count(HousingUnitType) %>% 94 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 95 | kbl() %>% 96 | kable_minimal() 97 | ``` 98 | 99 | Variables used: TYPEHUQ 100 | 101 | ### YearMade 102 | 103 | ```{r variable-YearMade} 104 | recs %>% 105 | count(YearMade) %>% 106 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 107 | kbl() %>% 108 | kable_minimal() 109 | ``` 110 | 111 | Variables used: YEARMADERANGE 112 | 113 | ### SpaceHeatingUsed 114 | 115 | ```{r variable-SpaceHeatingUsed} 116 | recs %>% 117 | count(SpaceHeatingUsed) %>% 118 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 119 | kbl() %>% 120 
| kable_minimal() 121 | ``` 122 | 123 | Variables used: HEATHOME 124 | 125 | ### HeatingBehavior 126 | 127 | ```{r variable-HeatingBehavior} 128 | recs %>% 129 | count(HeatingBehavior) %>% 130 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 131 | kbl() %>% 132 | kable_minimal() 133 | ``` 134 | 135 | Variables used: EQUIPMUSE 136 | 137 | ### ACUsed 138 | 139 | ```{r variable-ACUsed} 140 | recs %>% 141 | count(ACUsed) %>% 142 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 143 | kbl() %>% 144 | kable_minimal() 145 | ``` 146 | 147 | Variables used: AIRCOND 148 | 149 | ### ACBehavior 150 | 151 | ```{r variable-ACBehavior} 152 | recs %>% 153 | count(ACBehavior) %>% 154 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 155 | kbl() %>% 156 | kable_minimal() 157 | ``` 158 | 159 | Variables used: USECENAC 160 | 161 | ### ClimateRegion_BA 162 | 163 | ```{r variable-ClimateRegion_BA} 164 | recs %>% 165 | count(ClimateRegion_BA) %>% 166 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 167 | kbl() %>% 168 | kable_minimal() 169 | ``` 170 | 171 | Variables used: CLIMATE_REGION_PUB 172 | 173 | ### ClimateRegion_IECC 174 | 175 | ```{r variable-ClimateRegion_IECC} 176 | recs %>% 177 | count(ClimateRegion_IECC) %>% 178 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% 179 | kbl() %>% 180 | kable_minimal() 181 | ``` 182 | 183 | Variables used: IECC_CLIMATE_PUB 184 | 185 | ## Continuous 186 | 187 | ### WinterTempDay 188 | 189 | ```{r variable-WinterTempDay} 190 | recs %>% 191 | summarize(Minimum = min(WinterTempDay, na.rm = TRUE), 192 | Median = median(WinterTempDay, na.rm = TRUE), 193 | Maximum = max(WinterTempDay, na.rm = TRUE)) %>% 194 | kbl() %>% 195 | kable_minimal() 196 | ``` 197 | 198 | Variables used: TEMPHOME 199 | 200 | ### WinterTempAway 201 | 202 | ```{r variable-WinterTempAway} 203 | recs %>% 204 | summarize(Minimum = min(WinterTempAway, na.rm = TRUE), 205 | Median = median(WinterTempAway, na.rm = TRUE), 206 | Maximum = max(WinterTempAway, 
na.rm = TRUE)) %>% 207 | kbl() %>% 208 | kable_minimal() 209 | ``` 210 | 211 | Variables used: TEMPGONE 212 | 213 | ### WinterTempNight 214 | 215 | ```{r variable-WinterTempNight} 216 | recs %>% 217 | summarize(Minimum = min(WinterTempNight, na.rm = TRUE), 218 | Median = median(WinterTempNight, na.rm = TRUE), 219 | Maximum = max(WinterTempNight, na.rm = TRUE)) %>% 220 | kbl() %>% 221 | kable_minimal() 222 | ``` 223 | 224 | Variables used: TEMPNITE 225 | 226 | ### SummerTempDay 227 | 228 | ```{r variable-SummerTempDay} 229 | recs %>% 230 | summarize(Minimum = min(SummerTempDay, na.rm = TRUE), 231 | Median = median(SummerTempDay, na.rm = TRUE), 232 | Maximum = max(SummerTempDay, na.rm = TRUE)) %>% 233 | kbl() %>% 234 | kable_minimal() 235 | ``` 236 | 237 | Variables used: TEMPHOMEAC 238 | 239 | ### SummerTempAway 240 | 241 | ```{r variable-SummerTempAway} 242 | recs %>% 243 | summarize(Minimum = min(SummerTempAway, na.rm = TRUE), 244 | Median = median(SummerTempAway, na.rm = TRUE), 245 | Maximum = max(SummerTempAway, na.rm = TRUE)) %>% 246 | kbl() %>% 247 | kable_minimal() 248 | ``` 249 | 250 | Variables used: TEMPGONEAC 251 | 252 | ### SummerTempNight 253 | 254 | ```{r variable-SummerTempNight} 255 | recs %>% 256 | summarize(Minimum = min(SummerTempNight, na.rm = TRUE), 257 | Median = median(SummerTempNight, na.rm = TRUE), 258 | Maximum = max(SummerTempNight, na.rm = TRUE)) %>% 259 | kbl() %>% 260 | kable_minimal() 261 | ``` 262 | 263 | Variables used: TEMPNITEAC 264 | 265 | ### TOTCSQFT 266 | 267 | Total cooled square footage 268 | 269 | ```{r variable-TOTCSQFT} 270 | recs %>% 271 | summarize(Minimum = min(TOTCSQFT, na.rm = TRUE), 272 | Median = median(TOTCSQFT, na.rm = TRUE), 273 | Maximum = max(TOTCSQFT, na.rm = TRUE)) %>% 274 | kbl() %>% 275 | kable_minimal() 276 | ``` 277 | 278 | ### TOTHSQFT 279 | 280 | Total heated square footage 281 | 282 | ```{r variable-TOTHSQFT} 283 | recs %>% 284 | summarize(Minimum = min(TOTHSQFT, na.rm = TRUE), 285 | Median = 
median(TOTHSQFT, na.rm = TRUE), 286 | Maximum = max(TOTHSQFT, na.rm = TRUE)) %>% 287 | kbl() %>% 288 | kable_minimal() 289 | ``` 290 | 291 | ### TOTSQFT_EN 292 | 293 | Total square footage (used for publication) 294 | 295 | ```{r variable-TOTSQFT_EN} 296 | recs %>% 297 | summarize(Minimum = min(TOTSQFT_EN, na.rm = TRUE), 298 | Median = median(TOTSQFT_EN, na.rm = TRUE), 299 | Maximum = max(TOTSQFT_EN, na.rm = TRUE)) %>% 300 | kbl() %>% 301 | kable_minimal() 302 | ``` 303 | 304 | ### TOTUCSQFT 305 | 306 | Total uncooled square footage 307 | 308 | ```{r variable-TOTUCSQFT} 309 | recs %>% 310 | summarize(Minimum = min(TOTUCSQFT, na.rm = TRUE), 311 | Median = median(TOTUCSQFT, na.rm = TRUE), 312 | Maximum = max(TOTUCSQFT, na.rm = TRUE)) %>% 313 | kbl() %>% 314 | kable_minimal() 315 | ``` 316 | 317 | ### TOTUSQFT 318 | 319 | Total unheated square footage 320 | 321 | ```{r variable-TOTUSQFT} 322 | recs %>% 323 | summarize(Minimum = min(TOTUSQFT, na.rm = TRUE), 324 | Median = median(TOTUSQFT, na.rm = TRUE), 325 | Maximum = max(TOTUSQFT, na.rm = TRUE)) %>% 326 | kbl() %>% 327 | kable_minimal() 328 | ``` 329 | 330 | ### CDD30YR 331 | 332 | Cooling degree days, 30-year average 1981-2010, base temperature 65F 333 | 334 | ```{r variable-CDD30YR} 335 | recs %>% 336 | summarize(Minimum = min(CDD30YR, na.rm = TRUE), 337 | Median = median(CDD30YR, na.rm = TRUE), 338 | Maximum = max(CDD30YR, na.rm = TRUE)) %>% 339 | kbl() %>% 340 | kable_minimal() 341 | ``` 342 | 343 | ### CDD65 344 | 345 | Cooling degree days in 2015, base temperature 65F 346 | 347 | ```{r variable-CDD65} 348 | recs %>% 349 | summarize(Minimum = min(CDD65, na.rm = TRUE), 350 | Median = median(CDD65, na.rm = TRUE), 351 | Maximum = max(CDD65, na.rm = TRUE)) %>% 352 | kbl() %>% 353 | kable_minimal() 354 | ``` 355 | 356 | ### CDD80 357 | 358 | Cooling degree days in 2015, base temperature 80F (used for garage cooling load estimation only) 359 | 360 | ```{r variable-CDD80} 361 | recs %>% 362 | summarize(Minimum = 
min(CDD80, na.rm = TRUE), 363 | Median = median(CDD80, na.rm = TRUE), 364 | Maximum = max(CDD80, na.rm = TRUE)) %>% 365 | kbl() %>% 366 | kable_minimal() 367 | ``` 368 | 369 | ### HDD30YR 370 | 371 | Heating degree days, 30-year average 1981-2010, base temperature 65F 372 | 373 | ```{r variable-HDD30YR} 374 | recs %>% 375 | summarize(Minimum = min(HDD30YR, na.rm = TRUE), 376 | Median = median(HDD30YR, na.rm = TRUE), 377 | Maximum = max(HDD30YR, na.rm = TRUE)) %>% 378 | kbl() %>% 379 | kable_minimal() 380 | ``` 381 | 382 | ### HDD65 383 | 384 | Heating degree days in 2015, base temperature 65F 385 | 386 | ```{r variable-HDD65} 387 | recs %>% 388 | summarize(Minimum = min(HDD65, na.rm = TRUE), 389 | Median = median(HDD65, na.rm = TRUE), 390 | Maximum = max(HDD65, na.rm = TRUE)) %>% 391 | kbl() %>% 392 | kable_minimal() 393 | ``` 394 | 395 | ### HDD50 396 | 397 | Heating degree days in 2015, base temperature 50F (used for garage heating load estimation only) 398 | 399 | ```{r variable-HDD50} 400 | recs %>% 401 | summarize(Minimum = min(HDD50, na.rm = TRUE), 402 | Median = median(HDD50, na.rm = TRUE), 403 | Maximum = max(HDD50, na.rm = TRUE)) %>% 404 | kbl() %>% 405 | kable_minimal() 406 | ``` 407 | 408 | ### GNDHDD65 409 | 410 | Heating degree days of ground temperature in 2015, base temperature 65F 411 | 412 | ```{r variable-GNDHDD65} 413 | recs %>% 414 | summarize(Minimum = min(GNDHDD65, na.rm = TRUE), 415 | Median = median(GNDHDD65, na.rm = TRUE), 416 | Maximum = max(GNDHDD65, na.rm = TRUE)) %>% 417 | kbl() %>% 418 | kable_minimal() 419 | ``` 420 | 421 | ### BTUEL 422 | 423 | Total site electricity usage, in thousand Btu, 2015 424 | 425 | ```{r variable-BTUEL} 426 | recs %>% 427 | summarize(Minimum = min(BTUEL, na.rm = TRUE), 428 | Median = median(BTUEL, na.rm = TRUE), 429 | Maximum = max(BTUEL, na.rm = TRUE)) %>% 430 | kbl() %>% 431 | kable_minimal() 432 | ``` 433 | 434 | ### DOLLAREL 435 | 436 | Total electricity cost, in dollars, 2015 437 | 438 | ```{r 
variable-DOLLAREL} 439 | recs %>% 440 | summarize(Minimum = min(DOLLAREL, na.rm = TRUE), 441 | Median = median(DOLLAREL, na.rm = TRUE), 442 | Maximum = max(DOLLAREL, na.rm = TRUE)) %>% 443 | kbl() %>% 444 | kable_minimal() 445 | ``` 446 | 447 | ### BTUNG 448 | 449 | Total natural gas usage, in thousand Btu, 2015 450 | 451 | ```{r variable-BTUNG} 452 | recs %>% 453 | summarize(Minimum = min(BTUNG, na.rm = TRUE), 454 | Median = median(BTUNG, na.rm = TRUE), 455 | Maximum = max(BTUNG, na.rm = TRUE)) %>% 456 | kbl() %>% 457 | kable_minimal() 458 | ``` 459 | 460 | ### DOLLARNG 461 | 462 | Total natural gas cost, in dollars, 2015 463 | 464 | ```{r variable-DOLLARNG} 465 | recs %>% 466 | summarize(Minimum = min(DOLLARNG, na.rm = TRUE), 467 | Median = median(DOLLARNG, na.rm = TRUE), 468 | Maximum = max(DOLLARNG, na.rm = TRUE)) %>% 469 | kbl() %>% 470 | kable_minimal() 471 | ``` 472 | 473 | ### BTULP 474 | 475 | Total propane usage, in thousand Btu, 2015 476 | 477 | ```{r variable-BTULP} 478 | recs %>% 479 | summarize(Minimum = min(BTULP, na.rm = TRUE), 480 | Median = median(BTULP, na.rm = TRUE), 481 | Maximum = max(BTULP, na.rm = TRUE)) %>% 482 | kbl() %>% 483 | kable_minimal() 484 | ``` 485 | 486 | ### DOLLARLP 487 | 488 | Total cost of propane, in dollars, 2015 489 | 490 | ```{r variable-DOLLARLP} 491 | recs %>% 492 | summarize(Minimum = min(DOLLARLP, na.rm = TRUE), 493 | Median = median(DOLLARLP, na.rm = TRUE), 494 | Maximum = max(DOLLARLP, na.rm = TRUE)) %>% 495 | kbl() %>% 496 | kable_minimal() 497 | ``` 498 | 499 | ### BTUFO 500 | 501 | Total fuel oil/kerosene usage, in thousand Btu, 2015 502 | 503 | ```{r variable-BTUFO} 504 | recs %>% 505 | summarize(Minimum = min(BTUFO, na.rm = TRUE), 506 | Median = median(BTUFO, na.rm = TRUE), 507 | Maximum = max(BTUFO, na.rm = TRUE)) %>% 508 | kbl() %>% 509 | kable_minimal() 510 | ``` 511 | 512 | ### DOLLARFO 513 | 514 | Total cost of fuel oil/kerosene, in dollars, 2015 515 | 516 | ```{r variable-DOLLARFO} 517 | recs %>% 518 | 
summarize(Minimum = min(DOLLARFO, na.rm = TRUE), 519 | Median = median(DOLLARFO, na.rm = TRUE), 520 | Maximum = max(DOLLARFO, na.rm = TRUE)) %>% 521 | kbl() %>% 522 | kable_minimal() 523 | ``` 524 | 525 | ### TOTALBTU 526 | 527 | Total usage, in thousand Btu, 2015 528 | 529 | ```{r variable-TOTALBTU} 530 | recs %>% 531 | summarize(Minimum = min(TOTALBTU, na.rm = TRUE), 532 | Median = median(TOTALBTU, na.rm = TRUE), 533 | Maximum = max(TOTALBTU, na.rm = TRUE)) %>% 534 | kbl() %>% 535 | kable_minimal() 536 | ``` 537 | 538 | ### TOTALDOL 539 | 540 | Total cost, in dollars, 2015 541 | 542 | ```{r variable-TOTALDOL} 543 | recs %>% 544 | summarize(Minimum = min(TOTALDOL, na.rm = TRUE), 545 | Median = median(TOTALDOL, na.rm = TRUE), 546 | Maximum = max(TOTALDOL, na.rm = TRUE)) %>% 547 | kbl() %>% 548 | kable_minimal() 549 | ``` 550 | 551 | ### BTUWOOD 552 | 553 | Total cordwood usage, in thousand Btu, 2015 (Wood consumption is not included in TOTALBTU or TOTALDOL) 554 | 555 | ```{r variable-BTUWOOD} 556 | recs %>% 557 | summarize(Minimum = min(BTUWOOD, na.rm = TRUE), 558 | Median = median(BTUWOOD, na.rm = TRUE), 559 | Maximum = max(BTUWOOD, na.rm = TRUE)) %>% 560 | kbl() %>% 561 | kable_minimal() 562 | ``` 563 | 564 | ### BTUPELLET 565 | 566 | Total wood pellet usage, in thousand Btu, 2015 (Wood consumption is not included in TOTALBTU or TOTALDOL) 567 | 568 | ```{r variable-BTUPELLET} 569 | recs %>% 570 | summarize(Minimum = min(BTUPELLET, na.rm = TRUE), 571 | Median = median(BTUPELLET, na.rm = TRUE), 572 | Maximum = max(BTUPELLET, na.rm = TRUE)) %>% 573 | kbl() %>% 574 | kable_minimal() 575 | ``` -------------------------------------------------------------------------------- /Codebooks/style.css: -------------------------------------------------------------------------------- 1 | 2 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap); 3 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap); 4 
| @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap); 5 | @import url('https://fonts.googleapis.com/css2?family=Telex&family=Ubuntu:wght@300&display=swap'); 6 | 7 | 8 | :root { 9 | /* Fonts */ 10 | --text-font-family: 'Noto Sans'; 11 | --text-font-is-google: 1; 12 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial; 13 | --text-font-base: sans-serif; 14 | --header-font-family: Cabin; 15 | --header-font-is-google: 1; 16 | --header-font-family-fallback: Georgia, serif; 17 | --code-font-family: 'Source Code Pro'; 18 | --code-font-is-google: 1; 19 | --base-font-size: 20px; 20 | --text-font-size: 1rem; 21 | --code-font-size: 0.9rem; 22 | --code-inline-font-size: 1em; 23 | --header-h1-font-size: 2.75rem; 24 | --header-h2-font-size: 2.25rem; 25 | --header-h3-font-size: 1.75rem; 26 | 27 | /* Colors */ 28 | --text-color: #000000; 29 | --header-color: #1E4F96; 30 | --background-color: #FFFFFF; 31 | --link-color: #1E4F96; 32 | --text-bold-color: #1E4F96; 33 | --code-highlight-color: rgba(255,255,0,0.5); 34 | --inverse-text-color: #000000; 35 | --inverse-background-color: #00A3E0; 36 | --inverse-header-color: #FFFFFF; 37 | --inverse-link-color: #1E4F96; 38 | --title-slide-background-color: #1E4F96; 39 | --title-slide-text-color: #FFFFFF; 40 | --header-background-color: #1E4F96; 41 | --header-background-text-color: #FFFFFF; 42 | --primary: #1E4F96; 43 | --secondary: #00A3E0; 44 | --white: #FFFFFF; 45 | --black: #000000; 46 | } 47 | 48 | html { 49 | font-size: var(--base-font-size); 50 | } 51 | 52 | body { 53 | font-family: 'Telex', sans-serif; 54 | font-weight: normal; 55 | color: var(--text-color); 56 | } 57 | h1, h2, h3 { 58 | font-family: 'Ubuntu', sans-serif; 59 | font-weight: 600; 60 | color: var(--header-color); 61 | } 62 | .remark-slide-content { 63 | background-color: var(--background-color); 64 | font-size: 1rem; 65 | padding: 16px 
64px 16px 64px; 66 | width: 100%; 67 | height: 100%; 68 | } 69 | .remark-slide-content h1 { 70 | font-size: var(--header-h1-font-size); 71 | } 72 | .remark-slide-content h2 { 73 | font-size: var(--header-h2-font-size); 74 | } 75 | .remark-slide-content h3 { 76 | font-size: var(--header-h3-font-size); 77 | } 78 | .remark-code, .remark-inline-code { 79 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; 80 | } 81 | .remark-code { 82 | font-size: var(--code-font-size); 83 | } 84 | .remark-inline-code { 85 | font-size: var(--code-inline-font-size); 86 | color: #1E4F96; 87 | } 88 | .remark-slide-number { 89 | color: #1E4F96; 90 | opacity: 1; 91 | font-size: 0.9rem; 92 | } 93 | strong { 94 | font-weight: bold; 95 | color: var(--text-bold-color); 96 | } 97 | a, a > code { 98 | color: var(--link-color); 99 | text-decoration: none; 100 | } 101 | .footnote { 102 | position: absolute; 103 | bottom: 60px; 104 | padding-right: 4em; 105 | font-size: 0.9em; 106 | } 107 | .remark-code-line-highlighted { 108 | background-color: var(--code-highlight-color); 109 | } 110 | .inverse { 111 | background-color: var(--inverse-background-color); 112 | color: var(--inverse-text-color); 113 | 114 | } 115 | .inverse h1, .inverse h2, .inverse h3 { 116 | color: var(--inverse-header-color); 117 | } 118 | .inverse a, .inverse a > code { 119 | color: var(--inverse-link-color); 120 | } 121 | .title-slide, .title-slide h1, .title-slide h2, .title-slide h3 { 122 | color: var(--title-slide-text-color); 123 | } 124 | .title-slide { 125 | background-color: var(--title-slide-background-color); 126 | } 127 | .title-slide .remark-slide-number { 128 | display: none; 129 | } 130 | /* Two-column layout */ 131 | .left-column { 132 | width: 20%; 133 | height: 92%; 134 | float: left; 135 | } 136 | .left-column h2, .left-column h3 { 137 | color: #1E4F9699; 138 | } 139 | .left-column h2:last-of-type, .left-column h3:last-child { 140 | color: #1E4F96; 141 | } 
142 | .right-column { 143 | width: 75%; 144 | float: right; 145 | padding-top: 1em; 146 | } 147 | .pull-left { 148 | float: left; 149 | width: 47%; 150 | } 151 | .pull-right { 152 | float: right; 153 | width: 47%; 154 | } 155 | .pull-right + * { 156 | clear: both; 157 | } 158 | img, video, iframe { 159 | max-width: 100%; 160 | } 161 | blockquote { 162 | border-left: solid 5px #00A3E080; 163 | padding-left: 1em; 164 | } 165 | .remark-slide table { 166 | margin: auto; 167 | border-top: 1px solid #666; 168 | border-bottom: 1px solid #666; 169 | } 170 | .remark-slide table thead th { 171 | border-bottom: 1px solid #ddd; 172 | } 173 | th, td { 174 | padding: 5px; 175 | } 176 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { 177 | background: #CCECF8; 178 | } 179 | table.dataTable tbody { 180 | background-color: var(--background-color); 181 | color: var(--text-color); 182 | } 183 | table.dataTable.display tbody tr.odd { 184 | background-color: var(--background-color); 185 | } 186 | table.dataTable.display tbody tr.even { 187 | background-color: #CCECF8; 188 | } 189 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover { 190 | background-color: rgba(255, 255, 255, 0.5); 191 | } 192 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate { 193 | color: var(--text-color); 194 | } 195 | .dataTables_wrapper .dataTables_paginate .paginate_button { 196 | color: var(--text-color) !important; 197 | } 198 | 199 | /* Horizontal alignment of code blocks */ 200 | .remark-slide-content.left pre, 201 | .remark-slide-content.center pre, 202 | .remark-slide-content.right pre { 203 | text-align: start; 204 | width: max-content; 205 | max-width: 100%; 206 | } 207 | .remark-slide-content.left pre, 208 | .remark-slide-content.right pre { 209 | min-width: 50%; 210 | min-width: min(40ch, 100%); 
211 | } 212 | .remark-slide-content.center pre { 213 | min-width: 66%; 214 | min-width: min(50ch, 100%); 215 | } 216 | .remark-slide-content.left pre { 217 | margin-left: unset; 218 | margin-right: auto; 219 | } 220 | .remark-slide-content.center pre { 221 | margin-left: auto; 222 | margin-right: auto; 223 | } 224 | .remark-slide-content.right pre { 225 | margin-left: auto; 226 | margin-right: unset; 227 | } 228 | 229 | /* Slide Header Background for h1 elements */ 230 | .remark-slide-content.header_background > h1 { 231 | display: block; 232 | position: absolute; 233 | top: 0; 234 | left: 0; 235 | width: 100%; 236 | background: var(--header-background-color); 237 | color: var(--header-background-text-color); 238 | padding: 2rem 64px 1.5rem 64px; 239 | margin-top: 0; 240 | box-sizing: border-box; 241 | } 242 | .remark-slide-content.header_background { 243 | padding-top: 7rem; 244 | } 245 | 246 | @page { margin: 0; } 247 | @media print { 248 | .remark-slide-scaler { 249 | width: 100% !important; 250 | height: 100% !important; 251 | transform: scale(1) !important; 252 | top: 0 !important; 253 | left: 0 !important; 254 | } 255 | } 256 | 257 | .primary { 258 | color: var(--primary); 259 | } 260 | .bg-primary { 261 | background-color: var(--primary); 262 | } 263 | .secondary { 264 | color: var(--secondary); 265 | } 266 | .bg-secondary { 267 | background-color: var(--secondary); 268 | } 269 | .white { 270 | color: var(--white); 271 | } 272 | .bg-white { 273 | background-color: var(--white); 274 | } 275 | .black { 276 | color: var(--black); 277 | } 278 | .bg-black { 279 | background-color: var(--black); 280 | } 281 | 282 | -------------------------------------------------------------------------------- /Data/anes.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Data/anes.rds 
-------------------------------------------------------------------------------- /Data/anes_2020.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Data/anes_2020.rds -------------------------------------------------------------------------------- /Data/recs.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Data/recs.rds -------------------------------------------------------------------------------- /DataCleaningScripts/ANES_DataPrep.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "American National Election Studies (ANES) 2016 Time Series Study Data Prep" 3 | output: github_document 4 | --- 5 | 6 | ```{r setup, include=FALSE} 7 | knitr::opts_chunk$set(echo = TRUE) 8 | ``` 9 | 10 | ## Data information 11 | 12 | All data and resources were downloaded from https://electionstudies.org/data-center/2016-time-series-study/ on April 3, 2021. 13 | 14 | American National Election Studies. 2019. ANES 2016 Time Series Study [dataset and documentation]. September 4, 2019 version. 
www.electionstudies.org 15 | ```{r loadpackageh, message=FALSE} 16 | library(here) #easy relative paths 17 | ``` 18 | 19 | 20 | 21 | ```{r loadpackages} 22 | library(tidyverse) #data manipulation 23 | library(haven) #data import 24 | library(tidylog) #informative logging messages 25 | ``` 26 | ## Import data and create derived variables 27 | 28 | ```{r derivedata} 29 | anes_in <- read_sav(here("RawData", "ANES_2016", "anes_timeseries_2016.sav")) 30 | 31 | 32 | anes <- anes_in %>% 33 | select('V160102', 'V160201', 'V160202', 'V160501', 'V161004', 'V161005', 'V161006', 'V161024x', 'V161158x', 'V161215', 'V161219', 'V161267', 'V161270', 'V161310x', 'V161342', 'V161361x', 'V162031', 'V162031x', 'V162034', 'V162034a', 'V162062x' # each raw item listed once 34 | ) %>% 35 | mutate( 36 | InterviewMode=fct_recode(as.character(V160501), FTF="1", Web="2"), 37 | Weight=V160102, 38 | Stratum=as.factor(V160201), 39 | VarUnit=as.factor(V160202), 40 | Age=if_else(V161267>0, as.numeric(V161267), NA_real_), # non-positive codes become NA 41 | AgeGroup=cut(Age, c(17, 29, 39, 49, 59, 69, 200), 42 | labels=c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")), 43 | Gender=factor( 44 | case_when( 45 | V161342==1~"Male", 46 | V161342==2~"Female", 47 | V161342==3~"Other", 48 | TRUE~NA_character_ 49 | ), 50 | levels=c("Male", "Female", "Other") 51 | ), 52 | RaceEth=factor( 53 | case_when( 54 | V161310x==1~"White", 55 | V161310x==2~"Black", 56 | V161310x==5~"Hispanic", 57 | V161310x==3~"Asian, NH/PI", 58 | near(V161310x, 4)~"AI/AN", 59 | near(V161310x, 6)~"Other/multiple race", 60 | TRUE ~ NA_character_ 61 | ), 62 | levels=c("White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN", "Other/multiple race") # codes matched by no branch stay NA; NA is not a level 63 | ), 64 | PartyID=factor( 65 | case_when( 66 | V161158x==1~"Strong democrat", 67 | V161158x==2~"Not very strong democrat", 68 | V161158x==3~"Independent-democrat", 69 | V161158x==4~"Independent", 70 | V161158x==5~"Independent-republican", 71 | V161158x==6~"Not very strong republican", 72 | 
V161158x==7~"Strong republican", 73 | TRUE ~ NA_character_ 74 | ), 75 | levels=c("Strong democrat", "Not very strong democrat", "Independent-democrat", "Independent", "Independent-republican", "Not very strong republican", "Strong republican") 76 | ), 77 | Education=factor( 78 | case_when( 79 | V161270 <=0~NA_character_, 80 | V161270 <= 8~"Less than HS", 81 | V161270==9|V161270==90~"High school", 82 | V161270<=12~"Post HS", 83 | V161270==13~"Bachelor's", 84 | V161270<=16~"Graduate", 85 | TRUE~NA_character_ 86 | ), 87 | levels=c("Less than HS", "High school", "Post HS", "Bachelor's", "Graduate") 88 | ), 89 | Income=cut(V161361x, c(-5, 1:28), 90 | labels=c("Under $5k", 91 | "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k", "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k", "$40-45k", "$45-50k", "$50-55k", "$55-60k", "$60-65k","$65-70k", "$70-75k", "$75-80k", "$80-90k", "$90-100k","$100-110k", "$110-125k", "$125-150k", "$150-175k", "$175-250k", "$250k or more" ) 92 | ), 93 | Income7=fct_collapse( 94 | Income, 95 | "Under $20k"=c("Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k"), 96 | "$20-40k"=c("$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k"), 97 | "$40-60k"=c( "$40-45k", "$45-50k", "$50-55k", "$55-60k"), 98 | "$60-80k"=c( "$60-65k", "$65-70k", "$70-75k", "$75-80k"), 99 | "$80-100k"=c("$80-90k", "$90-100k"), 100 | "$100-125k"=c("$100-110k", "$110-125k"), 101 | "$125k or more"=c("$125-150k", "$150-175k", "$175-250k", "$250k or more") 102 | ), 103 | CampaignInterest=factor( 104 | case_when( 105 | V161004==1~"Very much interested", 106 | V161004==2~"Somewhat interested", 107 | V161004==3~"Not much interested", 108 | TRUE~NA_character_ 109 | ), 110 | levels=c("Very much interested", "Somewhat interested", "Not much interested") 111 | ), 112 | TrustGovernment=factor( 113 | case_when( 114 | V161215==1~"Always", 115 | V161215==2~"Most of the time", 116 | V161215==3~"About half the time", 117 
| V161215==4~"Some of the time", 118 | V161215==5~"Never", 119 | TRUE~NA_character_ 120 | ), 121 | levels=c("Always", "Most of the time", "About half the time", "Some of the time", "Never") 122 | ), 123 | TrustPeople=factor( 124 | case_when( 125 | V161219==1~"Always", 126 | V161219==2~"Most of the time", 127 | V161219==3~"About half the time", 128 | V161219==4~"Some of the time", 129 | V161219==5~"Never", 130 | TRUE ~ NA_character_ 131 | ), 132 | levels=c("Always", "Most of the time", "About half the time", "Some of the time", "Never") 133 | ), 134 | VotedPres2012=factor( 135 | case_when( 136 | V161005==1~"Yes", 137 | V161005==2~"No", 138 | TRUE~NA_character_ 139 | ), levels=c("Yes", "No") 140 | ), 141 | VotedPres2012_selection=factor( 142 | case_when( 143 | V161006==1~"Obama", 144 | V161006==2~"Romney", 145 | V161006==5~"Other", 146 | TRUE~NA_character_ 147 | ), levels=c("Obama", "Romney", "Other") 148 | ), 149 | VotedPres2016=factor( 150 | case_when( 151 | V162031x==1~"Yes", 152 | V162031x==0~"No", 153 | TRUE~NA_character_ 154 | ), levels=c("Yes", "No") 155 | ), 156 | VotedPres2016_selection=factor( 157 | case_when( 158 | V162062x==1~"Clinton", 159 | V162062x==2~"Trump", 160 | V162062x >=3 ~"Other", 161 | TRUE~NA_character_ 162 | ), levels=c("Clinton", "Trump", "Other") 163 | ), 164 | EarlyVote2016=factor( 165 | case_when( 166 | V161024x==4~"Yes", 167 | VotedPres2016=="Yes"~"No", 168 | TRUE~NA_character_ 169 | ), levels=c("Yes", "No") 170 | ) 171 | ) 172 | 173 | 174 | 175 | summary(anes) 176 | ``` 177 | 178 | 179 | ## Check derived variables for correct coding 180 | 181 | ```{r checkvars} 182 | 183 | anes %>% count(InterviewMode, V160501) 184 | anes %>% group_by(AgeGroup) %>% summarise(minAge=min(Age), maxAge=max(Age), minV=min(V161267), maxV=max(V161267)) 185 | anes %>% count(Gender, V161342) 186 | anes %>% count(RaceEth, V161310x) 187 | anes %>% count(PartyID, V161158x) 188 | anes %>% count(Education, V161270) 189 | anes %>% count(Income, Income7, V161361x) %>% 
print(n=30) 190 | anes %>% count(CampaignInterest, V161004) 191 | anes %>% count(TrustGovernment, V161215) 192 | anes %>% count(TrustPeople, V161219) 193 | anes %>% count(VotedPres2012, V161005) 194 | anes %>% count(VotedPres2012_selection, V161006) 195 | anes %>% count(VotedPres2016, V162031x) 196 | anes %>% count(VotedPres2016_selection, V162062x) 197 | anes %>% count(EarlyVote2016, V161024x, VotedPres2016) 198 | 199 | anes %>% 200 | summarise(WtSum=sum(Weight)) %>% 201 | pull(WtSum) 202 | 203 | ``` 204 | ## Save data 205 | 206 | ```{r savedat} 207 | write_rds(anes, here("Data", "anes.rds"), compress="gz") 208 | ``` 209 | 210 | 211 | -------------------------------------------------------------------------------- /DataCleaningScripts/ANES_DataPrep_2020.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "American National Election Studies (ANES) 2020 Time Series Study Data Prep" 3 | output: github_document 4 | --- 5 | 6 | ```{r setup, include=FALSE} 7 | knitr::opts_chunk$set(echo = TRUE) 8 | ``` 9 | 10 | ## Data information 11 | 12 | All data and resources were downloaded from https://electionstudies.org/data-center/2020-time-series-study/ on February 28, 2022. 13 | 14 | American National Election Studies. 2021. ANES 2020 Time Series Study Full Release [dataset and documentation]. 
www.electionstudies.org 15 | ```{r loadpackageh, message=FALSE} 16 | library(here) # easy relative paths 17 | ``` 18 | 19 | 20 | 21 | ```{r loadpackages} 22 | library(tidyverse) # data manipulation 23 | library(haven) # data import 24 | library(tidylog) # informative logging messages 25 | ``` 26 | ## Import data and create derived variables 27 | 28 | ```{r derivedata} 29 | anes_in_2020 <- read_sav(here("RawData", "ANES_2020", "anes_timeseries_2020_spss_20220210.sav")) 30 | 31 | # weight validity for post-election survey 32 | anes_in_2020 %>% 33 | select(V200004, V200010a, V200010b) %>% 34 | group_by(V200004) %>% #type of respondent 35 | summarise( 36 | n=n(), 37 | nvalidwt_pre=sum(!is.na(V200010a) & V200010a>0), 38 | nvalidwt_post=sum(!is.na(V200010b) & V200010b>0) 39 | ) 40 | 41 | # Are all PSU/Stratum represented in post-weight? If so, we can drop pre-only cases later 42 | 43 | anes_in_2020 %>% 44 | count(V200010d, V200010c, V200004) %>% 45 | group_by(V200010d, V200010c) %>% 46 | mutate( 47 | Pct=n/sum(n) 48 | ) %>% 49 | filter(V200004==3) %>% 50 | arrange(Pct) 51 | 52 | 53 | anes_2020 <- anes_in_2020 %>% 54 | filter(V200004==3) %>% 55 | select( 56 | "V200010b", # FULL SAMPLE POST-ELECTION WEIGHT 57 | "V200010d", # FULL SAMPLE VARIANCE STRATUM 58 | "V200010c", # FULL SAMPLE VARIANCE UNIT 59 | "V200002", # MODE OF INTERVIEW: PRE-ELECTION INTERVIEW 60 | "V201006", # PRE: HOW INTERESTED IN FOLLOWING CAMPAIGNS 61 | "V201102", # PRE: DID R VOTE FOR PRESIDENT IN 2016 62 | "V201101", # PRE: DID R VOTE FOR PRESIDENT IN 2016 [REVISED] 63 | "V201103", # PRE: RECALL OF LAST (2016) PRESIDENTIAL VOTE CHOICE) 64 | "V201025x", # PRE: SUMMARY: REGISTRATION AND EARLY VOTE STATUS 65 | "V201231x", # PRE: SUMMARY: PARTY ID 66 | "V201233", # PRE: HOW OFTEN TRUST GOVERNMENT IN WASHINGTON TO DO WHAT IS RIGHT [REVISED] 67 | "V201237", # PRE: HOW OFTEN CAN PEOPLE BE TRUSTED 68 | "V201507x", # PRE: SUMMARY: RESPONDENT AGE 69 | "V201510", # PRE: HIGHEST LEVEL OF EDUCATION 70 | "V201549x", 
# PRE: SUMMARY: R SELF-IDENTIFIED RACE/ETHNICITY 71 | "V201600", # PRE: WHAT IS YOUR (R) SEX? [REVISED] 72 | "V201617x", # PRE: SUMMARY: TOTAL (FAMILY) INCOME 73 | "V202066", # POST: DID R VOTE IN NOVEMBER 2020 ELECTION 74 | "V202109x", # PRE-POST: SUMMARY: VOTER TURNOUT IN 2020 75 | "V202072", # POST: DID R VOTE FOR PRESIDENT 76 | "V202073", # POST: FOR WHOM DID R VOTE FOR PRESIDENT 77 | "V202110x" # PRE-POST: SUMMARY: 2020 PRESIDENTIAL VOTE 78 | ) %>% 79 | mutate( 80 | InterviewMode = fct_recode(as.character(V200002), Video = "1", Telephone = "2", Web = "3"), 81 | Weight = V200010b, 82 | Stratum = as.factor(V200010d), 83 | VarUnit = as.factor(V200010c), 84 | Age = if_else(V201507x > 0, as.numeric(V201507x), NA_real_), # non-positive codes become NA 85 | AgeGroup = cut(Age, c(17, 29, 39, 49, 59, 69, 200), 86 | labels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older") 87 | ), 88 | Gender = factor( 89 | case_when( 90 | V201600 == 1 ~ "Male", 91 | V201600 == 2 ~ "Female", 92 | TRUE ~ NA_character_ 93 | ), 94 | levels = c("Male", "Female") 95 | ), 96 | RaceEth = factor( 97 | case_when( 98 | V201549x == 1 ~ "White", 99 | V201549x == 2 ~ "Black", 100 | V201549x == 3 ~ "Hispanic", 101 | V201549x == 4 ~ "Asian, NH/PI", 102 | V201549x == 5 ~ "AI/AN", 103 | V201549x == 6 ~ "Other/multiple race", 104 | TRUE ~ NA_character_ 105 | ), 106 | levels = c("White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN", "Other/multiple race") # codes matched by no branch stay NA; NA is not a level 107 | ), 108 | PartyID = factor( 109 | case_when( 110 | V201231x == 1 ~ "Strong democrat", 111 | V201231x == 2 ~ "Not very strong democrat", 112 | V201231x == 3 ~ "Independent-democrat", 113 | V201231x == 4 ~ "Independent", 114 | V201231x == 5 ~ "Independent-republican", 115 | V201231x == 6 ~ "Not very strong republican", 116 | V201231x == 7 ~ "Strong republican", 117 | TRUE ~ NA_character_ 118 | ), 119 | levels = c("Strong democrat", "Not very strong democrat", "Independent-democrat", "Independent", "Independent-republican", "Not very strong republican", 
"Strong republican") 120 | ), 121 | Education = factor( 122 | case_when( 123 | V201510 <= 0 ~ NA_character_, 124 | V201510 == 1 ~ "Less than HS", 125 | V201510 == 2 ~ "High school", 126 | V201510 <= 5 ~ "Post HS", 127 | V201510 == 6 ~ "Bachelor's", 128 | V201510 <= 8 ~ "Graduate", 129 | TRUE ~ NA_character_ 130 | ), 131 | levels = c("Less than HS", "High school", "Post HS", "Bachelor's", "Graduate") 132 | ), 133 | Income = cut(V201617x, c(-5, 1:22), 134 | labels = c( 135 | "Under $9,999", 136 | "$10,000-14,999", 137 | "$15,000-19,999", 138 | "$20,000-24,999", 139 | "$25,000-29,999", 140 | "$30,000-34,999", 141 | "$35,000-39,999", 142 | "$40,000-44,999", 143 | "$45,000-49,999", 144 | "$50,000-59,999", 145 | "$60,000-64,999", 146 | "$65,000-69,999", 147 | "$70,000-74,999", 148 | "$75,000-79,999", 149 | "$80,000-89,999", 150 | "$90,000-99,999", 151 | "$100,000-109,999", 152 | "$110,000-124,999", 153 | "$125,000-149,999", 154 | "$150,000-174,999", 155 | "$175,000-249,999", 156 | "$250,000 or more" 157 | ) 158 | ), 159 | Income7 = fct_collapse( 160 | Income, 161 | "Under $20k" = c("Under $9,999", "$10,000-14,999", "$15,000-19,999"), 162 | "$20-40k" = c("$20,000-24,999", "$25,000-29,999", "$30,000-34,999", "$35,000-39,999"), 163 | "$40-60k" = c("$40,000-44,999", "$45,000-49,999", "$50,000-59,999"), 164 | "$60-80k" = c("$60,000-64,999", "$65,000-69,999", "$70,000-74,999", "$75,000-79,999"), 165 | "$80-100k" = c("$80,000-89,999", "$90,000-99,999"), 166 | "$100-125k" = c("$100,000-109,999", "$110,000-124,999"), 167 | "$125k or more" = c("$125,000-149,999", "$150,000-174,999", "$175,000-249,999", "$250,000 or more") 168 | ), 169 | CampaignInterest = factor( 170 | case_when( 171 | V201006 == 1 ~ "Very much interested", 172 | V201006 == 2 ~ "Somewhat interested", 173 | V201006 == 3 ~ "Not much interested", 174 | TRUE ~ NA_character_ 175 | ), 176 | levels = c("Very much interested", "Somewhat interested", "Not much interested") 177 | ), 178 | TrustGovernment = factor( 179 | 
case_when( 180 | V201233 == 1 ~ "Always", 181 | V201233 == 2 ~ "Most of the time", 182 | V201233 == 3 ~ "About half the time", 183 | V201233 == 4 ~ "Some of the time", 184 | V201233 == 5 ~ "Never", 185 | TRUE ~ NA_character_ 186 | ), 187 | levels = c("Always", "Most of the time", "About half the time", "Some of the time", "Never") 188 | ), 189 | TrustPeople = factor( 190 | case_when( 191 | V201237 == 1 ~ "Always", 192 | V201237 == 2 ~ "Most of the time", 193 | V201237 == 3 ~ "About half the time", 194 | V201237 == 4 ~ "Some of the time", 195 | V201237 == 5 ~ "Never", 196 | TRUE ~ NA_character_ 197 | ), 198 | levels = c("Always", "Most of the time", "About half the time", "Some of the time", "Never") 199 | ), 200 | VotedPres2016 = factor( 201 | case_when( 202 | V201101 == 1 | V201102 == 1 ~ "Yes", 203 | V201101 == 2 | V201102 == 2 ~ "No", 204 | TRUE ~ NA_character_ 205 | ), 206 | levels = c("Yes", "No") 207 | ), 208 | VotedPres2016_selection = factor( 209 | case_when( 210 | V201103 == 1 ~ "Clinton", 211 | V201103 == 2 ~ "Trump", 212 | V201103 == 5 ~ "Other", 213 | TRUE ~ NA_character_ 214 | ), 215 | levels = c("Clinton", "Trump", "Other") 216 | ), 217 | VotedPres2020 = factor( 218 | case_when( 219 | V202109x == 1 ~ "Yes", 220 | V202109x == 0 ~ "No", 221 | TRUE ~ NA_character_ 222 | ), 223 | levels = c("Yes", "No") 224 | ), 225 | VotedPres2020_selection = factor( 226 | case_when( 227 | V202073 == 1 ~ "Biden", 228 | V202073 == 2 ~ "Trump", 229 | V202073 >= 3 & V202073 <= 8~ "Other", 230 | V202073 == 11 ~ NA_character_, 231 | V202073 == 12 ~ NA_character_, 232 | TRUE ~ NA_character_ 233 | ), 234 | levels = c("Biden", "Trump", "Other") 235 | ), 236 | EarlyVote2020 = factor( 237 | case_when( 238 | V201025x < 0 ~ NA_character_, 239 | V201025x == 4 ~ "Yes", 240 | VotedPres2020 == "Yes" ~ "No", 241 | TRUE ~ NA_character_), 242 | levels = c("Yes", "No") 243 | ) 244 | ) 245 | 246 | summary(anes_2020) 247 | ``` 248 | 249 | ## Check derived variables for correct coding 250 | 
251 | ```{r checkvars} 252 | 253 | anes_2020 %>% count(InterviewMode, V200002) 254 | 255 | anes_2020 %>% 256 | group_by(AgeGroup) %>% 257 | summarise( 258 | minAge = min(Age), 259 | maxAge = max(Age), 260 | minV = min(V201507x), 261 | maxV = max(V201507x) 262 | ) 263 | 264 | anes_2020 %>% count(Gender, V201600) 265 | 266 | anes_2020 %>% count(RaceEth, V201549x) 267 | 268 | anes_2020 %>% count(PartyID, V201231x) 269 | 270 | anes_2020 %>% count(Education, V201510) 271 | 272 | anes_2020 %>% 273 | count(Income, Income7, V201617x) %>% 274 | print(n = 30) 275 | 276 | anes_2020 %>% count(CampaignInterest, V201006) 277 | 278 | anes_2020 %>% count(TrustGovernment, V201233) 279 | 280 | anes_2020 %>% count(TrustPeople, V201237) 281 | 282 | anes_2020 %>% count(VotedPres2016, V201101, V201102) 283 | 284 | anes_2020 %>% count(VotedPres2016_selection, V201103) 285 | 286 | anes_2020 %>% count(VotedPres2020, V202109x) 287 | 288 | anes_2020 %>% count(VotedPres2020_selection, V202073) 289 | 290 | anes_2020 %>% count(EarlyVote2020, V201025x, VotedPres2020) 291 | 292 | anes_2020 %>% 293 | summarise(WtSum = sum(Weight, na.rm = TRUE)) %>% 294 | pull(WtSum) 295 | ``` 296 | 297 | ## Save data 298 | 299 | ```{r savedat} 300 | write_rds(anes_2020, here("Data", "anes_2020.rds"), compress = "gz") 301 | ``` 302 | -------------------------------------------------------------------------------- /DataCleaningScripts/RECS_DataPrep.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Residential Energy Consumption Survey (RECS) 2015 Data Prep" 3 | output: github_document 4 | --- 5 | 6 | ```{r setup, include=FALSE} 7 | knitr::opts_chunk$set(echo = TRUE) 8 | ``` 9 | 10 | ## Data information 11 | 12 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021. 
13 | 14 | ```{r loadpackageh, message=FALSE} 15 | library(here) #easy relative paths 16 | ``` 17 | 18 | ```{r loadpackages} 19 | library(tidyverse) #data manipulation 20 | library(haven) #data import 21 | library(tidylog) #informative logging messages 22 | ``` 23 | ## Import data and create derived variables 24 | 25 | ```{r derivedata} 26 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) 27 | 28 | recs <- recs_in %>% 29 | select(DOEID, REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, YEARMADERANGE, HEATHOME, EQUIPMUSE, TEMPHOME, TEMPGONE, TEMPNITE, AIRCOND, USECENAC, TEMPHOMEAC, TEMPGONEAC, TEMPNITEAC, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, CLIMATE_REGION_PUB, IECC_CLIMATE_PUB, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD=WOODBTU, BTUPELLET=PELLETBTU ) %>% 30 | mutate( 31 | Region=parse_factor( 32 | case_when( 33 | REGIONC==1~"Northeast", 34 | REGIONC==2~"Midwest", 35 | REGIONC==3~"South", 36 | REGIONC==4~"West", 37 | ), levels=c("Northeast", "Midwest", "South", "West")), 38 | Division=parse_factor( 39 | case_when( 40 | DIVISION==1~"New England", 41 | DIVISION==2~"Middle Atlantic", 42 | DIVISION==3~"East North Central", 43 | DIVISION==4~"West North Central", 44 | DIVISION==5~"South Atlantic", 45 | DIVISION==6~"East South Central", 46 | DIVISION==7~"West South Central", 47 | DIVISION==8~"Mountain North", 48 | DIVISION==9~"Mountain South", 49 | DIVISION==10~"Pacific", 50 | ), levels=c("New England", "Middle Atlantic", "East North Central", "West North Central", "South Atlantic", "East South Central", "West South Central", "Mountain North", "Mountain South", "Pacific")), 51 | MSAStatus=fct_recode(METROMICRO, "Metropolitan Statistical Area"="METRO", "Micropolitan Statistical Area"="MICRO", "None"="NONE"), 52 | Urbanicity=parse_factor( 53 | case_when( 54 | UATYP10=="U"~"Urban Area", 55 | 
UATYP10=="C"~"Urban Cluster", 56 | UATYP10=="R"~"Rural" 57 | ), 58 | levels=c("Urban Area", "Urban Cluster", "Rural") 59 | ), 60 | HousingUnitType=parse_factor( 61 | case_when( 62 | TYPEHUQ==1~"Mobile home", 63 | TYPEHUQ==2~"Single-family detached", 64 | TYPEHUQ==3~"Single-family attached", 65 | TYPEHUQ==4~"Apartment: 2-4 Units", 66 | TYPEHUQ==5~"Apartment: 5 or more units", 67 | ), levels=c("Mobile home", "Single-family detached", "Single-family attached", "Apartment: 2-4 Units", "Apartment: 5 or more units")), 68 | YearMade=parse_factor( 69 | case_when( 70 | YEARMADERANGE==1~"Before 1950", 71 | YEARMADERANGE==2~"1950-1959", 72 | YEARMADERANGE==3~"1960-1969", 73 | YEARMADERANGE==4~"1970-1979", 74 | YEARMADERANGE==5~"1980-1989", 75 | YEARMADERANGE==6~"1990-1999", 76 | YEARMADERANGE==7~"2000-2009", 77 | YEARMADERANGE==8~"2010-2015", 78 | ), 79 | levels=c("Before 1950", "1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2015"), 80 | ordered = TRUE 81 | ), 82 | SpaceHeatingUsed=as.logical(HEATHOME), 83 | HeatingBehavior=parse_factor( 84 | case_when( 85 | EQUIPMUSE==1~"Set one temp and leave it", 86 | EQUIPMUSE==2~"Manually adjust at night/no one home", 87 | EQUIPMUSE==3~"Program thermostat to change at certain times", 88 | EQUIPMUSE==4~"Turn on or off as needed", 89 | EQUIPMUSE==5~"No control", 90 | EQUIPMUSE==9~"Other", 91 | EQUIPMUSE==-9~NA_character_), 92 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control", "Other") 93 | ), 94 | WinterTempDay=if_else(TEMPHOME>0, TEMPHOME, NA_real_), 95 | WinterTempAway=if_else(TEMPGONE>0, TEMPGONE, NA_real_), 96 | WinterTempNight=if_else(TEMPNITE>0, TEMPNITE, NA_real_), 97 | ACUsed=as.logical(AIRCOND), 98 | ACBehavior=parse_factor( 99 | case_when( 100 | USECENAC==1~"Set one temp and leave it", 101 | USECENAC==2~"Manually adjust at night/no one home", 102 | USECENAC==3~"Program 
thermostat to change at certain times", 103 | USECENAC==4~"Turn on or off as needed", 104 | USECENAC==5~"No control", 105 | USECENAC==-9~NA_character_), 106 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control") 107 | ), 108 | SummerTempDay=if_else(TEMPHOMEAC>0, TEMPHOMEAC, NA_real_), 109 | SummerTempAway=if_else(TEMPGONEAC>0, TEMPGONEAC, NA_real_), 110 | SummerTempNight=if_else(TEMPNITEAC>0, TEMPNITEAC, NA_real_), 111 | ClimateRegion_BA=parse_factor(CLIMATE_REGION_PUB), 112 | ClimateRegion_IECC=factor(IECC_CLIMATE_PUB) 113 | 114 | ) 115 | 116 | ``` 117 | 118 | 119 | ## Check derived variables for correct coding 120 | 121 | ```{r checkvars} 122 | recs %>% count(Region, REGIONC) 123 | recs %>% count(Division, DIVISION) 124 | recs %>% count(MSAStatus, METROMICRO) 125 | recs %>% count(Urbanicity, UATYP10) 126 | recs %>% count(HousingUnitType, TYPEHUQ) 127 | recs %>% count(YearMade, YEARMADERANGE) 128 | recs %>% count(SpaceHeatingUsed, HEATHOME) 129 | recs %>% count(HeatingBehavior, EQUIPMUSE) 130 | recs %>% count(ACUsed, AIRCOND) 131 | recs %>% count(ACBehavior, USECENAC) 132 | recs %>% count(ClimateRegion_BA, CLIMATE_REGION_PUB) 133 | recs %>% count(ClimateRegion_IECC, IECC_CLIMATE_PUB) 134 | 135 | ``` 136 | ## Save data 137 | 138 | ```{r savedat} 139 | recs_out <- recs %>% 140 | select(DOEID, Region, Division, MSAStatus, Urbanicity, HousingUnitType, YearMade, SpaceHeatingUsed, HeatingBehavior, WinterTempDay, WinterTempAway, WinterTempNight, ACUsed, ACBehavior, SummerTempDay, SummerTempAway, SummerTempNight, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, ClimateRegion_BA, ClimateRegion_IECC, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD, BTUPELLET) 141 | 142 | summary(recs_out) 143 | write_rds(recs_out, 
here("Data", "recs.rds"), compress="gz") 144 | ``` 145 | 146 | 147 | -------------------------------------------------------------------------------- /DataCleaningScripts/TargetPopulation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Target Population 2020 ANES" 3 | output: github_document 4 | --- 5 | 6 | ```{r setup, include=FALSE} 7 | knitr::opts_chunk$set(echo = TRUE) 8 | ``` 9 | 10 | ## Target pop 2020 ANES 11 | From the User Guide: "The target population for the fresh cross-section was the 231 million non-institutional U.S. citizens aged 18 or older living in the 50 US states or the District of Columbia." 12 | 13 | - We will use Current Population Survey (CPS) to find this total from November 2020 14 | - Relevant data dictionary: https://www2.census.gov/programs-surveys/cps/datasets/2020/basic/2020_Basic_CPS_Public_Use_Record_Layout_plus_IO_Code_list.txt 15 | 16 | ```{r} 17 | library(censusapi) 18 | library(tidyverse) 19 | 20 | cps_state_in <- getCensus( 21 | name="cps/basic/nov", 22 | vintage=2020, 23 | region="state", 24 | vars=c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"), 25 | key = Sys.getenv("CENSUS_API_KEY") 26 | ) 27 | 28 | cps_state <- cps_state_in %>% 29 | as_tibble() %>% 30 | mutate(across(everything(), as.numeric)) # coerce every column to numeric; .cols given explicitly 31 | 32 | # confirm this doesn't include territories 33 | cps_state %>% 34 | count(state) 35 | 36 | # confirm this is only November 2020 37 | cps_state %>% 38 | count(HRMONTH, HRYEAR4) 39 | 40 | # voting age citizen population 41 | 42 | targetpop <- cps_state %>% 43 | as_tibble() %>% 44 | filter( 45 | PRTAGE>=18, 46 | PRCITSHP %in% (1:4) 47 | ) %>% 48 | pull(PWSSWGT) %>% 49 | sum() 50 | 51 | 52 | 53 | ``` 54 | 55 | The target population in 2020 is: `r prettyNum(targetpop, big.mark=",")`. 56 | 57 | 58 | ## Target pop 2016 ANES 59 | From the User Guide: "...and the target population for the Internet mode was 224.1 million U.S. 
citizens age 18 or older living in the 50 US states or the District of Columbia" 60 | 61 | - We will use Current Population Survey (CPS) to find this total from November 2016 62 | 63 | ```{r} 64 | 65 | cps_state_in <- getCensus( 66 | name="cps/basic/nov", 67 | vintage=2016, 68 | region="state", 69 | vars=c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"), 70 | key = Sys.getenv("CENSUS_API_KEY") 71 | ) 72 | 73 | cps_state <- cps_state_in %>% 74 | as_tibble() %>% 75 | mutate(across(everything(), as.numeric)) # coerce every column to numeric; .cols given explicitly 76 | 77 | # confirm this doesn't include territories 78 | cps_state %>% 79 | count(state) 80 | 81 | # confirm this is only November 2016 82 | cps_state %>% 83 | count(HRMONTH, HRYEAR4) 84 | 85 | # voting age citizen population 86 | 87 | targetpop <- cps_state %>% 88 | as_tibble() %>% 89 | filter( 90 | PRTAGE>=18, 91 | PRCITSHP %in% (1:4) 92 | ) %>% 93 | pull(PWSSWGT) %>% 94 | sum() 95 | 96 | 97 | 98 | ``` 99 | 100 | The target population in 2016 is: `r prettyNum(targetpop, big.mark=",")`. 101 | 102 | -------------------------------------------------------------------------------- /DataCleaningScripts/TargetPopulation.md: -------------------------------------------------------------------------------- 1 | Target Population 2020 ANES 2 | ================ 3 | 4 | ## Target pop 2020 ANES 5 | 6 | From the User Guide: “The target population for the fresh cross-section 7 | was the 231 million non-institutional U.S. 
citizens aged 18 or older 8 | living in the 50 US states or the District of Columbia.” 9 | 10 | - We will use Current Population Survey (CPS) to find this total from 11 | November 2020 12 | - Relevant data dictionary: 13 | https://www2.census.gov/programs-surveys/cps/datasets/2020/basic/2020_Basic_CPS_Public_Use_Record_Layout_plus_IO_Code_list.txt 14 | 15 | ``` r 16 | library(censusapi) 17 | ``` 18 | 19 | ## 20 | ## Attaching package: 'censusapi' 21 | 22 | ## The following object is masked from 'package:methods': 23 | ## 24 | ## getFunction 25 | 26 | ``` r 27 | library(tidyverse) 28 | ``` 29 | 30 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 -- 31 | 32 | ## v ggplot2 3.3.5 v purrr 0.3.4 33 | ## v tibble 3.1.6 v dplyr 1.0.8 34 | ## v tidyr 1.2.0 v stringr 1.4.0 35 | ## v readr 2.1.2 v forcats 0.5.1 36 | 37 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() -- 38 | ## x dplyr::filter() masks stats::filter() 39 | ## x dplyr::lag() masks stats::lag() 40 | 41 | ``` r 42 | cps_state_in <- getCensus( 43 | name="cps/basic/nov", 44 | vintage=2020, 45 | region="state", 46 | vars=c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"), 47 | key = Sys.getenv("CENSUS_API_KEY") 48 | ) 49 | 50 | cps_state <- cps_state_in %>% 51 | as_tibble() %>% 52 | mutate(across(.fns=as.numeric)) 53 | 54 | # confirm this doesn't include territories 55 | cps_state %>% 56 | count(state) 57 | ``` 58 | 59 | ## # A tibble: 51 x 2 60 | ## state n 61 | ## 62 | ## 1 1 2406 63 | ## 2 2 1289 64 | ## 3 4 1969 65 | ## 4 5 1988 66 | ## 5 6 9574 67 | ## 6 8 1365 68 | ## 7 9 1157 69 | ## 8 10 1285 70 | ## 9 11 1622 71 | ## 10 12 5055 72 | ## # ... 
with 41 more rows 73 | 74 | ``` r 75 | # confirm this is only November 2020 76 | cps_state %>% 77 | count(HRMONTH, HRYEAR4) 78 | ``` 79 | 80 | ## # A tibble: 1 x 3 81 | ## HRMONTH HRYEAR4 n 82 | ## 83 | ## 1 11 2020 112037 84 | 85 | ``` r 86 | # voting age citizen population 87 | 88 | targetpop <- cps_state %>% 89 | as_tibble() %>% 90 | filter( 91 | PRTAGE>=18, 92 | PRCITSHP %in% (1:4) 93 | ) %>% 94 | pull(PWSSWGT) %>% 95 | sum() 96 | ``` 97 | 98 | The target population in 2020 is: 231,592,693. 99 | 100 | ## Target pop 2016 ANES 101 | 102 | From the User Guide: “…and the target population for the Internet mode 103 | was 224.1 million U.S. citizens age 18 or older living in the 50 US 104 | states or the District of Columbia” 105 | 106 | - We will use Current Population Survey (CPS) to find this total from 107 | November 2016 108 | 109 | ``` r 110 | cps_state_in <- getCensus( 111 | name="cps/basic/nov", 112 | vintage=2016, 113 | region="state", 114 | vars=c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"), 115 | key = Sys.getenv("CENSUS_API_KEY") 116 | ) 117 | 118 | cps_state <- cps_state_in %>% 119 | as_tibble() %>% 120 | mutate(across(.fns=as.numeric)) 121 | 122 | # confirm this doesn't include territories 123 | cps_state %>% 124 | count(state) 125 | ``` 126 | 127 | ## # A tibble: 51 x 2 128 | ## state n 129 | ## 130 | ## 1 1 2651 131 | ## 2 2 1720 132 | ## 3 4 2145 133 | ## 4 5 2342 134 | ## 5 6 11200 135 | ## 6 8 1551 136 | ## 7 9 1228 137 | ## 8 10 1508 138 | ## 9 11 2094 139 | ## 10 12 5777 140 | ## # ... 
with 41 more rows 141 | 142 | ``` r 143 | # confirm this is only November 2016 144 | cps_state %>% 145 | count(HRMONTH, HRYEAR4) 146 | ``` 147 | 148 | ## # A tibble: 1 x 3 149 | ## HRMONTH HRYEAR4 n 150 | ## 151 | ## 1 11 2016 131389 152 | 153 | ``` r 154 | # voting age citizen population 155 | 156 | targetpop <- cps_state %>% 157 | as_tibble() %>% 158 | filter( 159 | PRTAGE>=18, 160 | PRCITSHP %in% (1:4) 161 | ) %>% 162 | pull(PWSSWGT) %>% 163 | sum() 164 | ``` 165 | 166 | The target population in 2016 is: 224,059,005. 167 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Categorical Data Analysis Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ----setup--------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes_2020.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*231592693) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2020 per ANES methodology documentation 18 | anes_des <- anes %>% 19 | as_survey_design(weights = Weight, 20 | strata = Stratum, 21 | ids = VarUnit, 22 | nest = TRUE) 23 | 24 | #' 25 | #' # Part 1 26 | #' 27 | #' 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful. 28 | #' 29 | ## ----ex1_1--------------------------------------------------------------- 30 | 31 | 32 | 33 | #' 34 | #' 35 | #' 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with. 
36 | #' 37 | ## ----ex1_2--------------------------------------------------------------- 38 | 39 | 40 | #' 41 | #' 42 | #' 43 | #' 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020. 44 | #' 45 | ## ----ex1_3--------------------------------------------------------------- 46 | 47 | 48 | #' 49 | #' 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016. 50 | #' 51 | ## ----ex1_4--------------------------------------------------------------- 52 | 53 | 54 | #' 55 | #' 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020. 56 | #' 57 | ## ----ex1_5--------------------------------------------------------------- 58 | 59 | 60 | #' 61 | #' # Part 2 62 | #' 63 | #' 1. Is there a relationship between PartyID and whether people voted early? 64 | #' 65 | ## ----ex2_1--------------------------------------------------------------- 66 | 67 | 68 | #' 69 | #' 70 | #' 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option. 71 | #' 72 | ## ----ex2_2--------------------------------------------------------------- 73 | 74 | 75 | #' 76 | #' 77 | #' # Bonus 78 | #' 79 | #' 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate. 80 | #' 81 | ## ----exb_1--------------------------------------------------------------- 82 | 83 | 84 | #' 85 | #' 2. 
Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model. 86 | #' 87 | ## ----exb_2--------------------------------------------------------------- 88 | 89 | 90 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Categorical Data Analysis Exercise Solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r setup} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes_2020.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*231592693) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2020 per ANES methodology documentation 18 | anes_des <- anes %>% 19 | as_survey_design(weights = Weight, 20 | strata = Stratum, 21 | ids = VarUnit, 22 | nest = TRUE) 23 | ``` 24 | 25 | # Part 1 26 | 27 | 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful. 28 | 29 | ```{r ex1_1} 30 | 31 | 32 | ``` 33 | 34 | 35 | 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with. 36 | 37 | ```{r ex1_2} 38 | 39 | ``` 40 | 41 | 42 | 43 | 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020. 44 | 45 | ```{r ex1_3} 46 | 47 | ``` 48 | 49 | 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016. 50 | 51 | ```{r ex1_4} 52 | 53 | ``` 54 | 55 | 5. 
What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020. 56 | 57 | ```{r ex1_5} 58 | 59 | ``` 60 | 61 | # Part 2 62 | 63 | 1. Is there a relationship between PartyID and whether people voted early? 64 | 65 | ```{r ex2_1} 66 | 67 | ``` 68 | 69 | 70 | 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option. 71 | 72 | ```{r ex2_2} 73 | 74 | ``` 75 | 76 | 77 | # Bonus 78 | 79 | 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate. 80 | 81 | ```{r exb_1} 82 | 83 | ``` 84 | 85 | 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model. 
86 | 87 | ```{r exb_2} 88 | 89 | ``` 90 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Categorical Data Analysis Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ----setup--------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes_2020.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*231592693) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2020 per ANES methodology documentation 18 | anes_des <- anes %>% 19 | as_survey_design(weights = Weight, 20 | strata = Stratum, 21 | ids = VarUnit, 22 | nest = TRUE) 23 | 24 | #' 25 | #' # Part 1 26 | #' 27 | #' 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful. 28 | #' 29 | ## ----ex1_1--------------------------------------------------------------- 30 | #Option 1: 31 | femgd <- anes_des %>% 32 | filter(Gender=="Female", Education=="Graduate") %>% 33 | survey_count(name="n") 34 | #Option 2: 35 | femgd <- anes_des %>% 36 | filter(Gender=="Female", Education=="Graduate") %>% 37 | summarize( 38 | N=survey_total(), .groups="drop" 39 | ) 40 | 41 | 42 | #' 43 | #' There are `r formatC(pull(femgd, N), format="d", big.mark=",")` females with a graduate degree. 44 | #' 45 | #' 46 | #' 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with. 
47 | #' 48 | ## ----ex1_2--------------------------------------------------------------- 49 | (psd <- anes_des %>% 50 | group_by(PartyID) %>% 51 | summarize( 52 | p=survey_mean() 53 | ) %>% 54 | filter(PartyID=="Strong democrat")) 55 | 56 | #' 57 | #' `r str_c(round(pull(psd, p)*100, 1), "%")` of people identify as a strong democrat. 58 | #' 59 | #' 60 | #' 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020. 61 | #' 62 | ## ----ex1_3--------------------------------------------------------------- 63 | (psr <- anes_des %>% 64 | filter(VotedPres2020=="Yes") %>% 65 | group_by(PartyID) %>% 66 | summarize( 67 | p=survey_mean() 68 | ) %>% 69 | filter(PartyID=="Strong republican")) 70 | 71 | #' 72 | #' `r str_c(round(pull(psr, p)*100, 1), "%")` of people identify as a strong republican among those who voted in 2020. 73 | #' 74 | #' 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016. 75 | #' 76 | ## ----ex1_4--------------------------------------------------------------- 77 | (pvb <- anes_des %>% 78 | filter(!is.na(VotedPres2016), !is.na(VotedPres2020)) %>% 79 | group_by(interact(VotedPres2016, VotedPres2020)) %>% 80 | summarize( 81 | p=survey_prop(vartype="ci", proportion=TRUE, prop_method="logit") # logit CI: needs proportion=TRUE + prop_method (method= is not a survey_prop() argument) 82 | ) %>% 83 | filter(VotedPres2016=="Yes", VotedPres2020=="Yes")) 84 | 85 | #' 86 | #' `r str_c(round(pull(pvb, p)*100, 1), "%")` (`r round(pull(pvb, p_low)*100, 1)`-`r str_c(round(pull(pvb, p_upp)*100, 1), "%")`) voted in both the 2016 and 2020 elections. 87 | #' 88 | #' 89 | #' 90 | #' 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020. 
91 | #' 92 | ## ----ex1_5--------------------------------------------------------------- 93 | (pdeff <- anes_des %>% 94 | filter(!is.na(EarlyVote2020)) %>% 95 | group_by(EarlyVote2020) %>% 96 | summarize( 97 | p=survey_mean(deff=TRUE) 98 | ) %>% 99 | filter(EarlyVote2020=="Yes")) 100 | 101 | #' 102 | #' The design effect is `r round(pull(pdeff, p_deff), 2)`. 103 | #' 104 | #' # Part 2 105 | #' 106 | #' 1. Is there a relationship between PartyID and whether people voted early? 107 | #' 108 | ## ----ex2_1--------------------------------------------------------------- 109 | anes_des %>% 110 | filter(!is.na(PartyID), !is.na(EarlyVote2020)) %>% 111 | group_by(PartyID, EarlyVote2020) %>% 112 | summarise( 113 | p=survey_mean(), 114 | .groups="drop" 115 | ) %>% 116 | filter(EarlyVote2020=="Yes") 117 | 118 | (pid_vote <- anes_des %>% 119 | svychisq(design=., 120 | formula=~PartyID +EarlyVote2020)) 121 | 122 | #' 123 | #' There is strong association with when people voted and their party, p-value=`r pluck(pid_vote, "p.value") %>% round(5)` 124 | #' 125 | #' 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option. 126 | #' 127 | ## ----ex2_2--------------------------------------------------------------- 128 | anes_des %>% 129 | filter(!is.na(PartyID), !is.na(TrustGovernment)) %>% 130 | group_by(PartyID, TrustGovernment) %>% 131 | summarise( 132 | p=survey_mean(), 133 | .groups="drop" 134 | ) %>% 135 | pivot_wider(id_cols=PartyID, names_from = "TrustGovernment", values_from="p") 136 | 137 | (pid_trust <- anes_des %>% 138 | svychisq(design=., 139 | formula=~PartyID+TrustGovernment, 140 | statistic="Wald")) 141 | 142 | #' 143 | #' There is strong association with how much people trust government and their party, p-value=`r pluck(pid_trust, "p.value") %>% round(5)` 144 | #' 145 | #' # Bonus 146 | #' 147 | #' 1. What percentage of people lean republican? 
These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate. 148 | #' 149 | ## ----exb_1--------------------------------------------------------------- 150 | 151 | #Solution 1: Using forcats package 152 | anes_des %>% 153 | filter(!is.na(PartyID)) %>% 154 | mutate(PartyID3=fct_collapse(PartyID, 155 | LeanDem=c("Strong democrat", 156 | "Not very strong democrat", 157 | "Independent-democrat"), 158 | LeanRep=c("Strong republican", 159 | "Not very strong republican", 160 | "Independent-republican"), 161 | other_level="Other")) %>% 162 | group_by(PartyID3) %>% 163 | summarize(p=survey_prop(vartype="ci", proportion = TRUE)) 164 | 165 | #Solution 2: Using case_when 166 | anes_des %>% 167 | filter(!is.na(PartyID)) %>% 168 | mutate(PartyID3=case_when(PartyID %in% c("Strong democrat", 169 | "Not very strong democrat", 170 | "Independent-democrat")~"LeanDem", 171 | PartyID %in% c("Strong republican", 172 | "Not very strong republican", 173 | "Independent-republican")~"LeanRep", 174 | TRUE~"Other")) %>% 175 | group_by(PartyID3) %>% 176 | summarize(p=survey_prop(vartype="ci", proportion = TRUE)) 177 | 178 | 179 | #' 180 | #' 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model. 
181 | #' 182 | ## ----exb_2--------------------------------------------------------------- 183 | anes_des %>% 184 | mutate(PartyID3=fct_collapse(PartyID, 185 | LeanDem=c("Strong democrat", 186 | "Not very strong democrat", 187 | "Independent-democrat"), 188 | LeanRep=c("Strong republican", 189 | "Not very strong republican", 190 | "Independent-republican"), 191 | other_level="Other")) %>% 192 | filter(!is.na(PartyID3), !is.na(EarlyVote2020)) %>% 193 | group_by(PartyID3, EarlyVote2020) %>% 194 | summarise( 195 | p=survey_prop(proportion = TRUE) 196 | ) %>% filter(EarlyVote2020=="Yes") 197 | 198 | earlyv_glm<- anes_des %>% 199 | mutate(PartyID3=fct_collapse(PartyID, 200 | LeanDem=c("Strong democrat", 201 | "Not very strong democrat", 202 | "Independent-democrat"), 203 | LeanRep=c("Strong republican", 204 | "Not very strong republican", 205 | "Independent-republican"), 206 | other_level="Other")) %>% 207 | svyglm(design=., 208 | formula=(EarlyVote2020=="Yes")~PartyID3, 209 | family=quasibinomial(), 210 | na.action=na.omit) 211 | 212 | summary(earlyv_glm) 213 | 214 | #' 215 | #' Yes, there is evidence that those leaning democrat were more likely to vote early. They are the reference level in the model and the other coefficients are negative and significant. 
216 | #' 217 | #' # Session information 218 | #' 219 | ## ----si------------------------------------------------------------------ 220 | devtools::session_info(pkgs="attached") 221 | 222 | #' 223 | -------------------------------------------------------------------------------- /Exercises/CategorialExercises_solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Categorical Data Analysis Exercise Solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r setup} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | anes <- read_rds(here("Data", "anes_2020.rds")) %>% 16 | mutate(Weight=Weight/sum(Weight)*231592693) 17 | # adjust weight to sum to citizen pop, 18+ in Nov 2020 per ANES methodology documentation 18 | anes_des <- anes %>% 19 | as_survey_design(weights = Weight, 20 | strata = Stratum, 21 | ids = VarUnit, 22 | nest = TRUE) 23 | ``` 24 | 25 | # Part 1 26 | 27 | 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful. 28 | 29 | ```{r ex1_1} 30 | #Option 1: 31 | femgd <- anes_des %>% 32 | filter(Gender=="Female", Education=="Graduate") %>% 33 | survey_count(name="n") 34 | #Option 2: 35 | femgd <- anes_des %>% 36 | filter(Gender=="Female", Education=="Graduate") %>% 37 | summarize( 38 | N=survey_total(), .groups="drop" 39 | ) 40 | 41 | ``` 42 | 43 | There are `r formatC(pull(femgd, N), format="d", big.mark=",")` females with a graduate degree. 44 | 45 | 46 | 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with. 
47 | 48 | ```{r ex1_2} 49 | (psd <- anes_des %>% 50 | group_by(PartyID) %>% 51 | summarize( 52 | p=survey_mean() 53 | ) %>% 54 | filter(PartyID=="Strong democrat")) 55 | ``` 56 | 57 | `r str_c(round(pull(psd, p)*100, 1), "%")` of people identify as a strong democrat. 58 | 59 | 60 | 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020. 61 | 62 | ```{r ex1_3} 63 | (psr <- anes_des %>% 64 | filter(VotedPres2020=="Yes") %>% 65 | group_by(PartyID) %>% 66 | summarize( 67 | p=survey_mean() 68 | ) %>% 69 | filter(PartyID=="Strong republican")) 70 | ``` 71 | 72 | `r str_c(round(pull(psr, p)*100, 1), "%")` of people identify as a strong republican among those who voted in 2020. 73 | 74 | 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016. 75 | 76 | ```{r ex1_4} 77 | (pvb <- anes_des %>% 78 | filter(!is.na(VotedPres2016), !is.na(VotedPres2020)) %>% 79 | group_by(interact(VotedPres2016, VotedPres2020)) %>% 80 | summarize( 81 | p=survey_prop(vartype="ci", proportion=TRUE, prop_method="logit") # logit CI: needs proportion=TRUE + prop_method (method= is not a survey_prop() argument) 82 | ) %>% 83 | filter(VotedPres2016=="Yes", VotedPres2020=="Yes")) 84 | ``` 85 | 86 | `r str_c(round(pull(pvb, p)*100, 1), "%")` (`r round(pull(pvb, p_low)*100, 1)`-`r str_c(round(pull(pvb, p_upp)*100, 1), "%")`) voted in both the 2016 and 2020 elections. 87 | 88 | 89 | 90 | 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020. 91 | 92 | ```{r ex1_5} 93 | (pdeff <- anes_des %>% 94 | filter(!is.na(EarlyVote2020)) %>% 95 | group_by(EarlyVote2020) %>% 96 | summarize( 97 | p=survey_mean(deff=TRUE) 98 | ) %>% 99 | filter(EarlyVote2020=="Yes")) 100 | ``` 101 | 102 | The design effect is `r round(pull(pdeff, p_deff), 2)`. 103 | 104 | # Part 2 105 | 106 | 1. 
Is there a relationship between PartyID and whether people voted early? 107 | 108 | ```{r ex2_1} 109 | anes_des %>% 110 | filter(!is.na(PartyID), !is.na(EarlyVote2020)) %>% 111 | group_by(PartyID, EarlyVote2020) %>% 112 | summarise( 113 | p=survey_mean(), 114 | .groups="drop" 115 | ) %>% 116 | filter(EarlyVote2020=="Yes") 117 | 118 | (pid_vote <- anes_des %>% 119 | svychisq(design=., 120 | formula=~PartyID +EarlyVote2020)) 121 | ``` 122 | 123 | There is strong association with when people voted and their party, p-value=`r pluck(pid_vote, "p.value") %>% round(5)` 124 | 125 | 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option. 126 | 127 | ```{r ex2_2} 128 | anes_des %>% 129 | filter(!is.na(PartyID), !is.na(TrustGovernment)) %>% 130 | group_by(PartyID, TrustGovernment) %>% 131 | summarise( 132 | p=survey_mean(), 133 | .groups="drop" 134 | ) %>% 135 | pivot_wider(id_cols=PartyID, names_from = "TrustGovernment", values_from="p") 136 | 137 | (pid_trust <- anes_des %>% 138 | svychisq(design=., 139 | formula=~PartyID+TrustGovernment, 140 | statistic="Wald")) 141 | ``` 142 | 143 | There is strong association with how much people trust government and their party, p-value=`r pluck(pid_trust, "p.value") %>% round(5)` 144 | 145 | # Bonus 146 | 147 | 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate. 
148 | 149 | ```{r exb_1} 150 | 151 | #Solution 1: Using forcats package 152 | anes_des %>% 153 | filter(!is.na(PartyID)) %>% 154 | mutate(PartyID3=fct_collapse(PartyID, 155 | LeanDem=c("Strong democrat", 156 | "Not very strong democrat", 157 | "Independent-democrat"), 158 | LeanRep=c("Strong republican", 159 | "Not very strong republican", 160 | "Independent-republican"), 161 | other_level="Other")) %>% 162 | group_by(PartyID3) %>% 163 | summarize(p=survey_prop(vartype="ci", proportion = TRUE)) 164 | 165 | #Solution 2: Using case_when 166 | anes_des %>% 167 | filter(!is.na(PartyID)) %>% 168 | mutate(PartyID3=case_when(PartyID %in% c("Strong democrat", 169 | "Not very strong democrat", 170 | "Independent-democrat")~"LeanDem", 171 | PartyID %in% c("Strong republican", 172 | "Not very strong republican", 173 | "Independent-republican")~"LeanRep", 174 | TRUE~"Other")) %>% 175 | group_by(PartyID3) %>% 176 | summarize(p=survey_prop(vartype="ci", proportion = TRUE)) 177 | 178 | ``` 179 | 180 | 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model. 
181 | 182 | ```{r exb_2} 183 | anes_des %>% 184 | mutate(PartyID3=fct_collapse(PartyID, 185 | LeanDem=c("Strong democrat", 186 | "Not very strong democrat", 187 | "Independent-democrat"), 188 | LeanRep=c("Strong republican", 189 | "Not very strong republican", 190 | "Independent-republican"), 191 | other_level="Other")) %>% 192 | filter(!is.na(PartyID3), !is.na(EarlyVote2020)) %>% 193 | group_by(PartyID3, EarlyVote2020) %>% 194 | summarise( 195 | p=survey_prop(proportion = TRUE) 196 | ) %>% filter(EarlyVote2020=="Yes") 197 | 198 | earlyv_glm<- anes_des %>% 199 | mutate(PartyID3=fct_collapse(PartyID, 200 | LeanDem=c("Strong democrat", 201 | "Not very strong democrat", 202 | "Independent-democrat"), 203 | LeanRep=c("Strong republican", 204 | "Not very strong republican", 205 | "Independent-republican"), 206 | other_level="Other")) %>% 207 | svyglm(design=., 208 | formula=(EarlyVote2020=="Yes")~PartyID3, 209 | family=quasibinomial(), 210 | na.action=na.omit) 211 | 212 | summary(earlyv_glm) 213 | ``` 214 | 215 | Yes, there is evidence that those leaning democrat were more likely to vote early. They are the reference level in the model and the other coefficients are negative and significant. 
216 | 217 | # Session information 218 | 219 | ```{r si} 220 | devtools::session_info(pkgs="attached") 221 | ``` 222 | 223 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Continuous Data Analysis Exercises" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ------------------------------------------------------------------------ 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | 24 | #' 25 | #' # Part 1 26 | #' 27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | #' 29 | ## ------------------------------------------------------------------------ 30 | 31 | 32 | #' 33 | #' 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 34 | #' 35 | ## ------------------------------------------------------------------------ 36 | 37 | 38 | #' 39 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 40 | #' 41 | ## ------------------------------------------------------------------------ 42 | 43 | 44 | #' 45 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 
46 | #' 47 | ## ------------------------------------------------------------------------ 48 | 49 | 50 | #' 51 | #' # Part 2 52 | #' 53 | #' 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity. 54 | #' 55 | ## ------------------------------------------------------------------------ 56 | 57 | 58 | #' 59 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 60 | #' 61 | ## ------------------------------------------------------------------------ 62 | 63 | 64 | #' 65 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 66 | #' 67 | ## ------------------------------------------------------------------------ 68 | 69 | 70 | #' 71 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region). 72 | #' 73 | ## ------------------------------------------------------------------------ 74 | 75 | 76 | #' 77 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 78 | #' 79 | ## ------------------------------------------------------------------------ 80 | 81 | 82 | #' 83 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Continuous Data Analysis Exercises" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | ``` 24 | 25 | # Part 1 26 | 27 | 1. 
Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | 29 | ```{r} 30 | 31 | ``` 32 | 33 | 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 34 | 35 | ```{r} 36 | 37 | ``` 38 | 39 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 40 | 41 | ```{r} 42 | 43 | ``` 44 | 45 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 46 | 47 | ```{r} 48 | 49 | ``` 50 | 51 | # Part 2 52 | 53 | 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity. 54 | 55 | ```{r} 56 | 57 | ``` 58 | 59 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 60 | 61 | ```{r} 62 | 63 | ``` 64 | 65 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 66 | 67 | ```{r} 68 | 69 | ``` 70 | 71 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region). 72 | 73 | ```{r} 74 | 75 | ``` 76 | 77 | 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 
78 | 79 | ```{r} 80 | 81 | ``` 82 | 83 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Continous Data Analysis Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Set-up 9 | ## ------------------------------------------------------------------------------------------------------------------------------- 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | 24 | #' 25 | #' # Part 1 26 | #' 27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval. 28 | #' 29 | ## ----ex1_1---------------------------------------------------------------------------------------------------------------------- 30 | avg_sqci<-recs_des %>% 31 | summarize( 32 | SF_HU=survey_mean(TOTSQFT_EN, 33 | vartype = "ci", 34 | level = 0.9) 35 | ) 36 | 37 | #' 38 | #' On average US households have `r formatC(pull(avg_sqci, SF_HU), format="d", big.mark=",")` square feet, with a 90% CI of (`r formatC(pull(avg_sqci, SF_HU_low), format="d", big.mark=",")` sq ft, `r formatC(pull(avg_sqci, SF_HU_upp), format="d", big.mark=",")` sq ft). 39 | #' 40 | #' 41 | #' 2. Estimate the ratio of cooled square footage to total square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 
42 | #' 43 | ## ----ex1_2---------------------------------------------------------------------------------------------------------------------- 44 | cool_totratio<-recs_des %>% 45 | summarize( 46 | PropCooled=survey_ratio( 47 | numerator = TOTCSQFT, 48 | denominator = TOTSQFT_EN, 49 | vartype = "se") 50 | ) 51 | 52 | #' 53 | #' On average US households have a ratio of `r round(pull(cool_totratio, PropCooled), 2)` square feet cooled per total square feet. 54 | #' 55 | #' 56 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 57 | #' 58 | ## ----ex1_3---------------------------------------------------------------------------------------------------------------------- 59 | med_wintertemp<-recs_des %>% 60 | summarize( 61 | temp_winter=survey_median(WinterTempNight, 62 | vartype = "se", 63 | na.rm = TRUE) 64 | ) 65 | 66 | #' 67 | #' The median temperature housing units are set to during the night in the winter is `r round(pull(med_wintertemp, temp_winter), 2)` degrees Fahrenheit. 68 | #' 69 | #' 70 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 71 | #' 72 | ## ----ex1_4---------------------------------------------------------------------------------------------------------------------- 73 | recs_des %>% 74 | summarize( 75 | WinterNightTemp=survey_quantile(WinterTempNight, 76 | quantiles = 0.5, 77 | vartype = "se", 78 | na.rm = TRUE) 79 | ) 80 | 81 | #' 82 | #' The 50th percentile (median) temperature housing units are set to during the night in the winter is `r round(pull(med_wintertemp, temp_winter), 2)` degrees Fahrenheit. 83 | #' 84 | #' 85 | #' # Part 2 86 | #' 87 | #' 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity. 
88 | #' 89 | ## ----ex2_1---------------------------------------------------------------------------------------------------------------------- 90 | # option 1 91 | recs_des %>% 92 | group_by(Region, Division, Urbanicity) %>% 93 | cascade( 94 | EnergyCost=survey_mean(TOTALDOL) 95 | ) 96 | 97 | # option 2 98 | # one way 99 | recs_des %>% 100 | group_by(Region, Division, Urbanicity) %>% 101 | summarize( 102 | EnergyCost=survey_mean(TOTALDOL) 103 | ) 104 | 105 | #' 106 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 107 | #' 108 | ## ----ex2_2---------------------------------------------------------------------------------------------------------------------- 109 | med_billsouth<-recs_des %>% 110 | filter(Region=="South") %>% 111 | summarize( 112 | MedElBill=survey_median(DOLLAREL, 113 | vartype="ci") 114 | ) 115 | 116 | #' 117 | #' The median electric cost for housing units in the South is \$`r formatC(pull(med_billsouth, MedElBill), format="d", big.mark=",")` (\$`r formatC(pull(med_billsouth, MedElBill_low), format="d", big.mark=",")`, \$`r formatC(pull(med_billsouth, MedElBill_upp), format="d", big.mark=",")`). 118 | #' 119 | #' 120 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 121 | #' 122 | ## ----ex2_3---------------------------------------------------------------------------------------------------------------------- 123 | daytemp_ttest<-recs_des %>% 124 | svyttest(design=., 125 | formula = I(WinterTempDay-SummerTempDay)~0, 126 | na.rm = TRUE) 127 | 128 | #' 129 | #' On average housing units have set the temperature lower in the winter than the summer, p-value=`r pluck(daytemp_ttest, "p.value") %>% round(5)`. 130 | #' 131 | #' 132 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region). 
133 | #' 134 | ## ----ex2_4---------------------------------------------------------------------------------------------------------------------- 135 | m1 <- recs_des %>% 136 | svyglm(design=., 137 | formula=DOLLAREL~Region, 138 | na.action=na.omit) 139 | summary(m1) 140 | 141 | #' 142 | #' Yes, there is evidence that the average electric bill varies by region. 143 | #' 144 | #' 145 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 146 | #' 147 | ## ----ex2_5---------------------------------------------------------------------------------------------------------------------- 148 | m2 <- recs_des %>% 149 | svyglm(design=., 150 | formula=TOTALDOL~TOTCSQFT, 151 | na.action=na.omit) 152 | summary(m2) 153 | 154 | #' 155 | #' For each additional cooled square foot, the total energy cost increases by \$`r round(pluck(m2$coefficients,"TOTCSQFT"),2)`. 156 | -------------------------------------------------------------------------------- /Exercises/ContinuousExercises_solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Continuous Data Analysis Exercise Solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Set-up 9 | ```{r} 10 | library(tidyverse) # for tidyverse 11 | library(here) # for file paths 12 | library(survey) # for survey analysis 13 | library(srvyr) # for tidy survey analysis 14 | 15 | recs <- read_rds(here("Data", "recs.rds")) 16 | 17 | recs_des <- recs %>% 18 | as_survey_rep(weights=NWEIGHT, 19 | repweights=starts_with("BRRWT"), 20 | type="Fay", 21 | rho=0.5, 22 | mse=TRUE) 23 | ``` 24 | 25 | # Part 1 26 | 27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 | 29 | ```{r ex1_1} 30 | avg_sqci<-recs_des %>% 31 | summarize( 32 | SF_HU=survey_mean(TOTSQFT_EN, 33 | vartype = "ci", 34 | level = 0.9) 35 | ) 36 | ``` 37 | 38 | On average US households have `r formatC(pull(avg_sqci, SF_HU), format="d", big.mark=",")` square feet, with a 90% CI of (`r formatC(pull(avg_sqci, SF_HU_low), format="d", big.mark=",")` sq ft, `r formatC(pull(avg_sqci, SF_HU_upp), format="d", big.mark=",")` sq ft). 39 | 40 | 41 | 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error. 42 | 43 | ```{r ex1_2} 44 | cool_totratio<-recs_des %>% 45 | summarize( 46 | PropCooled=survey_ratio( 47 | numerator = TOTCSQFT, 48 | denominator = TOTSQFT_EN, 49 | vartype = "se") 50 | ) 51 | ``` 52 | 53 | On average US households have a ratio of `r round(pull(cool_totratio, PropCooled), 2)` square feet cooled per total square feet. 54 | 55 | 56 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function. 57 | 58 | ```{r ex1_3} 59 | med_wintertemp<-recs_des %>% 60 | summarize( 61 | temp_winter=survey_median(WinterTempNight, 62 | vartype = "se", 63 | na.rm = TRUE) 64 | ) 65 | ``` 66 | 67 | The median temperature housing units are set to during the night in the winter is `r round(pull(med_wintertemp, temp_winter), 2)` degrees Fahrenheit. 68 | 69 | 70 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function. 71 | 72 | ```{r ex1_4} 73 | q50_wintertemp<-recs_des %>% # stored so the sentence below reports this chunk's own estimate 74 | summarize( 75 | WinterNightTemp=survey_quantile(WinterTempNight, 76 | quantiles = 0.5, 77 | vartype = "se", 78 | na.rm = TRUE) 79 | ) 80 | ``` 81 | 82 | The 50th percentile (median) temperature housing units are set to during the night in the winter is `r round(pull(q50_wintertemp, WinterNightTemp_q50), 2)` degrees Fahrenheit.
83 | 84 | 85 | # Part 2 86 | 87 | 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity. 88 | 89 | ```{r ex2_1} 90 | # option 1 91 | recs_des %>% 92 | group_by(Region, Division, Urbanicity) %>% 93 | cascade( 94 | EnergyCost=survey_mean(TOTALDOL) 95 | ) 96 | 97 | # option 2 98 | # one way 99 | recs_des %>% 100 | group_by(Region, Division, Urbanicity) %>% 101 | summarize( 102 | EnergyCost=survey_mean(TOTALDOL) 103 | ) 104 | ``` 105 | 106 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval? 107 | 108 | ```{r ex2_2} 109 | med_billsouth<-recs_des %>% 110 | filter(Region=="South") %>% 111 | summarize( 112 | MedElBill=survey_median(DOLLAREL, 113 | vartype="ci") 114 | ) 115 | ``` 116 | 117 | The median electric cost for housing units in the South is \$`r formatC(pull(med_billsouth, MedElBill), format="d", big.mark=",")` (\$`r formatC(pull(med_billsouth, MedElBill_low), format="d", big.mark=",")`, \$`r formatC(pull(med_billsouth, MedElBill_upp), format="d", big.mark=",")`). 118 | 119 | 120 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same. 121 | 122 | ```{r ex2_3} 123 | daytemp_ttest<-recs_des %>% 124 | svyttest(design=., 125 | formula = I(WinterTempDay-SummerTempDay)~0, 126 | na.rm = TRUE) 127 | ``` 128 | 129 | On average housing units have set the temperature lower in the winter than the summer, p-value=`r pluck(daytemp_ttest, "p.value") %>% round(5)`. 130 | 131 | 132 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region). 133 | 134 | ```{r ex2_4} 135 | m1 <- recs_des %>% 136 | svyglm(design=., 137 | formula=DOLLAREL~Region, 138 | na.action=na.omit) 139 | summary(m1) 140 | ``` 141 | 142 | Yes, there is evidence that the average electric bill varies by region. 143 | 144 | 145 | 5. 
Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL). 146 | 147 | ```{r ex2_5} 148 | m2 <- recs_des %>% 149 | svyglm(design=., 150 | formula=TOTALDOL~TOTCSQFT, 151 | na.action=na.omit) 152 | summary(m2) 153 | ``` 154 | 155 | For each additional cooled square foot, the total energy cost increases by \$`r round(pluck(m2$coefficients,"TOTCSQFT"),2)`. 156 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Design objects and derived variables exercise" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Course set-up 9 | #' 10 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 11 | #' 12 | ## ----setup--------------------------------------------------------------- 13 | # install.packages("tidyverse") 14 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 15 | # install.packages("srvyr") 16 | # install.packages("here") 17 | # install.packages("palmerpenguins") 18 | # install.packages("remotes") 19 | 20 | library(tidyverse) # for tidyverse 21 | library(here) # for file paths 22 | library(srvyr) 23 | 24 | 25 | 26 | #' 27 | #' # Part 1 - Design Objects 28 | #' 29 | #' In these exercises, you will be given a study and assume you have the data. How would you create the design object? 30 | #' 31 | #' 1. California Health Interview Survey - 2019-2020 32 | #' - Note that you can do this with design variables aka Taylor's series or replicate weights. 33 | #' - Useful links: 34 | #' - 35 | #' - 36 | #' - 37 | #' - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`. 
38 | #' 39 | ## ----chis, eval=FALSE---------------------------------------------------- 40 | ## chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat")) 41 | ## 42 | 43 | #' 44 | #' 2. National Survey on Drug Use and Health - 2019 45 | #' - Useful links: 46 | #' - 47 | #' - 48 | #' - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`. 49 | #' 50 | ## ----nsduh, eval=FALSE--------------------------------------------------- 51 | ## nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV")) 52 | ## 53 | 54 | #' 55 | #' 56 | #' # Part 2 - Derived variables 57 | #' 58 | #' Before exercises, read the data in 59 | ## ----datin, cache=TRUE--------------------------------------------------- 60 | anes <- read_rds(here("Data", "anes_2020.rds")) 61 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) 62 | 63 | #' 64 | #' In these exercises, you will be given specifications for a derived variable. Create the variable and check your work. Useful resources: 65 | #' 66 | #' - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf) 67 | #' - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md) 68 | #' - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet 69 | #' 70 | #' 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on.
Hint: Use the variable `V201617x` 71 | #' 72 | #' - Under $25,000 73 | #' - $25,000-49,999 74 | #' - $50,000-74,999 75 | #' - $75,000-99,999 76 | #' - $100,000 or more 77 | #' 78 | ## ----income5------------------------------------------------------------- 79 | 80 | 81 | #' 82 | #' 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x` 83 | #' 84 | #' - 18-24 85 | #' - 25-44 86 | #' - 45-64 87 | #' - 65-74 88 | #' - 75 or older 89 | #' 90 | #' 91 | ## ----age5---------------------------------------------------------------- 92 | 93 | 94 | #' 95 | #' 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once: 96 | #' 97 | #' - Reducing or forgoing basic necessities to pay energy costs (SCALEB) 98 | #' - Leaving home at unhealthy temperature (SCALEG) 99 | #' - Receiving disconnect or delivery stop notice (SCALEE) 100 | #' - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK) 101 | #' - Unable to use cooling equipment (NOACBROKE, NOACEL) 102 | #' 103 | #' The relevant variables that should be used are included in parentheses. 104 | #' 105 | ## ----energyinsec--------------------------------------------------------- 106 | 107 | 108 | #' 109 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Design objects and derived variables exercise" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Course set-up 9 | 10 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
11 | 12 | ```{r setup} 13 | # install.packages("tidyverse") 14 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 15 | # install.packages("srvyr") 16 | # install.packages("here") 17 | # install.packages("palmerpenguins") 18 | # install.packages("remotes") 19 | 20 | library(tidyverse) # for tidyverse 21 | library(here) # for file paths 22 | library(srvyr) 23 | 24 | 25 | ``` 26 | 27 | # Part 1 - Design Objects 28 | 29 | In these exercises, you will be given a study and assume you have the data. How would you create the design object? 30 | 31 | 1. California Health Interview Survey - 2019-2020 32 | - Note that you can do this with design variables aka Taylor's series or replicate weights. 33 | - Useful links: 34 | - 35 | - 36 | - 37 | - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`. 38 | 39 | ```{r chis, eval=FALSE} 40 | chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat")) 41 | 42 | ``` 43 | 44 | 2. National Survey on Drug Use and Health - 2019 45 | - Useful links: 46 | - 47 | - 48 | - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`. 49 | 50 | ```{r nsduh, eval=FALSE} 51 | nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV")) 52 | 53 | ``` 54 | 55 | 56 | # Part 2 - Derived variables 57 | 58 | Before exercises, read the data in 59 | ```{r datin, cache=TRUE} 60 | anes <- read_rds(here("Data", "anes_2020.rds")) 61 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) 62 | ``` 63 | 64 | In these exercises, you will be given specifications for a derived variable. Create the variable and check your work.
Useful resources: 65 | 66 | - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf) 67 | - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md) 68 | - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet 69 | 70 | 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x` 71 | 72 | - Under $25,000 73 | - $25,000-49,999 74 | - $50,000-74,999 75 | - $75,000-99,999 76 | - $100,000 or more 77 | 78 | ```{r income5} 79 | 80 | ``` 81 | 82 | 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x` 83 | 84 | - 18-24 85 | - 25-44 86 | - 45-64 87 | - 65-74 88 | - 75 or older 89 | 90 | 91 | ```{r age5} 92 | 93 | ``` 94 | 95 | 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once: 96 | 97 | - Reducing or forgoing basic necessities to pay energy costs (SCALEB) 98 | - Leaving home at unhealthy temperature (SCALEG) 99 | - Receiving disconnect or delivery stop notice (SCALEE) 100 | - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK) 101 | - Unable to use cooling equipment (NOACBROKE, NOACEL) 102 | 103 | The relevant variables that should be used are included in parentheses.
104 | 105 | ```{r energyinsec} 106 | 107 | ``` 108 | 109 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_cache/html/__packages: -------------------------------------------------------------------------------- 1 | base 2 | methods 3 | datasets 4 | utils 5 | grDevices 6 | graphics 7 | stats 8 | tidyverse 9 | ggplot2 10 | tibble 11 | tidyr 12 | readr 13 | purrr 14 | dplyr 15 | stringr 16 | forcats 17 | here 18 | srvyr 19 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.RData -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdb -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdx -------------------------------------------------------------------------------- 
/Exercises/DesignDerivedVariablesExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Design objects and derived variables exercise solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Course set-up 9 | #' 10 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 11 | #' 12 | ## ----setup--------------------------------------------------------------- 13 | # install.packages("tidyverse") 14 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 15 | # install.packages("srvyr") 16 | # install.packages("here") 17 | # install.packages("palmerpenguins") 18 | # install.packages("remotes") 19 | 20 | library(tidyverse) # for tidyverse 21 | library(here) # for file paths 22 | library(srvyr) 23 | 24 | 25 | 26 | #' 27 | #' # Part 1 - Design Objects 28 | #' 29 | #' In these exercises, you will be given a study and assume you have the data. How would you create the design object? 30 | #' 31 | #' 1. California Health Interview Survey - 2019-2020 32 | #' - Note that you can do this with design variables aka Taylor's series or replicate weights. 33 | #' - Useful links: 34 | #' - 35 | #' - 36 | #' - 37 | #' - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`. 
38 | #' 39 | ## ----chis, eval=FALSE---------------------------------------------------- 40 | ## 41 | ## chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat")) 42 | ## 43 | ## rep_des <- chis19_adult %>% 44 | ## as_survey_rep(weights=RAKEDW0, repweights=stringr::str_c("RAKEDW", 1:80), 45 | ## type="JKn", rscales=1) 46 | ## 47 | ## tsl_des <- chis19_adult %>% 48 | ## as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=1) 49 | ## 50 | ## # or 51 | ## tsl_des <- chis19_adult %>% 52 | ## as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=0) 53 | ## 54 | ## 55 | 56 | #' 57 | #' 2. National Survey on Drug Use and Health - 2019 58 | #' - Useful links: 59 | #' - 60 | #' - 61 | #' - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`. 62 | #' 63 | ## ----nsduh, eval=FALSE--------------------------------------------------- 64 | ## nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV")) 65 | ## nsduh_des <- nsduh19 %>% 66 | ## as_survey_design(weights=ANALWT_C, strata=VESTR, ids=VEREP, nest=TRUE) 67 | ## 68 | 69 | #' 70 | #' 71 | #' # Part 2 - Derived variables 72 | #' 73 | #' Before exercises, read the data in 74 | ## ----datin, cache=TRUE--------------------------------------------------- 75 | anes <- read_rds(here("Data", "anes_2020.rds")) 76 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) 77 | 78 | #' 79 | #' In these exercises, you will be given specifications for a derived variable. Create the variable and check your work.
Useful resources: 80 | #' 81 | #' - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf) 82 | #' - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md) 83 | #' - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet 84 | #' 85 | #' 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x` 86 | #' 87 | #' - Under $25,000 88 | #' - $25,000-49,999 89 | #' - $50,000-74,999 90 | #' - $75,000-99,999 91 | #' - $100,000 or more 92 | #' 93 | ## ----income5------------------------------------------------------------- 94 | anes_income <- anes %>% 95 | mutate( 96 | Income5=factor(case_when( 97 | V201617x %in% c(1:4)~"Under $25,000", 98 | V201617x %in% c(5:9)~"$25,000-49,999", 99 | V201617x %in% c(10:13)~"$50,000-74,999", 100 | V201617x %in% c(14:16)~"$75,000-99,999", 101 | V201617x %in% c(17:22)~"$100,000 or more", 102 | TRUE ~ NA_character_ 103 | ), levels=c("Under $25,000", "$25,000-49,999", "$50,000-74,999", "$75,000-99,999", "$100,000 or more")) 104 | ) 105 | 106 | anes_income %>% 107 | count(Income5, V201617x) 108 | 109 | #' 110 | #' 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`.
Hint: use variable `V201507x` 111 | #' 112 | #' - 18-24 113 | #' - 25-44 114 | #' - 45-64 115 | #' - 65-74 116 | #' - 75 or older 117 | #' 118 | #' 119 | ## ----age5---------------------------------------------------------------- 120 | anes_age <- anes %>% 121 | mutate( 122 | AgeGroup5=factor(case_when( 123 | V201507x %in% c(18:24)~"18-24", 124 | V201507x %in% c(25:44)~"25-44", 125 | V201507x %in% c(45:64)~"45-64", 126 | V201507x %in% c(65:74)~"65-74", 127 | V201507x %in% c(75:90)~"75 or older", 128 | TRUE ~ NA_character_), 129 | levels=c('18-24', '25-44', '45-64', '65-74', '75 or older' )) 130 | ) 131 | 132 | anes_age %>% 133 | group_by(AgeGroup5) %>% 134 | summarise( 135 | minV=min(V201507x, na.rm = TRUE), 136 | maxV=max(V201507x, na.rm = TRUE), 137 | ncat=n(), 138 | nNA_v=sum(is.na(V201507x)) 139 | ) 140 | 141 | #' 142 | #' 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once: 143 | #' 144 | #' - Reducing or forgoing basic necessities to pay energy costs (SCALEB) 145 | #' - Leaving home at unhealthy temperature (SCALEG) 146 | #' - Receiving disconnect or delivery stop notice (SCALEE) 147 | #' - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK) 148 | #' - Unable to use cooling equipment (NOACBROKE, NOACEL) 149 | #' 150 | #' The relevant variables that should be used are included in parentheses.
151 | #' 152 | ## ----energyinsec--------------------------------------------------------- 153 | recs_insecur <- recs_in %>% 154 | select(starts_with("SCALE"), starts_with("NOHEAT"), starts_with("NOAC"), NWEIGHT) %>% 155 | mutate( 156 | EnergyInsec=SCALEB %in% c(1:3) | SCALEG %in% c(1:3) | SCALEE %in% c(1:3) | 157 | NOHEATBROKE==1 | NOHEATEL==1|NOHEATNG==1|NOHEATBULK==1| 158 | NOACBROKE==1|NOACEL==1 159 | ) 160 | 161 | recs_insecur %>% 162 | count(EnergyInsec, SCALEB, SCALEG, SCALEE, NOHEATBROKE, NOHEATEL, NOHEATNG, 163 | NOHEATBULK, NOACBROKE, NOACEL) 164 | 165 | 166 | #' 167 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Design objects and derived variables exercise solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Course set-up 9 | 10 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 11 | 12 | ```{r setup} 13 | # install.packages("tidyverse") 14 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 15 | # install.packages("srvyr") 16 | # install.packages("here") 17 | # install.packages("palmerpenguins") 18 | # install.packages("remotes") 19 | 20 | library(tidyverse) # for tidyverse 21 | library(here) # for file paths 22 | library(srvyr) 23 | 24 | 25 | ``` 26 | 27 | # Part 1 - Design Objects 28 | 29 | In these exercises, you will be given a study and assume you have the data. How would you create the design object? 30 | 31 | 1. California Health Interview Survey - 2019-2020 32 | - Note that you can do this with design variables aka Taylor's series or replicate weights. 33 | - Useful links: 34 | - 35 | - 36 | - 37 | - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`. 
38 | 39 | ```{r chis, eval=FALSE} 40 | 41 | chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat")) 42 | 43 | rep_des <- chis19_adult %>% 44 | as_survey_rep(weights=RAKEDW0, repweights=stringr::str_c("RAKEDW", 1:80), 45 | type="JKn", rscales=1) 46 | 47 | tsl_des <- chis19_adult %>% 48 | as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=1) 49 | 50 | # or 51 | tsl_des <- chis19_adult %>% 52 | as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=0) 53 | 54 | 55 | ``` 56 | 57 | 2. National Survey on Drug Use and Health - 2019 58 | - Useful links: 59 | - 60 | - 61 | - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`. 62 | 63 | ```{r nsduh, eval=FALSE} 64 | nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV")) 65 | nsduh_des <- nsduh19 %>% 66 | as_survey_design(weights=ANALWT_C, strata=VESTR, ids=VEREP, nest=TRUE) 67 | 68 | ``` 69 | 70 | 71 | # Part 2 - Derived variables 72 | 73 | Before exercises, read the data in 74 | ```{r datin, cache=TRUE} 75 | anes <- read_rds(here("Data", "anes_2020.rds")) 76 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) 77 | ``` 78 | 79 | In these exercises, you will be given specifications for a derived variable. Create the variable and check your work. Useful resources: 80 | 81 | - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf) 82 | - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md) 83 | - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet 84 | 85 | 1. ANES: Create a 5-level income variable as described below.
For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x` 86 | 87 | - Under $25,000 88 | - $25,000-49,999 89 | - $50,000-74,999 90 | - $75,000-99,999 91 | - $100,000 or more 92 | 93 | ```{r income5} 94 | anes_income <- anes %>% 95 | mutate( 96 | Income5=factor(case_when( 97 | V201617x %in% c(1:4)~"Under $25,000", 98 | V201617x %in% c(5:9)~"$25,000-49,999", 99 | V201617x %in% c(10:13)~"$50,000-74,999", 100 | V201617x %in% c(14:16)~"$75,000-99,999", 101 | V201617x %in% c(17:22)~"$100,000 or more", 102 | TRUE ~ NA_character_ 103 | ), levels=c("Under $25,000", "$25,000-49,999", "$50,000-74,999", "$75,000-99,999", "$100,000 or more")) 104 | ) 105 | 106 | anes_income %>% 107 | count(Income5, V201617x) 108 | ``` 109 | 110 | 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x` 111 | 112 | - 18-24 113 | - 25-44 114 | - 45-64 115 | - 65-74 116 | - 75 or older 117 | 118 | 119 | ```{r age5} 120 | anes_age <- anes %>% 121 | mutate( 122 | AgeGroup5=factor(case_when( 123 | V201507x %in% c(18:24)~"18-24", 124 | V201507x %in% c(25:44)~"25-44", 125 | V201507x %in% c(45:64)~"45-64", 126 | V201507x %in% c(65:74)~"65-74", 127 | V201507x %in% c(75:90)~"75 or older", 128 | TRUE ~ NA_character_), 129 | levels=c('18-24', '25-44', '45-64', '65-74', '75 or older' )) 130 | ) 131 | 132 | anes_age %>% 133 | group_by(AgeGroup5) %>% 134 | summarise( 135 | minV=min(V201507x, na.rm = TRUE), 136 | maxV=max(V201507x, na.rm = TRUE), 137 | ncat=n(), 138 | nNA_v=sum(is.na(V201507x)) 139 | ) 140 | ``` 141 | 142 | 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. 
Household energy insecurity is defined as households that have any of the following occur at least once: 143 | 144 | - Reducing or forgoing basic necessities to pay energy costs (SCALEB) 145 | - Leaving home at unhealthy temperature (SCALEG) 146 | - Receiving disconnect or delivery stop notice (SCALEE) 147 | - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK) 148 | - Unable to use cooling equipment (NOACBROKE, NOACEL) 149 | 150 | The relevant variables that should be used are included in parentheses. 151 | 152 | ```{r energyinsec} 153 | recs_insecur <- recs_in %>% 154 | select(starts_with("SCALE"), starts_with("NOHEAT"), starts_with("NOAC"), NWEIGHT) %>% 155 | mutate( 156 | EnergyInsec=SCALEB %in% c(1:3) | SCALEG %in% c(1:3) | SCALEE %in% c(1:3) | 157 | NOHEATBROKE==1 | NOHEATEL==1|NOHEATNG==1|NOHEATBULK==1| 158 | NOACBROKE==1|NOACEL==1 159 | ) 160 | 161 | recs_insecur %>% 162 | count(EnergyInsec, SCALEB, SCALEG, SCALEE, NOHEATBROKE, NOHEATEL, NOHEATNG, 163 | NOHEATBULK, NOACBROKE, NOACEL) 164 | 165 | ``` 166 | 167 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_solutions_cache/html/__packages: -------------------------------------------------------------------------------- 1 | tidyverse 2 | ggplot2 3 | tibble 4 | tidyr 5 | readr 6 | purrr 7 | dplyr 8 | stringr 9 | forcats 10 | here 11 | srvyr 12 | -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.RData
-------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdb -------------------------------------------------------------------------------- /Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdx -------------------------------------------------------------------------------- /Exercises/WarmUpExercises.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Warm-up Exercises" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Course set-up 9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 
10 | #' 11 | ## ----setup---------------------------------------------------------------------------------------------------------------------- 12 | # install.packages("tidyverse") 13 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 14 | # install.packages("srvyr") 15 | # install.packages("here") 16 | # install.packages("palmerpenguins") 17 | # install.packages("remotes") 18 | 19 | library(tidyverse) # for tidyverse 20 | library(here) # for file paths 21 | library(palmerpenguins) 22 | 23 | 24 | 25 | #' 26 | #' # Warm-up exercises for Day 1 27 | #' 28 | ## ----datapeek------------------------------------------------------------------------------------------------------------------- 29 | glimpse(penguins) 30 | 31 | #' 32 | #' How many penguins of each species are there? Hint: use `count` 33 | ## ----speciestab----------------------------------------------------------------------------------------------------------------- 34 | 35 | 36 | #' 37 | #' How many penguins of each species and sex are there? Hint: use `count` 38 | #' 39 | ## ----speciessextab-------------------------------------------------------------------------------------------------------------- 40 | 41 | 42 | #' 43 | #' 44 | #' What is the proportion of each species of penguins? Hint: use `count` then `mutate` 45 | #' 46 | ## ----speciestabp---------------------------------------------------------------------------------------------------------------- 47 | 48 | 49 | #' 50 | #' What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate` 51 | #' 52 | ## ----speciessextabp------------------------------------------------------------------------------------------------------------- 53 | 54 | 55 | #' 56 | #' 57 | #' # Warm-up exercises for Day 2 58 | #' 59 | #' What is the mean body mass in grams of all penguins? 
Hint: use `summarize` and remove missing data 60 | #' 61 | ## ----peng_meanmass-------------------------------------------------------------------------------------------------------------- 62 | 63 | 64 | #' 65 | #' What is the mean length of flipper by species? Hint: use `group_by` 66 | #' 67 | ## ----peng_meanflip_species------------------------------------------------------------------------------------------------------ 68 | 69 | 70 | #' 71 | #' What is the mean flipper length by species and sex? 72 | #' 73 | ## ----peng_meanflip_speciessex--------------------------------------------------------------------------------------------------- 74 | 75 | 76 | #' 77 | #' Fit a simple linear regression between body mass and flipper length. 78 | #' 79 | ## ----pengLM--------------------------------------------------------------------------------------------------------------------- 80 | 81 | 82 | #' 83 | -------------------------------------------------------------------------------- /Exercises/WarmUpExercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Warm-up Exercises" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Course set-up 9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 10 | 11 | ```{r setup} 12 | # install.packages("tidyverse") 13 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 14 | # install.packages("srvyr") 15 | # install.packages("here") 16 | # install.packages("palmerpenguins") 17 | # install.packages("remotes") 18 | 19 | library(tidyverse) # for tidyverse 20 | library(here) # for file paths 21 | library(palmerpenguins) 22 | 23 | 24 | ``` 25 | 26 | # Warm-up exercises for Day 1 27 | 28 | ```{r datapeek} 29 | glimpse(penguins) 30 | ``` 31 | 32 | How many penguins of each species are there? 
Hint: use `count` 33 | ```{r speciestab} 34 | 35 | ``` 36 | 37 | How many penguins of each species and sex are there? Hint: use `count` 38 | 39 | ```{r speciessextab} 40 | 41 | ``` 42 | 43 | 44 | What is the proportion of each species of penguins? Hint: use `count` then `mutate` 45 | 46 | ```{r speciestabp} 47 | 48 | ``` 49 | 50 | What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate` 51 | 52 | ```{r speciessextabp} 53 | 54 | ``` 55 | 56 | 57 | # Warm-up exercises for Day 2 58 | 59 | What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data 60 | 61 | ```{r peng_meanmass} 62 | 63 | ``` 64 | 65 | What is the mean length of flipper by species? Hint: use `group_by` 66 | 67 | ```{r peng_meanflip_species} 68 | 69 | ``` 70 | 71 | What is the mean flipper length by species and sex? 72 | 73 | ```{r peng_meanflip_speciessex} 74 | 75 | ``` 76 | 77 | Fit a simple linear regression between body mass and flipper length. 78 | 79 | ```{r pengLM} 80 | 81 | ``` 82 | 83 | -------------------------------------------------------------------------------- /Exercises/WarmUpExercises_solutions.R: -------------------------------------------------------------------------------- 1 | #' --- 2 | #' title: "Warm-up Exercise Solutions" 3 | #' output: 4 | #' html_document: 5 | #' df_print: paged 6 | #' --- 7 | #' 8 | #' # Course set-up 9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 
10 | #' 11 | ## ----setup---------------------------------------------------------------------------------------------------------------------- 12 | # install.packages("tidyverse") 13 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 14 | # install.packages("srvyr") 15 | # install.packages("here") 16 | # install.packages("palmerpenguins") 17 | # install.packages("remotes") 18 | 19 | library(tidyverse) # for tidyverse 20 | library(here) # for file paths 21 | library(palmerpenguins) 22 | 23 | 24 | 25 | #' 26 | #' # Warm-up exercises for Day 1 27 | #' 28 | ## ----datapeek------------------------------------------------------------------------------------------------------------------- 29 | glimpse(penguins) 30 | 31 | #' 32 | #' How many penguins of each species are there? Hint: use `count` 33 | #' 34 | ## ----speciestab----------------------------------------------------------------------------------------------------------------- 35 | penguins %>% 36 | count(species) 37 | 38 | #' 39 | #' How many penguins of each species and sex are there? Hint: use `count` 40 | #' 41 | ## ----speciessextab-------------------------------------------------------------------------------------------------------------- 42 | penguins %>% 43 | count(species, sex) 44 | 45 | #' 46 | #' What is the proportion of each species of penguins? Hint: use `count` then `mutate` 47 | #' 48 | ## ----speciestabp---------------------------------------------------------------------------------------------------------------- 49 | penguins %>% 50 | count(species) %>% 51 | mutate( 52 | p=n/sum(n) 53 | ) 54 | 55 | #' 56 | #' What is the proportion of each sex of penguins within species? 
Hint: use `count` then `group_by` and `mutate` 57 | #' 58 | ## ----speciessextabp------------------------------------------------------------------------------------------------------------- 59 | penguins %>% 60 | count(species, sex) %>% 61 | group_by(species) %>% 62 | mutate( 63 | p=n/sum(n) 64 | ) 65 | 66 | 67 | #' 68 | #' # Warm-up exercises for Day 2 69 | #' 70 | #' What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data 71 | #' 72 | ## ----peng_meanmass-------------------------------------------------------------------------------------------------------------- 73 | penguins %>% 74 | summarize( 75 | MeanBodyMass=mean(body_mass_g, 76 | na.rm=TRUE) 77 | ) 78 | 79 | #' 80 | #' What is the mean length of flipper by species? Hint: use `group_by` 81 | #' 82 | ## ----peng_meanflip_species------------------------------------------------------------------------------------------------------ 83 | penguins %>% 84 | group_by(species) %>% 85 | summarize( 86 | MeanFlipperLength=mean(flipper_length_mm, 87 | na.rm=TRUE) 88 | ) 89 | 90 | #' 91 | #' What is the mean flipper length by species and sex? 92 | #' 93 | ## ----peng_meanflip_speciessex--------------------------------------------------------------------------------------------------- 94 | penguins %>% 95 | group_by(species,sex) %>% 96 | summarize( 97 | MeanFlipperLength=mean(flipper_length_mm, 98 | na.rm=TRUE)) 99 | 100 | #' 101 | #' Fit a simple linear regression between body mass and flipper length. 
102 | #' 103 | ## ----pengLM--------------------------------------------------------------------------------------------------------------------- 104 | #Option 1 105 | mod1 <- lm(body_mass_g ~ flipper_length_mm, data=penguins) 106 | summary(mod1) 107 | 108 | #Option 2 109 | mod2 <- glm(body_mass_g ~ flipper_length_mm, data=penguins) 110 | summary(mod2) 111 | 112 | #' 113 | #' 114 | #' # Session information 115 | #' 116 | ## ----si------------------------------------------------------------------------------------------------------------------------- 117 | devtools::session_info(pkgs="attached") 118 | 119 | -------------------------------------------------------------------------------- /Exercises/WarmUpExercises_solutions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Warm-up Exercise Solutions" 3 | output: 4 | html_document: 5 | df_print: paged 6 | --- 7 | 8 | # Course set-up 9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it. 10 | 11 | ```{r setup} 12 | # install.packages("tidyverse") 13 | # remotes::install_github("bschneidr/r-forge-survey-mirror") 14 | # install.packages("srvyr") 15 | # install.packages("here") 16 | # install.packages("palmerpenguins") 17 | # install.packages("remotes") 18 | 19 | library(tidyverse) # for tidyverse 20 | library(here) # for file paths 21 | library(palmerpenguins) 22 | 23 | 24 | ``` 25 | 26 | # Warm-up exercises for Day 1 27 | 28 | ```{r datapeek} 29 | glimpse(penguins) 30 | ``` 31 | 32 | How many penguins of each species are there? Hint: use `count` 33 | 34 | ```{r speciestab} 35 | penguins %>% 36 | count(species) 37 | ``` 38 | 39 | How many penguins of each species and sex are there? Hint: use `count` 40 | 41 | ```{r speciessextab} 42 | penguins %>% 43 | count(species, sex) 44 | ``` 45 | 46 | What is the proportion of each species of penguins? 
Hint: use `count` then `mutate` 47 | 48 | ```{r speciestabp} 49 | penguins %>% 50 | count(species) %>% 51 | mutate( 52 | p=n/sum(n) 53 | ) 54 | ``` 55 | 56 | What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate` 57 | 58 | ```{r speciessextabp} 59 | penguins %>% 60 | count(species, sex) %>% 61 | group_by(species) %>% 62 | mutate( 63 | p=n/sum(n) 64 | ) 65 | 66 | ``` 67 | 68 | # Warm-up exercises for Day 2 69 | 70 | What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data 71 | 72 | ```{r peng_meanmass} 73 | penguins %>% 74 | summarize( 75 | MeanBodyMass=mean(body_mass_g, 76 | na.rm=TRUE) 77 | ) 78 | ``` 79 | 80 | What is the mean length of flipper by species? Hint: use `group_by` 81 | 82 | ```{r peng_meanflip_species} 83 | penguins %>% 84 | group_by(species) %>% 85 | summarize( 86 | MeanFlipperLength=mean(flipper_length_mm, 87 | na.rm=TRUE) 88 | ) 89 | ``` 90 | 91 | What is the mean flipper length by species and sex? 92 | 93 | ```{r peng_meanflip_speciessex} 94 | penguins %>% 95 | group_by(species,sex) %>% 96 | summarize( 97 | MeanFlipperLength=mean(flipper_length_mm, 98 | na.rm=TRUE)) 99 | ``` 100 | 101 | Fit a simple linear regression between body mass and flipper length. 
102 | 103 | ```{r pengLM} 104 | #Option 1 105 | mod1 <- lm(body_mass_g ~ flipper_length_mm, data=penguins) 106 | summary(mod1) 107 | 108 | #Option 2 109 | mod2 <- glm(body_mass_g ~ flipper_length_mm, data=penguins) 110 | summary(mod2) 111 | ``` 112 | 113 | 114 | # Session information 115 | 116 | ```{r si} 117 | devtools::session_info(pkgs="attached") 118 | ``` -------------------------------------------------------------------------------- /FinalizeMaterials.R: -------------------------------------------------------------------------------- 1 | ### This program creates PDF slides, PPTX slides, and R files from the Rmd files 2 | 3 | # remotes::install_github("jhelvy/xaringanBuilder") 4 | # remotes::install_github('rstudio/chromote') 5 | # install.packages('pdftools') 6 | # install.packages('officer') 7 | 8 | library(knitr) 9 | library(here) 10 | 11 | mypurl <- function(folder, fn){ 12 | purl(here(folder, stringr::str_c(fn, ".Rmd")), 13 | output=here(folder, stringr::str_c(fn, ".R")), 14 | documentation=2) 15 | 16 | } 17 | 18 | # Day 1 processing 19 | 20 | mypurl("Presentation", "Slides-day-1") 21 | xaringanBuilder::build_html( 22 | input=here("Presentation", "Slides-day-1.Rmd"), 23 | output_file=here("Presentation", "Slides-day-1.html")) 24 | xaringanBuilder::build_pdf( 25 | input=here("Presentation", "Slides-day-1.html"), 26 | output_file=here("Presentation", "Slides-day-1.pdf"), 27 | partial_slides= TRUE) 28 | xaringanBuilder::build_pptx( 29 | input=here("Presentation", "Slides-day-1.pdf"), 30 | output_file=here("Presentation", "Slides-day-1.pptx"), 31 | partial_slides= TRUE) 32 | mypurl("Exercises", "CategorialExercises") 33 | mypurl("Exercises", "CategorialExercises_solutions") 34 | 35 | # Day 1/2 processing 36 | mypurl("Exercises", "WarmUpExercises") 37 | mypurl("Exercises", "WarmUpExercises_solutions") 38 | 39 | # Day 2 processing 40 | mypurl("Presentation", "Slides-day-2") 41 | xaringanBuilder::build_html( 42 | input=here("Presentation", "Slides-day-2.Rmd"), 
43 | output_file=here("Presentation", "Slides-day-2.html")) 44 | xaringanBuilder::build_pdf( 45 | input=here("Presentation", "Slides-day-2.html"), 46 | output_file=here("Presentation", "Slides-day-2.pdf"), 47 | partial_slides= TRUE) 48 | xaringanBuilder::build_pptx( 49 | input=here("Presentation", "Slides-day-2.pdf"), 50 | output_file=here("Presentation", "Slides-day-2.pptx"), 51 | partial_slides= TRUE) 52 | mypurl("Exercises", "ContinuousExercises") 53 | mypurl("Exercises", "ContinuousExercises_solutions") 54 | 55 | 56 | 57 | # Day 3 processing 58 | 59 | mypurl("Presentation", "Slides-day-3") 60 | xaringanBuilder::build_html( 61 | input=here("Presentation", "Slides-day-3.Rmd"), 62 | output_file=here("Presentation", "Slides-day-3.html")) 63 | xaringanBuilder::build_pdf( 64 | input=here("Presentation", "Slides-day-3.html"), 65 | output_file=here("Presentation", "Slides-day-3.pdf"), 66 | partial_slides= TRUE) 67 | xaringanBuilder::build_pptx( 68 | input=here("Presentation", "Slides-day-3.pdf"), 69 | output_file=here("Presentation", "Slides-day-3.pptx"), 70 | partial_slides= TRUE) 71 | mypurl("Exercises", "DesignDerivedVariablesExercises") 72 | mypurl("Exercises", "DesignDerivedVariablesExercises_solutions") 73 | -------------------------------------------------------------------------------- /Presentation/Images/IsabellaVelasquez_Headshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/IsabellaVelasquez_Headshot.jpeg -------------------------------------------------------------------------------- /Presentation/Images/MAPOR-Logo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/MAPOR-Logo1.png 
-------------------------------------------------------------------------------- /Presentation/Images/Project-Contents.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/Project-Contents.png -------------------------------------------------------------------------------- /Presentation/Images/RebeccaPowell_Headshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/RebeccaPowell_Headshot.jpeg -------------------------------------------------------------------------------- /Presentation/Images/StephanieZimmer_Headshot.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/StephanieZimmer_Headshot.jpeg -------------------------------------------------------------------------------- /Presentation/Slides-day-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-1.pdf -------------------------------------------------------------------------------- /Presentation/Slides-day-1.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-1.pptx -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/crosstalk-1.2.0/css/crosstalk.min.css: 
-------------------------------------------------------------------------------- 1 | .container-fluid.crosstalk-bscols{margin-left:-30px;margin-right:-30px;white-space:normal}body>.container-fluid.crosstalk-bscols{margin-left:auto;margin-right:auto}.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column{display:inline-block;padding-right:12px;vertical-align:top}@media only screen and (max-width: 480px){.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column{display:block;padding-right:inherit}}.crosstalk-input{margin-bottom:15px}.crosstalk-input .control-label{margin-bottom:0;vertical-align:middle}.crosstalk-input input[type="checkbox"]{margin:4px 0 0;margin-top:1px;line-height:normal}.crosstalk-input .checkbox{position:relative;display:block;margin-top:10px;margin-bottom:10px}.crosstalk-input .checkbox>label{padding-left:20px;margin-bottom:0;font-weight:400;cursor:pointer}.crosstalk-input .checkbox input[type="checkbox"],.crosstalk-input .checkbox-inline input[type="checkbox"]{position:absolute;margin-top:2px;margin-left:-20px}.crosstalk-input .checkbox+.checkbox{margin-top:-5px}.crosstalk-input .checkbox-inline{position:relative;display:inline-block;padding-left:20px;margin-bottom:0;font-weight:400;vertical-align:middle;cursor:pointer}.crosstalk-input .checkbox-inline+.checkbox-inline{margin-top:0;margin-left:10px} 2 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/crosstalk-1.2.0/js/crosstalk.min.js: -------------------------------------------------------------------------------- 1 | !function o(u,a,l){function s(n,e){if(!a[n]){if(!u[n]){var t="function"==typeof require&&require;if(!e&&t)return t(n,!0);if(f)return f(n,!0);var r=new Error("Cannot find module '"+n+"'");throw r.code="MODULE_NOT_FOUND",r}var i=a[n]={exports:{}};u[n][0].call(i.exports,function(e){var t=u[n][1][e];return s(t||e)},i,i.exports,o,u,a,l)}return a[n].exports}for(var 
f="function"==typeof require&&require,e=0;e?@[\\\]^`{|}~])/g,"\\$1")+"']"),r=JSON.parse(n[0].innerText),i=e.factory(t,r);o(t).data("crosstalk-instance",i),o(t).addClass("crosstalk-input-bound")}if(t.Shiny){var e=new t.Shiny.InputBinding,u=t.jQuery;u.extend(e,{find:function(e){return u(e).find(".crosstalk-input")},initialize:function(e){var t,n;u(e).hasClass("crosstalk-input-bound")||(n=o(t=e),Object.keys(r).forEach(function(e){n.hasClass(e)&&!n.hasClass("crosstalk-input-bound")&&i(r[e],t)}))},getId:function(e){return e.id},getValue:function(e){},setValue:function(e,t){},receiveMessage:function(e,t){},subscribe:function(e,t){u(e).data("crosstalk-instance").resume()},unsubscribe:function(e){u(e).data("crosstalk-instance").suspend()}}),t.Shiny.inputBindings.register(e,"crosstalk.inputBinding")}}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{}],7:[function(r,e,t){(function(e){"use strict";var t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}}(r("./input")),n=r("./filter");var a=e.jQuery;t.register({className:"crosstalk-input-checkboxgroup",factory:function(e,r){var i=new n.FilterHandle(r.group),o=void 0,u=a(e);return u.on("change","input[type='checkbox']",function(){var e=u.find("input[type='checkbox']:checked");if(0===e.length)o=null,i.clear();else{var t={};e.each(function(){r.map[this.value].forEach(function(e){t[e]=!0})});var n=Object.keys(t);n.sort(),o=n,i.set(n)}}),{suspend:function(){i.clear()},resume:function(){o&&i.set(o)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6}],8:[function(r,e,t){(function(e){"use strict";var t=n(r("./input")),l=n(r("./util")),s=r("./filter");function n(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in 
e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}var f=e.jQuery;t.register({className:"crosstalk-input-select",factory:function(e,n){var t=l.dataframeToD3(n.items),r={options:[{value:"",label:"(All)"}].concat(t),valueField:"value",labelField:"label",searchField:"label"},i=f(e).find("select")[0],o=f(i).selectize(r)[0].selectize,u=new s.FilterHandle(n.group),a=void 0;return o.on("change",function(){if(0===o.items.length)a=null,u.clear();else{var t={};o.items.forEach(function(e){n.map[e].forEach(function(e){t[e]=!0})});var e=Object.keys(t);e.sort(),a=e,u.set(e)}}),{suspend:function(){u.clear()},resume:function(){a&&u.set(a)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6,"./util":11}],9:[function(n,e,t){(function(e){"use strict";var d=function(e,t){if(Array.isArray(e))return e;if(Symbol.iterator in Object(e))return function(e,t){var n=[],r=!0,i=!1,o=void 0;try{for(var u,a=e[Symbol.iterator]();!(r=(u=a.next()).done)&&(n.push(u.value),!t||n.length!==t);r=!0);}catch(e){i=!0,o=e}finally{try{!r&&a.return&&a.return()}finally{if(i)throw o}}return n}(e,t);throw new TypeError("Invalid attempt to destructure non-iterable instance")},t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}}(n("./input")),a=n("./filter");var v=e.jQuery,p=e.strftime;function y(e,t){for(var n=e.toString();n.length .container-fluid.crosstalk-bscols { 12 | margin-left: auto; 13 | margin-right: auto; 14 | } 15 | 16 | .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { 17 | display: inline-block; 18 | padding-right: 12px; 19 | vertical-align: top; 20 | } 21 | 22 | @media only screen and (max-width:480px) { 23 | .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column { 24 | display: block; 25 | padding-right: inherit; 26 | } 
27 | } 28 | 29 | /* Relevant BS3 styles to make filter_checkbox() look reasonable without Bootstrap */ 30 | .crosstalk-input { 31 | margin-bottom: 15px; /* a la .form-group */ 32 | .control-label { 33 | margin-bottom: 0; 34 | vertical-align: middle; 35 | } 36 | input[type="checkbox"] { 37 | margin: 4px 0 0; 38 | margin-top: 1px; 39 | line-height: normal; 40 | } 41 | .checkbox { 42 | position: relative; 43 | display: block; 44 | margin-top: 10px; 45 | margin-bottom: 10px; 46 | } 47 | .checkbox > label{ 48 | padding-left: 20px; 49 | margin-bottom: 0; 50 | font-weight: 400; 51 | cursor: pointer; 52 | } 53 | .checkbox input[type="checkbox"], 54 | .checkbox-inline input[type="checkbox"] { 55 | position: absolute; 56 | margin-top: 2px; 57 | margin-left: -20px; 58 | } 59 | .checkbox + .checkbox { 60 | margin-top: -5px; 61 | } 62 | .checkbox-inline { 63 | position: relative; 64 | display: inline-block; 65 | padding-left: 20px; 66 | margin-bottom: 0; 67 | font-weight: 400; 68 | vertical-align: middle; 69 | cursor: pointer; 70 | } 71 | .checkbox-inline + .checkbox-inline { 72 | margin-top: 0; 73 | margin-left: 10px; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/datatables-css-0.0.0/datatables-crosstalk.css: -------------------------------------------------------------------------------- 1 | .dt-crosstalk-fade { 2 | opacity: 0.2; 3 | } 4 | 5 | html body div.DTS div.dataTables_scrollBody { 6 | background: none; 7 | } 8 | 9 | 10 | /* 11 | Fix https://github.com/rstudio/DT/issues/563 12 | If the `table.display` is set to "block" (e.g., pkgdown), the browser will display 13 | datatable objects strangely. The search panel and the page buttons will still be 14 | in full-width but the table body will be "compact" and shorter. 15 | In therory, having this attributes will affect `dom="t"` 16 | with `display: block` users. But in reality, there should be no one. 
17 | We may remove the below lines in the future if the upstream agree to have this there. 18 | See https://github.com/DataTables/DataTablesSrc/issues/160 19 | */ 20 | 21 | table.dataTable { 22 | display: table; 23 | } 24 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/dt-core-1.11.3/css/jquery.dataTables.extra.css: -------------------------------------------------------------------------------- 1 | /* Selected rows/cells */ 2 | table.dataTable tr.selected td, table.dataTable td.selected { 3 | background-color: #b0bed9 !important; 4 | } 5 | /* In case of scrollX/Y or FixedHeader */ 6 | .dataTables_scrollBody .dataTables_sizing { 7 | visibility: hidden; 8 | } 9 | 10 | /* The datatables' theme CSS file doesn't define 11 | the color but with white background. It leads to an issue that 12 | when the HTML's body color is set to 'white', the user can't 13 | see the text since the background is white. One case happens in the 14 | RStudio's IDE when inline viewing the DT table inside an Rmd file, 15 | if the IDE theme is set to "Cobalt". 16 | 17 | See https://github.com/rstudio/DT/issues/447 for more info 18 | 19 | This fixes should have little side-effects because all the other elements 20 | of the default theme use the #333 font color. 21 | 22 | TODO: The upstream may use relative colors for both the table background 23 | and the color. It means the table can display well without this patch 24 | then. At that time, we need to remove the below CSS attributes. 
25 | */ 26 | div.datatables { 27 | color: #333; 28 | } 29 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/dt-core-1.11.3/css/jquery.dataTables.min.css: -------------------------------------------------------------------------------- 1 | td.dt-control{background:url() no-repeat center center;cursor:pointer}tr.dt-hasChild td.dt-control{background:url() no-repeat center center}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable td.dt-nowrap{white-space:nowrap}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody 
td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable{width:100%;margin:0 auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable thead th,table.dataTable thead td{padding:10px 18px;border-bottom:1px solid #111}table.dataTable thead th:active,table.dataTable thead td:active{outline:none}table.dataTable tfoot th,table.dataTable tfoot td{padding:10px 18px 6px 18px;border-top:1px solid #111}table.dataTable thead .sorting,table.dataTable thead .sorting_asc,table.dataTable thead .sorting_desc,table.dataTable thead .sorting_asc_disabled,table.dataTable thead .sorting_desc_disabled{cursor:pointer;*cursor:hand;background-repeat:no-repeat;background-position:center right}table.dataTable thead .sorting{background-image:url()}table.dataTable thead .sorting_asc{background-image:url() !important}table.dataTable thead .sorting_desc{background-image:url() !important}table.dataTable thead .sorting_asc_disabled{background-image:url()}table.dataTable thead .sorting_desc_disabled{background-image:url()}table.dataTable tbody tr{background-color:#fff}table.dataTable tbody tr.selected{background-color:#b0bed9}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border tbody th,table.dataTable.row-border tbody td,table.dataTable.display tbody th,table.dataTable.display tbody td{border-top:1px solid #ddd}table.dataTable.row-border tbody tr:first-child th,table.dataTable.row-border tbody tr:first-child td,table.dataTable.display tbody tr:first-child th,table.dataTable.display tbody tr:first-child td{border-top:none}table.dataTable.cell-border tbody th,table.dataTable.cell-border tbody td{border-top:1px solid #ddd;border-right:1px solid #ddd}table.dataTable.cell-border tbody tr 
th:first-child,table.dataTable.cell-border tbody tr td:first-child{border-left:1px solid #ddd}table.dataTable.cell-border tbody tr:first-child th,table.dataTable.cell-border tbody tr:first-child td{border-top:none}table.dataTable.stripe tbody tr.odd,table.dataTable.display tbody tr.odd{background-color:#f9f9f9}table.dataTable.stripe tbody tr.odd.selected,table.dataTable.display tbody tr.odd.selected{background-color:#acbad4}table.dataTable.hover tbody tr:hover,table.dataTable.display tbody tr:hover{background-color:#f6f6f6}table.dataTable.hover tbody tr:hover.selected,table.dataTable.display tbody tr:hover.selected{background-color:#aab7d1}table.dataTable.order-column tbody tr>.sorting_1,table.dataTable.order-column tbody tr>.sorting_2,table.dataTable.order-column tbody tr>.sorting_3,table.dataTable.display tbody tr>.sorting_1,table.dataTable.display tbody tr>.sorting_2,table.dataTable.display tbody tr>.sorting_3{background-color:#fafafa}table.dataTable.order-column tbody tr.selected>.sorting_1,table.dataTable.order-column tbody tr.selected>.sorting_2,table.dataTable.order-column tbody tr.selected>.sorting_3,table.dataTable.display tbody tr.selected>.sorting_1,table.dataTable.display tbody tr.selected>.sorting_2,table.dataTable.display tbody tr.selected>.sorting_3{background-color:#acbad5}table.dataTable.display tbody tr.odd>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd>.sorting_1{background-color:#f1f1f1}table.dataTable.display tbody tr.odd>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd>.sorting_2{background-color:#f3f3f3}table.dataTable.display tbody tr.odd>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd>.sorting_3{background-color:whitesmoke}table.dataTable.display tbody tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_1{background-color:#a6b4cd}table.dataTable.display tbody tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe tbody 
tr.odd.selected>.sorting_2{background-color:#a8b5cf}table.dataTable.display tbody tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_3{background-color:#a9b7d1}table.dataTable.display tbody tr.even>.sorting_1,table.dataTable.order-column.stripe tbody tr.even>.sorting_1{background-color:#fafafa}table.dataTable.display tbody tr.even>.sorting_2,table.dataTable.order-column.stripe tbody tr.even>.sorting_2{background-color:#fcfcfc}table.dataTable.display tbody tr.even>.sorting_3,table.dataTable.order-column.stripe tbody tr.even>.sorting_3{background-color:#fefefe}table.dataTable.display tbody tr.even.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_1{background-color:#acbad5}table.dataTable.display tbody tr.even.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_2{background-color:#aebcd6}table.dataTable.display tbody tr.even.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_3{background-color:#afbdd8}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{background-color:#eaeaea}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{background-color:#ececec}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{background-color:#efefef}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{background-color:#a2aec7}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{background-color:#a3b0c9}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{background-color:#a5b2cb}table.dataTable.no-footer{border-bottom:1px solid 
#111}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable.compact thead th,table.dataTable.compact thead td{padding:4px 17px}table.dataTable.compact tfoot th,table.dataTable.compact tfoot td{padding:4px}table.dataTable.compact tbody th,table.dataTable.compact tbody td{padding:4px}table.dataTable th,table.dataTable td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_length select{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;padding:4px}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;margin-left:3px}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:.25em}.dataTables_wrapper .dataTables_paginate .paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;*cursor:hand;color:#333 !important;border:1px solid transparent;border-radius:2px}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:#333 !important;border:1px solid #979797;background-color:white;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, white), color-stop(100%, #dcdcdc));background:-webkit-linear-gradient(top, white 0%, #dcdcdc 100%);background:-moz-linear-gradient(top, white 0%, #dcdcdc 100%);background:-ms-linear-gradient(top, white 0%, #dcdcdc 100%);background:-o-linear-gradient(top, white 0%, #dcdcdc 100%);background:linear-gradient(to bottom, white 0%, #dcdcdc 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper 
.dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate .paginate_button:hover{color:white !important;border:1px solid #111;background-color:#585858;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, #111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#2b2b2b;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_processing{position:absolute;top:50%;left:50%;width:100%;height:40px;margin-left:-50%;margin-top:-25px;padding-top:20px;text-align:center;font-size:1.2em;background-color:white;background:-webkit-gradient(linear, left top, right top, color-stop(0%, rgba(255, 255, 255, 0)), color-stop(25%, rgba(255, 255, 255, 0.9)), color-stop(75%, rgba(255, 255, 255, 0.9)), color-stop(100%, rgba(255, 255, 255, 0)));background:-webkit-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 
100%);background:-moz-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-ms-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-o-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:linear-gradient(to right, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%)}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:#333}.dataTables_wrapper .dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{*margin-top:-1px;-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid #111}.dataTables_wrapper.no-footer div.dataTables_scrollHead 
table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:.5em}} 2 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/header-attrs-2.11.22/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/Slides-day-1_files/header-attrs-2.13/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/Slides-day-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-2.pdf -------------------------------------------------------------------------------- /Presentation/Slides-day-2.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-2.pptx -------------------------------------------------------------------------------- /Presentation/Slides-day-2_files/figure-html/plot_sf_elbill_disp-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-2_files/figure-html/plot_sf_elbill_disp-1.png -------------------------------------------------------------------------------- /Presentation/Slides-day-2_files/header-attrs-2.11.22/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/Slides-day-2_files/header-attrs-2.13/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/Slides-day-3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3.pdf -------------------------------------------------------------------------------- /Presentation/Slides-day-3.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3.pptx -------------------------------------------------------------------------------- /Presentation/Slides-day-3_cache/html/__packages: -------------------------------------------------------------------------------- 1 | tidyverse 2 | ggplot2 3 | tibble 4 | tidyr 5 | readr 6 | purrr 7 | dplyr 8 | stringr 9 | forcats 10 | here 11 | srvyr 12 | Matrix 13 | survival 14 | survey 15 | xaringan 16 | knitr 17 | tidycensus 18 | -------------------------------------------------------------------------------- /Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.RData 
-------------------------------------------------------------------------------- /Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdb -------------------------------------------------------------------------------- /Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdx -------------------------------------------------------------------------------- /Presentation/Slides-day-3_files/figure-html/der3c-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_files/figure-html/der3c-1.png -------------------------------------------------------------------------------- /Presentation/Slides-day-3_files/header-attrs-2.11.22/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/Slides-day-3_files/header-attrs-2.13/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /Presentation/xaringan-themer-mod.css: -------------------------------------------------------------------------------- 1 | /* ------------------------------------------------------- 2 | * 3 | * !! This file was generated by xaringanthemer !! 4 | * !! and then customized !! 5 | * 6 | * 7 | * Issues or likes? 8 | * - https://github.com/gadenbuie/xaringanthemer 9 | * - https://www.garrickadenbuie.com 10 | * 11 | * Need help? 
Try: 12 | * - vignette(package = "xaringanthemer") 13 | * - ?xaringanthemer::style_xaringan 14 | * - xaringan wiki: https://github.com/yihui/xaringan/wiki 15 | * - remarkjs wiki: https://github.com/gnab/remark/wiki 16 | * 17 | * Version: 0.4.1 18 | * 19 | * ------------------------------------------------------- */ 20 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap); 21 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap); 22 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap); 23 | @import url('https://fonts.googleapis.com/css2?family=Telex&family=Ubuntu:wght@300&display=swap'); 24 | 25 | 26 | :root { 27 | /* Fonts */ 28 | --text-font-family: 'Noto Sans'; 29 | --text-font-is-google: 1; 30 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial; 31 | --text-font-base: sans-serif; 32 | --header-font-family: Cabin; 33 | --header-font-is-google: 1; 34 | --header-font-family-fallback: Georgia, serif; 35 | --code-font-family: 'Source Code Pro'; 36 | --code-font-is-google: 1; 37 | --base-font-size: 20px; 38 | --text-font-size: 1rem; 39 | --code-font-size: 0.9rem; 40 | --code-inline-font-size: 1em; 41 | --header-h1-font-size: 2.75rem; 42 | --header-h2-font-size: 2.25rem; 43 | --header-h3-font-size: 1.75rem; 44 | 45 | /* Colors */ 46 | --text-color: #000000; 47 | --header-color: #1E4F96; 48 | --background-color: #FFFFFF; 49 | --link-color: #1E4F96; 50 | --text-bold-color: #1E4F96; 51 | --code-highlight-color: rgba(255,255,0,0.5); 52 | --inverse-text-color: #000000; 53 | --inverse-background-color: #00A3E0; 54 | --inverse-header-color: #FFFFFF; 55 | --inverse-link-color: #1E4F96; 56 | --title-slide-background-color: #1E4F96; 57 | --title-slide-text-color: #FFFFFF; 58 | --header-background-color: #1E4F96; 59 | --header-background-text-color: #FFFFFF; 60 | --primary: 
#1E4F96; 61 | --secondary: #00A3E0; 62 | --white: #FFFFFF; 63 | --black: #000000; 64 | } 65 | 66 | html { 67 | font-size: var(--base-font-size); 68 | } 69 | 70 | body { 71 | font-family: 'Telex', sans-serif; 72 | font-weight: normal; 73 | color: var(--text-color); 74 | } 75 | h1, h2, h3 { 76 | font-family: 'Ubuntu', sans-serif; 77 | font-weight: 600; 78 | color: var(--header-color); 79 | } 80 | .remark-slide-content { 81 | background-color: var(--background-color); 82 | font-size: 1rem; 83 | padding: 16px 64px 16px 64px; 84 | width: 100%; 85 | height: 100%; 86 | } 87 | .remark-slide-content h1 { 88 | font-size: var(--header-h1-font-size); 89 | } 90 | .remark-slide-content h2 { 91 | font-size: var(--header-h2-font-size); 92 | } 93 | .remark-slide-content h3 { 94 | font-size: var(--header-h3-font-size); 95 | } 96 | .remark-code, .remark-inline-code { 97 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; 98 | } 99 | .remark-code { 100 | font-size: var(--code-font-size); 101 | } 102 | .remark-inline-code { 103 | font-size: var(--code-inline-font-size); 104 | color: #1E4F96; 105 | } 106 | .remark-slide-number { 107 | color: #1E4F96; 108 | opacity: 1; 109 | font-size: 0.9rem; 110 | } 111 | strong { 112 | font-weight: bold; 113 | color: var(--text-bold-color); 114 | } 115 | a, a > code { 116 | color: var(--link-color); 117 | text-decoration: none; 118 | } 119 | .footnote { 120 | position: absolute; 121 | bottom: 60px; 122 | padding-right: 4em; 123 | font-size: 0.9em; 124 | } 125 | .remark-code-line-highlighted { 126 | background-color: var(--code-highlight-color); 127 | } 128 | .inverse { 129 | background-color: var(--inverse-background-color); 130 | color: var(--inverse-text-color); 131 | 132 | } 133 | .inverse h1, .inverse h2, .inverse h3 { 134 | color: var(--inverse-header-color); 135 | } 136 | .inverse a, .inverse a > code { 137 | color: var(--inverse-link-color); 138 | } 139 | .title-slide, .title-slide h1, 
.title-slide h2, .title-slide h3 { 140 | color: var(--title-slide-text-color); 141 | } 142 | .title-slide { 143 | background-color: var(--title-slide-background-color); 144 | } 145 | .title-slide .remark-slide-number { 146 | display: none; 147 | } 148 | /* Two-column layout */ 149 | .left-column { 150 | width: 20%; 151 | height: 92%; 152 | float: left; 153 | } 154 | .left-column h2, .left-column h3 { 155 | color: #1E4F9699; 156 | } 157 | .left-column h2:last-of-type, .left-column h3:last-child { 158 | color: #1E4F96; 159 | } 160 | .right-column { 161 | width: 75%; 162 | float: right; 163 | padding-top: 1em; 164 | } 165 | .pull-left { 166 | float: left; 167 | width: 47%; 168 | } 169 | .pull-right { 170 | float: right; 171 | width: 47%; 172 | } 173 | .pull-right + * { 174 | clear: both; 175 | } 176 | img, video, iframe { 177 | max-width: 100%; 178 | } 179 | blockquote { 180 | border-left: solid 5px #00A3E080; 181 | padding-left: 1em; 182 | } 183 | .remark-slide table { 184 | margin: auto; 185 | border-top: 1px solid #666; 186 | border-bottom: 1px solid #666; 187 | } 188 | .remark-slide table thead th { 189 | border-bottom: 1px solid #ddd; 190 | } 191 | th, td { 192 | padding: 5px; 193 | } 194 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { 195 | background: #CCECF8; 196 | } 197 | table.dataTable tbody { 198 | background-color: var(--background-color); 199 | color: var(--text-color); 200 | } 201 | table.dataTable.display tbody tr.odd { 202 | background-color: var(--background-color); 203 | } 204 | table.dataTable.display tbody tr.even { 205 | background-color: #CCECF8; 206 | } 207 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover { 208 | background-color: rgba(255, 255, 255, 0.5); 209 | } 210 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate { 211 | color: 
var(--text-color); 212 | } 213 | .dataTables_wrapper .dataTables_paginate .paginate_button { 214 | color: var(--text-color) !important; 215 | } 216 | 217 | /* Horizontal alignment of code blocks */ 218 | .remark-slide-content.left pre, 219 | .remark-slide-content.center pre, 220 | .remark-slide-content.right pre { 221 | text-align: start; 222 | width: max-content; 223 | max-width: 100%; 224 | } 225 | .remark-slide-content.left pre, 226 | .remark-slide-content.right pre { 227 | min-width: 50%; 228 | min-width: min(40ch, 100%); 229 | } 230 | .remark-slide-content.center pre { 231 | min-width: 66%; 232 | min-width: min(50ch, 100%); 233 | } 234 | .remark-slide-content.left pre { 235 | margin-left: unset; 236 | margin-right: auto; 237 | } 238 | .remark-slide-content.center pre { 239 | margin-left: auto; 240 | margin-right: auto; 241 | } 242 | .remark-slide-content.right pre { 243 | margin-left: auto; 244 | margin-right: unset; 245 | } 246 | 247 | /* Slide Header Background for h1 elements */ 248 | .remark-slide-content.header_background > h1 { 249 | display: block; 250 | position: absolute; 251 | top: 0; 252 | left: 0; 253 | width: 100%; 254 | background: var(--header-background-color); 255 | color: var(--header-background-text-color); 256 | padding: 2rem 64px 1.5rem 64px; 257 | margin-top: 0; 258 | box-sizing: border-box; 259 | } 260 | .remark-slide-content.header_background { 261 | padding-top: 7rem; 262 | } 263 | 264 | @page { margin: 0; } 265 | @media print { 266 | .remark-slide-scaler { 267 | width: 100% !important; 268 | height: 100% !important; 269 | transform: scale(1) !important; 270 | top: 0 !important; 271 | left: 0 !important; 272 | } 273 | } 274 | 275 | .primary { 276 | color: var(--primary); 277 | } 278 | .bg-primary { 279 | background-color: var(--primary); 280 | } 281 | .secondary { 282 | color: var(--secondary); 283 | } 284 | .bg-secondary { 285 | background-color: var(--secondary); 286 | } 287 | .white { 288 | color: var(--white); 289 | } 290 | 
.bg-white { 291 | background-color: var(--white); 292 | } 293 | .black { 294 | color: var(--black); 295 | } 296 | .bg-black { 297 | background-color: var(--black); 298 | } 299 | 300 | 301 | .column { 302 | float: left; 303 | width: 33.33%; 304 | } 305 | 306 | /* Clear floats after the columns */ 307 | .row:after { 308 | content: ""; 309 | display: table; 310 | clear: both; 311 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tidy Survey Analysis in R using the srvyr Package 2 | Materials for a forthcoming short course presented in 3 sessions. 3 | 4 | - **RawData** folder contains public use file data along with any documentation 5 | - American National Election Studies, 2016 6 | - Residential Energy Consumption Survey, 2015 7 | - **DataCleaningScripts** folder contains scripts for making public use files analysis ready 8 | - Create derived variables 9 | - Renames some variables 10 | - Selects fewer variables just for examples 11 | - **Data** folder contains data files ready for analysis in presentation and examples 12 | - **Presentation** folder contains the slides for the course 13 | - Includes Rmd to create slides 14 | - Slides are available in html, pptx, R, and PDF 15 | - **Exercises** folder contains RMD and R files with exercises and solutions to practice concepts 16 | - **Codebooks** folder contains HTML and MD codebooks for the variables used from ANES 2020 and RECS 2015. 17 | 18 | ## Sources 19 | 20 | - The American National Election Studies (https://electionstudies.org/). These materials are based on work supported by the National Science Foundation under grant numbers SES 1444721, 2014-2017, the University of Michigan, and Stanford University. 21 | 22 | - *Residential Energy Consumption Survey: Using the 2015 Microdata File to Compute Estimates and Standard Errors.* U.S. 
Department of Energy (2017) https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf 23 | 24 | - Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0. https://allisonhorst.github.io/palmerpenguins/ 25 | 26 | - T. Lumley (2020) "survey: analysis of complex survey samples". R package version 4.0. https://r-survey.r-forge.r-project.org/survey/ 27 | 28 | - Greg Freedman Ellis and Ben Schneider (2020). srvyr: 'dplyr'-Like Syntax for Summary Statistics of Survey Data. R package version 1.0.0. https://CRAN.R-project.org/package=srvyr 29 | 30 | - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.5. https://CRAN.R-project.org/package=dplyr 31 | -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016.sav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016.sav -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf -------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf 
-------------------------------------------------------------------------------- /RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf -------------------------------------------------------------------------------- /RawData/ANES_2020/anes_timeseries_2020_methodology_userguidecodebook_20210719.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_methodology_userguidecodebook_20210719.pdf -------------------------------------------------------------------------------- /RawData/ANES_2020/anes_timeseries_2020_questionnaire_20210719.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_questionnaire_20210719.pdf -------------------------------------------------------------------------------- /RawData/ANES_2020/anes_timeseries_2020_questionnaire_screener_20210719.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_questionnaire_screener_20210719.pdf -------------------------------------------------------------------------------- /RawData/ANES_2020/anes_timeseries_2020_spss_20220210.sav: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_spss_20220210.sav -------------------------------------------------------------------------------- /RawData/ANES_2020/anes_timeseries_2020_userguidecodebook_20220210.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_userguidecodebook_20220210.pdf -------------------------------------------------------------------------------- /RawData/RECS_2015/2020_RECS-457A.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/RECS_2015/2020_RECS-457A.pdf -------------------------------------------------------------------------------- /RawData/RECS_2015/README.md: -------------------------------------------------------------------------------- 1 | # Residential Energy Consumption Survey (RECS) 2015 2 | 3 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021. 
-------------------------------------------------------------------------------- /RawData/RECS_2015/codebook_publicv4.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/RECS_2015/codebook_publicv4.xlsx -------------------------------------------------------------------------------- /RawData/RECS_2015/microdata_v3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/RECS_2015/microdata_v3.pdf -------------------------------------------------------------------------------- /tidy-survey-short-course.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 3 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /xaringan-themer.css: -------------------------------------------------------------------------------- 1 | /* ------------------------------------------------------- 2 | * 3 | * !! This file was generated by xaringanthemer !! 4 | * 5 | * Changes made to this file directly will be overwritten 6 | * if you used xaringanthemer in your xaringan slides Rmd 7 | * 8 | * Issues or likes? 9 | * - https://github.com/gadenbuie/xaringanthemer 10 | * - https://www.garrickadenbuie.com 11 | * 12 | * Need help? 
Try: 13 | * - vignette(package = "xaringanthemer") 14 | * - ?xaringanthemer::style_xaringan 15 | * - xaringan wiki: https://github.com/yihui/xaringan/wiki 16 | * - remarkjs wiki: https://github.com/gnab/remark/wiki 17 | * 18 | * Version: 0.4.1 19 | * 20 | * ------------------------------------------------------- */ 21 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap); 22 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap); 23 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap); 24 | 25 | 26 | :root { 27 | /* Fonts */ 28 | --text-font-family: 'Noto Sans'; 29 | --text-font-is-google: 1; 30 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial; 31 | --text-font-base: sans-serif; 32 | --header-font-family: Cabin; 33 | --header-font-is-google: 1; 34 | --header-font-family-fallback: Georgia, serif; 35 | --code-font-family: 'Source Code Pro'; 36 | --code-font-is-google: 1; 37 | --base-font-size: 20px; 38 | --text-font-size: 1rem; 39 | --code-font-size: 0.9rem; 40 | --code-inline-font-size: 1em; 41 | --header-h1-font-size: 2.75rem; 42 | --header-h2-font-size: 2.25rem; 43 | --header-h3-font-size: 1.75rem; 44 | 45 | /* Colors */ 46 | --text-color: #000000; 47 | --header-color: #1E4F96; 48 | --background-color: #FFFFFF; 49 | --link-color: #1E4F96; 50 | --text-bold-color: #1E4F96; 51 | --code-highlight-color: rgba(255,255,0,0.5); 52 | --inverse-text-color: #000000; 53 | --inverse-background-color: #00A3E0; 54 | --inverse-header-color: #FFFFFF; 55 | --inverse-link-color: #1E4F96; 56 | --title-slide-background-color: #1E4F96; 57 | --title-slide-text-color: #FFFFFF; 58 | --header-background-color: #1E4F96; 59 | --header-background-text-color: #FFFFFF; 60 | --primary: #1E4F96; 61 | --secondary: #00A3E0; 62 | --white: #FFFFFF; 63 | --black: #000000; 64 | } 65 | 66 | html 
{ 67 | font-size: var(--base-font-size); 68 | } 69 | 70 | body { 71 | font-family: var(--text-font-family), var(--text-font-family-fallback), var(--text-font-base); 72 | font-weight: normal; 73 | color: var(--text-color); 74 | } 75 | h1, h2, h3 { 76 | font-family: var(--header-font-family), var(--header-font-family-fallback); 77 | font-weight: 600; 78 | color: var(--header-color); 79 | } 80 | .remark-slide-content { 81 | background-color: var(--background-color); 82 | font-size: 1rem; 83 | padding: 16px 64px 16px 64px; 84 | width: 100%; 85 | height: 100%; 86 | } 87 | .remark-slide-content h1 { 88 | font-size: var(--header-h1-font-size); 89 | } 90 | .remark-slide-content h2 { 91 | font-size: var(--header-h2-font-size); 92 | } 93 | .remark-slide-content h3 { 94 | font-size: var(--header-h3-font-size); 95 | } 96 | .remark-code, .remark-inline-code { 97 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; 98 | } 99 | .remark-code { 100 | font-size: var(--code-font-size); 101 | } 102 | .remark-inline-code { 103 | font-size: var(--code-inline-font-size); 104 | color: #1E4F96; 105 | } 106 | .remark-slide-number { 107 | color: #1E4F96; 108 | opacity: 1; 109 | font-size: 0.9rem; 110 | } 111 | strong { 112 | font-weight: bold; 113 | color: var(--text-bold-color); 114 | } 115 | a, a > code { 116 | color: var(--link-color); 117 | text-decoration: none; 118 | } 119 | .footnote { 120 | position: absolute; 121 | bottom: 60px; 122 | padding-right: 4em; 123 | font-size: 0.9em; 124 | } 125 | .remark-code-line-highlighted { 126 | background-color: var(--code-highlight-color); 127 | } 128 | .inverse { 129 | background-color: var(--inverse-background-color); 130 | color: var(--inverse-text-color); 131 | 132 | } 133 | .inverse h1, .inverse h2, .inverse h3 { 134 | color: var(--inverse-header-color); 135 | } 136 | .inverse a, .inverse a > code { 137 | color: var(--inverse-link-color); 138 | } 139 | .title-slide, .title-slide h1, 
.title-slide h2, .title-slide h3 { 140 | color: var(--title-slide-text-color); 141 | } 142 | .title-slide { 143 | background-color: var(--title-slide-background-color); 144 | } 145 | .title-slide .remark-slide-number { 146 | display: none; 147 | } 148 | /* Two-column layout */ 149 | .left-column { 150 | width: 20%; 151 | height: 92%; 152 | float: left; 153 | } 154 | .left-column h2, .left-column h3 { 155 | color: #1E4F9699; 156 | } 157 | .left-column h2:last-of-type, .left-column h3:last-child { 158 | color: #1E4F96; 159 | } 160 | .right-column { 161 | width: 75%; 162 | float: right; 163 | padding-top: 1em; 164 | } 165 | .pull-left { 166 | float: left; 167 | width: 47%; 168 | } 169 | .pull-right { 170 | float: right; 171 | width: 47%; 172 | } 173 | .pull-right + * { 174 | clear: both; 175 | } 176 | img, video, iframe { 177 | max-width: 100%; 178 | } 179 | blockquote { 180 | border-left: solid 5px #00A3E080; 181 | padding-left: 1em; 182 | } 183 | .remark-slide table { 184 | margin: auto; 185 | border-top: 1px solid #666; 186 | border-bottom: 1px solid #666; 187 | } 188 | .remark-slide table thead th { 189 | border-bottom: 1px solid #ddd; 190 | } 191 | th, td { 192 | padding: 5px; 193 | } 194 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { 195 | background: #CCECF8; 196 | } 197 | table.dataTable tbody { 198 | background-color: var(--background-color); 199 | color: var(--text-color); 200 | } 201 | table.dataTable.display tbody tr.odd { 202 | background-color: var(--background-color); 203 | } 204 | table.dataTable.display tbody tr.even { 205 | background-color: #CCECF8; 206 | } 207 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover { 208 | background-color: rgba(255, 255, 255, 0.5); 209 | } 210 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate { 211 | color: 
var(--text-color); 212 | } 213 | .dataTables_wrapper .dataTables_paginate .paginate_button { 214 | color: var(--text-color) !important; 215 | } 216 | 217 | /* Horizontal alignment of code blocks */ 218 | .remark-slide-content.left pre, 219 | .remark-slide-content.center pre, 220 | .remark-slide-content.right pre { 221 | text-align: start; 222 | width: max-content; 223 | max-width: 100%; 224 | } 225 | .remark-slide-content.left pre, 226 | .remark-slide-content.right pre { 227 | min-width: 50%; 228 | min-width: min(40ch, 100%); 229 | } 230 | .remark-slide-content.center pre { 231 | min-width: 66%; 232 | min-width: min(50ch, 100%); 233 | } 234 | .remark-slide-content.left pre { 235 | margin-left: unset; 236 | margin-right: auto; 237 | } 238 | .remark-slide-content.center pre { 239 | margin-left: auto; 240 | margin-right: auto; 241 | } 242 | .remark-slide-content.right pre { 243 | margin-left: auto; 244 | margin-right: unset; 245 | } 246 | 247 | /* Slide Header Background for h1 elements */ 248 | .remark-slide-content.header_background > h1 { 249 | display: block; 250 | position: absolute; 251 | top: 0; 252 | left: 0; 253 | width: 100%; 254 | background: var(--header-background-color); 255 | color: var(--header-background-text-color); 256 | padding: 2rem 64px 1.5rem 64px; 257 | margin-top: 0; 258 | box-sizing: border-box; 259 | } 260 | .remark-slide-content.header_background { 261 | padding-top: 7rem; 262 | } 263 | 264 | @page { margin: 0; } 265 | @media print { 266 | .remark-slide-scaler { 267 | width: 100% !important; 268 | height: 100% !important; 269 | transform: scale(1) !important; 270 | top: 0 !important; 271 | left: 0 !important; 272 | } 273 | } 274 | 275 | .primary { 276 | color: var(--primary); 277 | } 278 | .bg-primary { 279 | background-color: var(--primary); 280 | } 281 | .secondary { 282 | color: var(--secondary); 283 | } 284 | .bg-secondary { 285 | background-color: var(--secondary); 286 | } 287 | .white { 288 | color: var(--white); 289 | } 290 | 
.bg-white { 291 | background-color: var(--white); 292 | } 293 | .black { 294 | color: var(--black); 295 | } 296 | .bg-black { 297 | background-color: var(--black); 298 | } 299 | 300 | --------------------------------------------------------------------------------