├── .gitignore
├── Codebooks
├── ANES 2020 Derived Variable Codebook.Rmd
├── ANES-2020-Derived-Variable-Codebook.html
├── ANES-2020-Derived-Variable-Codebook.md
├── RECS 2015 Codebook.Rmd
├── RECS-2015-Codebook.html
├── RECS-2015-Codebook.md
└── style.css
├── Data
├── anes.rds
├── anes_2020.rds
└── recs.rds
├── DataCleaningScripts
├── ANES_DataPrep.Rmd
├── ANES_DataPrep.md
├── ANES_DataPrep_2020.Rmd
├── ANES_DataPrep_2020.md
├── RECS_DataPrep.Rmd
├── RECS_DataPrep.md
├── TargetPopulation.Rmd
└── TargetPopulation.md
├── Exercises
├── CategorialExercises.R
├── CategorialExercises.Rmd
├── CategorialExercises_solutions.R
├── CategorialExercises_solutions.Rmd
├── CategorialExercises_solutions.html
├── ContinuousExercises.R
├── ContinuousExercises.Rmd
├── ContinuousExercises_solutions.R
├── ContinuousExercises_solutions.Rmd
├── ContinuousExercises_solutions.html
├── DesignDerivedVariablesExercises.R
├── DesignDerivedVariablesExercises.Rmd
├── DesignDerivedVariablesExercises_cache
│ └── html
│ │ ├── __packages
│ │ ├── datin_95c553bea5c677086a0878157dbd740f.RData
│ │ ├── datin_95c553bea5c677086a0878157dbd740f.rdb
│ │ └── datin_95c553bea5c677086a0878157dbd740f.rdx
├── DesignDerivedVariablesExercises_solutions.R
├── DesignDerivedVariablesExercises_solutions.Rmd
├── DesignDerivedVariablesExercises_solutions.html
├── DesignDerivedVariablesExercises_solutions_cache
│ └── html
│ │ ├── __packages
│ │ ├── datin_71bc85b99d78d2975dbdaf1205650ccd.RData
│ │ ├── datin_71bc85b99d78d2975dbdaf1205650ccd.rdb
│ │ └── datin_71bc85b99d78d2975dbdaf1205650ccd.rdx
├── WarmUpExercises.R
├── WarmUpExercises.Rmd
├── WarmUpExercises_solutions.R
├── WarmUpExercises_solutions.Rmd
└── WarmUpExercises_solutions.html
├── FinalizeMaterials.R
├── LICENSE
├── Presentation
├── Images
│ ├── IsabellaVelasquez_Headshot.jpeg
│ ├── MAPOR-Logo1.png
│ ├── Project-Contents.png
│ ├── RebeccaPowell_Headshot.jpeg
│ └── StephanieZimmer_Headshot.jpeg
├── Slides-day-1.R
├── Slides-day-1.Rmd
├── Slides-day-1.html
├── Slides-day-1.pdf
├── Slides-day-1.pptx
├── Slides-day-1_files
│ ├── crosstalk-1.2.0
│ │ ├── css
│ │ │ └── crosstalk.min.css
│ │ ├── js
│ │ │ ├── crosstalk.js
│ │ │ ├── crosstalk.js.map
│ │ │ ├── crosstalk.min.js
│ │ │ └── crosstalk.min.js.map
│ │ └── scss
│ │ │ └── crosstalk.scss
│ ├── datatables-binding-0.20
│ │ └── datatables.js
│ ├── datatables-binding-0.21
│ │ └── datatables.js
│ ├── datatables-binding-0.22
│ │ └── datatables.js
│ ├── datatables-css-0.0.0
│ │ └── datatables-crosstalk.css
│ ├── dt-core-1.11.3
│ │ ├── css
│ │ │ ├── jquery.dataTables.extra.css
│ │ │ └── jquery.dataTables.min.css
│ │ └── js
│ │ │ └── jquery.dataTables.min.js
│ ├── header-attrs-2.11.22
│ │ └── header-attrs.js
│ ├── header-attrs-2.11
│ │ └── header-attrs.js
│ ├── header-attrs-2.13
│ │ └── header-attrs.js
│ ├── htmlwidgets-1.5.4
│ │ └── htmlwidgets.js
│ └── jquery-3.6.0
│ │ ├── jquery-3.6.0.js
│ │ ├── jquery-3.6.0.min.js
│ │ └── jquery-3.6.0.min.map
├── Slides-day-2.R
├── Slides-day-2.Rmd
├── Slides-day-2.html
├── Slides-day-2.pdf
├── Slides-day-2.pptx
├── Slides-day-2_files
│ ├── figure-html
│ │ └── plot_sf_elbill_disp-1.png
│ ├── header-attrs-2.11.22
│ │ └── header-attrs.js
│ └── header-attrs-2.13
│ │ └── header-attrs.js
├── Slides-day-3.R
├── Slides-day-3.Rmd
├── Slides-day-3.html
├── Slides-day-3.pdf
├── Slides-day-3.pptx
├── Slides-day-3_cache
│ └── html
│ │ ├── __packages
│ │ ├── acsin_2028e4f3797786227f2217d2d50f2b92.RData
│ │ ├── acsin_2028e4f3797786227f2217d2d50f2b92.rdb
│ │ └── acsin_2028e4f3797786227f2217d2d50f2b92.rdx
├── Slides-day-3_files
│ ├── figure-html
│ │ └── der3c-1.png
│ ├── header-attrs-2.11.22
│ │ └── header-attrs.js
│ └── header-attrs-2.13
│ │ └── header-attrs.js
└── xaringan-themer-mod.css
├── README.md
├── RawData
├── ANES_2016
│ ├── anes_timeseries_2016.sav
│ ├── anes_timeseries_2016_qnaire_post.pdf
│ ├── anes_timeseries_2016_qnaire_pre.pdf
│ └── anes_timeseries_2016_userguidecodebook.pdf
├── ANES_2020
│ ├── anes_timeseries_2020_methodology_userguidecodebook_20210719.pdf
│ ├── anes_timeseries_2020_questionnaire_20210719.pdf
│ ├── anes_timeseries_2020_questionnaire_screener_20210719.pdf
│ ├── anes_timeseries_2020_spss_20220210.sav
│ └── anes_timeseries_2020_userguidecodebook_20220210.pdf
└── RECS_2015
│ ├── 2020_RECS-457A.pdf
│ ├── README.md
│ ├── codebook_publicv4.xlsx
│ ├── microdata_v3.pdf
│ └── recs2015_public_v4.csv
├── tidy-survey-short-course.Rproj
└── xaringan-themer.css
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | debug.log
6 | .DS_Store
7 |
--------------------------------------------------------------------------------
/Codebooks/ANES 2020 Derived Variable Codebook.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "ANES 2020 Derived Variable Codebook"
3 | output:
4 | html_document:
5 | css: "style.css"
6 | toc: true
7 | toc_depth: 4
8 | toc_float: true
9 | self_contained: true
10 | github_document:
11 | toc: true
12 | always_allow_html: true
13 | ---
14 |
15 | ```{r setup, include = FALSE}
16 | knitr::opts_chunk$set(echo = FALSE) # default for every later chunk: show results without echoing the code
17 | library(dplyr)
18 | library(janitor)
19 | library(kableExtra)
20 | library(knitr)
21 |
22 | anes_2020 <- readRDS(here::here("Data", "anes_2020.rds")) %>% # load the prepared ANES 2020 data from Data/anes_2020.rds
23 | haven::zap_labels() # strip haven value labels before tabulating (presumably so the raw codes are shown - confirm)
24 | ```
25 |
26 | The full codebook with the original variables is available at electionstudies.org.
27 |
28 | ## Weighting variables
29 |
30 | ### V200010b
31 |
32 | Full sample post-election weight.
33 |
34 | ### V200010d
35 |
36 | Full sample variance stratum.
37 |
38 | ### V200010c
39 |
40 | Full sample variance unit.
41 |
42 | ## InterviewMode
43 |
44 | MODE OF INTERVIEW: PRE-ELECTION INTERVIEW
45 |
46 | ```{r variable-InterviewMode}
47 | anes_2020 %>% # cross-tabulate the source variable against its derived counterpart
48 | count(V200002, InterviewMode) %>% # one row per (V200002, InterviewMode) combination, with count n
49 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% # share of all rows, rounded to 3 decimals
50 | janitor::adorn_totals() %>% # appends a totals row; no other table visible in this file does this
51 | kbl() %>%
52 | kable_minimal()
53 | ```
54 | Variables used: V200002
55 |
56 | ## Age
57 |
58 | RESPONDENT AGE
59 |
60 | ```{r variable-Age}
61 | anes_2020 %>%
62 | count(AgeGroup) %>%
63 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
64 | kbl() %>%
65 | kable_minimal()
66 | ```
67 |
68 | Variables used: V201507x
69 |
70 | ## Gender
71 |
72 | PRE: WHAT IS YOUR (R) SEX? [REVISED]
73 |
74 | What is your sex?
75 |
76 | ```{r variable-Gender}
77 | anes_2020 %>%
78 | count(V201600, Gender) %>%
79 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
80 | kbl() %>%
81 | kable_minimal()
82 | ```
83 |
84 | Variables used: V201600
85 |
86 | ## RaceEth
87 |
88 | PRE: SUMMARY: R SELF-IDENTIFIED RACE/ETHNICITY
89 |
90 | ```{r variable-RaceEth}
91 | anes_2020 %>%
92 | count(V201549x, RaceEth) %>%
93 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
94 | kbl() %>%
95 | kable_minimal()
96 | ```
97 |
98 | Variables used: V201549x
99 |
100 | ## PartyID
101 |
102 | PRE: SUMMARY: PARTY ID
103 |
104 | ```{r variable-PartyID}
105 | anes_2020 %>%
106 | count(V201231x, PartyID) %>%
107 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
108 | kbl() %>%
109 | kable_minimal()
110 | ```
111 |
112 | Variables used: V201231x
113 |
114 | ## Education
115 |
116 | What is the highest level of school you have completed or the
117 | highest degree you have received?
118 |
119 | ```{r variable-Education}
120 | anes_2020 %>%
121 | count(V201510, Education) %>%
122 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
123 | kbl() %>%
124 | kable_minimal()
125 | ```
126 |
127 | Variables used: V201510
128 |
129 | ## Income
130 |
131 | PRE: SUMMARY: TOTAL (FAMILY) INCOME
132 |
133 | ```{r variable-Income}
134 | anes_2020 %>%
135 | count(V201617x, Income) %>%
136 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
137 | kbl() %>%
138 | kable_minimal()
139 | ```
140 |
141 | ```{r variable-Income7}
142 | anes_2020 %>% # frequency table for the collapsed income variable Income7
143 | count(V201617x, Income7) %>% # counts per (V201617x, Income7) pair
144 | group_by(Income7) %>%
145 | summarise(n = sum(n)) %>% # re-aggregate so there is one row per Income7 level
146 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% # share of all rows, rounded to 3 decimals
147 | kbl() %>%
148 | kable_minimal()
149 | ```
150 |
151 | Variables used: V201617x
152 |
153 | ## CampaignInterest
154 |
155 | PRE: HOW INTERESTED IN FOLLOWING CAMPAIGNS
156 |
157 | Some people don’t pay much attention to political campaigns. How about you? Would you say that you have been very much interested, somewhat interested or not much interested in the political campaigns so far this year?
158 |
159 | ```{r variable-CampaignInterest}
160 | anes_2020 %>%
161 | count(V201006, CampaignInterest) %>%
162 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
163 | kbl() %>%
164 | kable_minimal()
165 | ```
166 |
167 | Variables used: V201006
168 |
169 | ## TrustGovernment
170 |
171 | PRE: HOW OFTEN TRUST GOVERNMENT IN WASHINGTON TO DO WHAT IS RIGHT [REVISED]
172 |
173 | How often can you trust the federal government in Washington to do what is right?
174 |
175 | ```{r variable-TrustGovernment}
176 | anes_2020 %>%
177 | count(V201233, TrustGovernment) %>%
178 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
179 | kbl() %>%
180 | kable_minimal()
181 | ```
182 |
183 | Variables used: V201233
184 |
185 | ## TrustPeople
186 |
187 | PRE: HOW OFTEN CAN PEOPLE BE TRUSTED
188 |
189 | Generally speaking, how often can you trust other people?
190 |
191 | ```{r variable-TrustPeople}
192 | anes_2020 %>% # cross-tabulate the source variable against its derived counterpart
193 | count(V201237, TrustPeople) %>% # one row per (V201237, TrustPeople) combination, with count n
194 | rename(Label = V201237) %>% # NOTE(review): only table that renames the source column to "Label" - confirm this is intended
195 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>% # share of all rows, rounded to 3 decimals
196 | kbl() %>%
197 | kable_minimal()
198 | ```
199 |
200 | Variables used: V201237
201 |
202 | ## VotedPres2016
203 |
204 | PRE: DID R VOTE FOR PRESIDENT IN 2016
205 |
206 | Four years ago, in 2016, Hillary Clinton ran on the Democratic ticket against Donald Trump for the Republicans. Do you remember for sure whether or not you voted in that election?
207 |
208 | *Revised:* Four years ago, in 2016, Hillary Clinton ran on the Democratic ticket against Donald Trump for the Republicans. We talk to many people who tell us they did not vote. And we talk to a few people who tell us they did vote, who really did not. We can tell they did not vote by checking with official government records. What about you? If we check the official government voter records, will they show that you voted in the 2016 presidential election, or that you did not vote in that election?
209 |
210 | ```{r variable-VotedPres2016}
211 | anes_2020 %>%
212 | count(V201101, V201102, VotedPres2016) %>%
213 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
214 | kbl() %>%
215 | kable_minimal()
216 | ```
217 |
218 | Variables used: V201101, V201102
219 |
220 | ## VotedPres2016_selection
221 |
222 | PRE: RECALL OF LAST (2016) PRESIDENTIAL VOTE CHOICE
223 |
224 | Which one did you vote for?
225 |
226 | ```{r variable-VotedPres2016_selection}
227 | anes_2020 %>%
228 | count(V201103, VotedPres2016_selection) %>%
229 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
230 | kbl() %>%
231 | kable_minimal()
232 | ```
233 |
234 | Variables used: V201103
235 |
236 | ## VotedPres2020
237 |
238 | PRE-POST: SUMMARY: VOTER TURNOUT IN 2020
239 |
240 | ```{r variable-VotedPres2020}
241 | anes_2020 %>%
242 | count(V202109x, VotedPres2020) %>%
243 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
244 | kbl() %>%
245 | kable_minimal()
246 | ```
247 |
248 | Variables used: V202109x
249 |
250 | ## VotedPres2020_selection
251 |
252 | POST: FOR WHOM DID R VOTE FOR PRESIDENT
253 |
254 | ```{r variable-VotedPres2020_selection}
255 | anes_2020 %>%
256 | count(V202073, VotedPres2020_selection) %>%
257 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
258 | kbl() %>%
259 | kable_minimal()
260 | ```
261 |
262 | Variables used: V202073
263 |
264 | ## EarlyVote2020 / VotedPres2020
265 |
266 | PRE: SUMMARY: REGISTRATION AND EARLY VOTE STATUS
267 |
268 | ```{r variable-EarlyVote2020}
269 | anes_2020 %>%
270 | count(V201025x, V202109x, VotedPres2020, EarlyVote2020) %>%
271 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
272 | kbl() %>%
273 | kable_minimal()
274 | ```
275 |
276 | Variables used: V201025x, V202109x
--------------------------------------------------------------------------------
/Codebooks/RECS 2015 Codebook.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "RECS 2015 Codebook"
3 | output:
4 | html_document:
5 | css: "style.css"
6 | toc: true
7 | toc_depth: 4
8 | toc_float: true
9 | self_contained: true
10 | github_document:
11 | toc: true
12 | always_allow_html: true
13 | ---
14 |
15 | ```{r setup, include = FALSE}
16 | knitr::opts_chunk$set(echo = FALSE) # default for every later chunk: show results without echoing the code
17 | library(dplyr)
18 | library(janitor)
19 | library(kableExtra)
20 | library(knitr)
21 |
22 | recs <- readRDS(here::here("Data", "recs.rds")) %>% # load the prepared RECS 2015 data from Data/recs.rds
23 | haven::zap_labels() # strip haven value labels before tabulating (presumably so the raw codes are shown - confirm)
24 | ```
25 |
26 | The full codebook with the original variables is available at eia.gov.
27 |
28 | ## Weighting variables
29 |
30 | ### DOEID
31 |
32 | Unique identifier for each respondent
33 |
34 | ### NWEIGHT
35 |
36 | Final sample weight
37 |
38 | ### BRRWT
39 |
40 | Replicate weights
41 |
42 | ## Categorical
43 |
44 | ### Region
45 |
46 | ```{r variable-Region}
47 | recs %>%
48 | count(Region) %>%
49 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
50 | kbl() %>%
51 | kable_minimal()
52 | ```
53 | Variables used: REGIONC
54 |
55 | ### Division
56 |
57 | ```{r variable-Division}
58 | recs %>%
59 | count(Division) %>%
60 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
61 | kbl() %>%
62 | kable_minimal()
63 | ```
64 | Variables used: DIVISION
65 |
66 | ### MSAStatus
67 |
68 | ```{r variable-MSAStatus}
69 | recs %>%
70 | count(MSAStatus) %>%
71 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
72 | kbl() %>%
73 | kable_minimal()
74 | ```
75 | Variables used: METROMICRO
76 |
77 | ### Urbanicity
78 |
79 | ```{r variable-Urbanicity}
80 | recs %>%
81 | count(Urbanicity) %>%
82 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
83 | kbl() %>%
84 | kable_minimal()
85 | ```
86 |
87 | Variables used: UATYP10
88 |
89 | ### HousingUnitType
90 |
91 | ```{r variable-HousingUnitType}
92 | recs %>%
93 | count(HousingUnitType) %>%
94 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
95 | kbl() %>%
96 | kable_minimal()
97 | ```
98 |
99 | Variables used: TYPEHUQ
100 |
101 | ### YearMade
102 |
103 | ```{r variable-YearMade}
104 | recs %>%
105 | count(YearMade) %>%
106 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
107 | kbl() %>%
108 | kable_minimal()
109 | ```
110 |
111 | Variables used: YEARMADERANGE
112 |
113 | ### SpaceHeatingUsed
114 |
115 | ```{r variable-SpaceHeatingUsed}
116 | recs %>%
117 | count(SpaceHeatingUsed) %>%
118 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
119 | kbl() %>%
120 | kable_minimal()
121 | ```
122 |
123 | Variables used: HEATHOME
124 |
125 | ### HeatingBehavior
126 |
127 | ```{r variable-HeatingBehavior}
128 | recs %>%
129 | count(HeatingBehavior) %>%
130 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
131 | kbl() %>%
132 | kable_minimal()
133 | ```
134 |
135 | Variables used: EQUIPMUSE
136 |
137 | ### ACUsed
138 |
139 | ```{r variable-ACUsed}
140 | recs %>%
141 | count(ACUsed) %>%
142 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
143 | kbl() %>%
144 | kable_minimal()
145 | ```
146 |
147 | Variables used: AIRCOND
148 |
149 | ### ACBehavior
150 |
151 | ```{r variable-ACBehavior}
152 | recs %>%
153 | count(ACBehavior) %>%
154 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
155 | kbl() %>%
156 | kable_minimal()
157 | ```
158 |
159 | Variables used: USECENAC
160 |
161 | ### ClimateRegion_BA
162 |
163 | ```{r variable-ClimateRegion_BA}
164 | recs %>%
165 | count(ClimateRegion_BA) %>%
166 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
167 | kbl() %>%
168 | kable_minimal()
169 | ```
170 |
171 | Variables used: CLIMATE_REGION_PUB
172 |
173 | ### ClimateRegion_IECC
174 |
175 | ```{r variable-ClimateRegion_IECC}
176 | recs %>%
177 | count(ClimateRegion_IECC) %>%
178 | mutate(`Unweighted Freq` = round(n / sum(n), 3)) %>%
179 | kbl() %>%
180 | kable_minimal()
181 | ```
182 |
183 | Variables used: IECC_CLIMATE_PUB
184 |
185 | ## Continuous
186 |
187 | ### WinterTempDay
188 |
189 | ```{r variable-WinterTempDay}
190 | recs %>%
191 | summarize(Minimum = min(WinterTempDay, na.rm = TRUE),
192 | Median = median(WinterTempDay, na.rm = TRUE),
193 | Maximum = max(WinterTempDay, na.rm = TRUE)) %>%
194 | kbl() %>%
195 | kable_minimal()
196 | ```
197 |
198 | Variables used: TEMPHOME
199 |
200 | ### WinterTempAway
201 |
202 | ```{r variable-WinterTempAway}
203 | recs %>%
204 | summarize(Minimum = min(WinterTempAway, na.rm = TRUE),
205 | Median = median(WinterTempAway, na.rm = TRUE),
206 | Maximum = max(WinterTempAway, na.rm = TRUE)) %>%
207 | kbl() %>%
208 | kable_minimal()
209 | ```
210 |
211 | Variables used: TEMPGONE
212 |
213 | ### WinterTempNight
214 |
215 | ```{r variable-WinterTempNight}
216 | recs %>%
217 | summarize(Minimum = min(WinterTempNight, na.rm = TRUE),
218 | Median = median(WinterTempNight, na.rm = TRUE),
219 | Maximum = max(WinterTempNight, na.rm = TRUE)) %>%
220 | kbl() %>%
221 | kable_minimal()
222 | ```
223 |
224 | Variables used: TEMPNITE
225 |
226 | ### SummerTempDay
227 |
228 | ```{r variable-SummerTempDay}
229 | recs %>%
230 | summarize(Minimum = min(SummerTempDay, na.rm = TRUE),
231 | Median = median(SummerTempDay, na.rm = TRUE),
232 | Maximum = max(SummerTempDay, na.rm = TRUE)) %>%
233 | kbl() %>%
234 | kable_minimal()
235 | ```
236 |
237 | Variables used: TEMPHOMEAC
238 |
239 | ### SummerTempAway
240 |
241 | ```{r variable-SummerTempAway}
242 | recs %>%
243 | summarize(Minimum = min(SummerTempAway, na.rm = TRUE),
244 | Median = median(SummerTempAway, na.rm = TRUE),
245 | Maximum = max(SummerTempAway, na.rm = TRUE)) %>%
246 | kbl() %>%
247 | kable_minimal()
248 | ```
249 |
250 | Variables used: TEMPGONEAC
251 |
252 | ### SummerTempNight
253 |
254 | ```{r variable-SummerTempNight}
255 | recs %>%
256 | summarize(Minimum = min(SummerTempNight, na.rm = TRUE),
257 | Median = median(SummerTempNight, na.rm = TRUE),
258 | Maximum = max(SummerTempNight, na.rm = TRUE)) %>%
259 | kbl() %>%
260 | kable_minimal()
261 | ```
262 |
263 | Variables used: TEMPNITEAC
264 |
265 | ### TOTCSQFT
266 |
267 | Total cooled square footage
268 |
269 | ```{r variable-TOTCSQFT}
270 | recs %>%
271 | summarize(Minimum = min(TOTCSQFT, na.rm = TRUE),
272 | Median = median(TOTCSQFT, na.rm = TRUE),
273 | Maximum = max(TOTCSQFT, na.rm = TRUE)) %>%
274 | kbl() %>%
275 | kable_minimal()
276 | ```
277 |
278 | ### TOTHSQFT
279 |
280 | Total heated square footage
281 |
282 | ```{r variable-TOTHSQFT}
283 | recs %>%
284 | summarize(Minimum = min(TOTHSQFT, na.rm = TRUE),
285 | Median = median(TOTHSQFT, na.rm = TRUE),
286 | Maximum = max(TOTHSQFT, na.rm = TRUE)) %>%
287 | kbl() %>%
288 | kable_minimal()
289 | ```
290 |
291 | ### TOTSQFT_EN
292 |
293 | Total square footage (used for publication)
294 |
295 | ```{r variable-TOTSQFT_EN}
296 | recs %>%
297 | summarize(Minimum = min(TOTSQFT_EN, na.rm = TRUE),
298 | Median = median(TOTSQFT_EN, na.rm = TRUE),
299 | Maximum = max(TOTSQFT_EN, na.rm = TRUE)) %>%
300 | kbl() %>%
301 | kable_minimal()
302 | ```
303 |
304 | ### TOTUCSQFT
305 |
306 | Total uncooled square footage
307 |
308 | ```{r variable-TOTUCSQFT}
309 | recs %>%
310 | summarize(Minimum = min(TOTUCSQFT, na.rm = TRUE),
311 | Median = median(TOTUCSQFT, na.rm = TRUE),
312 | Maximum = max(TOTUCSQFT, na.rm = TRUE)) %>%
313 | kbl() %>%
314 | kable_minimal()
315 | ```
316 |
317 | ### TOTUSQFT
318 |
319 | Total unheated square footage
320 |
321 | ```{r variable-TOTUSQFT}
322 | recs %>%
323 | summarize(Minimum = min(TOTUSQFT, na.rm = TRUE),
324 | Median = median(TOTUSQFT, na.rm = TRUE),
325 | Maximum = max(TOTUSQFT, na.rm = TRUE)) %>%
326 | kbl() %>%
327 | kable_minimal()
328 | ```
329 |
330 | ### CDD30YR
331 |
332 | Cooling degree days, 30-year average 1981-2010, base temperature 65F
333 |
334 | ```{r variable-CDD30YR}
335 | recs %>%
336 | summarize(Minimum = min(CDD30YR, na.rm = TRUE),
337 | Median = median(CDD30YR, na.rm = TRUE),
338 | Maximum = max(CDD30YR, na.rm = TRUE)) %>%
339 | kbl() %>%
340 | kable_minimal()
341 | ```
342 |
343 | ### CDD65
344 |
345 | Cooling degree days in 2015, base temperature 65F
346 |
347 | ```{r variable-CDD65}
348 | recs %>%
349 | summarize(Minimum = min(CDD65, na.rm = TRUE),
350 | Median = median(CDD65, na.rm = TRUE),
351 | Maximum = max(CDD65, na.rm = TRUE)) %>%
352 | kbl() %>%
353 | kable_minimal()
354 | ```
355 |
356 | ### CDD80
357 |
358 | Cooling degree days in 2015, base temperature 80F (used for garage cooling load estimation only)
359 |
360 | ```{r variable-CDD80}
361 | recs %>%
362 | summarize(Minimum = min(CDD80, na.rm = TRUE),
363 | Median = median(CDD80, na.rm = TRUE),
364 | Maximum = max(CDD80, na.rm = TRUE)) %>%
365 | kbl() %>%
366 | kable_minimal()
367 | ```
368 |
369 | ### HDD30YR
370 |
371 | Heating degree days, 30-year average 1981-2010, base temperature 65F
372 |
373 | ```{r variable-HDD30YR}
374 | recs %>%
375 | summarize(Minimum = min(HDD30YR, na.rm = TRUE),
376 | Median = median(HDD30YR, na.rm = TRUE),
377 | Maximum = max(HDD30YR, na.rm = TRUE)) %>%
378 | kbl() %>%
379 | kable_minimal()
380 | ```
381 |
382 | ### HDD65
383 |
384 | Heating degree days in 2015, base temperature 65F
385 |
386 | ```{r variable-HDD65}
387 | recs %>%
388 | summarize(Minimum = min(HDD65, na.rm = TRUE),
389 | Median = median(HDD65, na.rm = TRUE),
390 | Maximum = max(HDD65, na.rm = TRUE)) %>%
391 | kbl() %>%
392 | kable_minimal()
393 | ```
394 |
395 | ### HDD50
396 |
397 | Heating degree days in 2015, base temperature 50F (used for garage heating load estimation only)
398 |
399 | ```{r variable-HDD50}
400 | recs %>%
401 | summarize(Minimum = min(HDD50, na.rm = TRUE),
402 | Median = median(HDD50, na.rm = TRUE),
403 | Maximum = max(HDD50, na.rm = TRUE)) %>%
404 | kbl() %>%
405 | kable_minimal()
406 | ```
407 |
408 | ### GNDHDD65
409 |
410 | Heating degree days of ground temperature in 2015, base temperature 65F
411 |
412 | ```{r variable-GNDHDD65}
413 | recs %>%
414 | summarize(Minimum = min(GNDHDD65, na.rm = TRUE),
415 | Median = median(GNDHDD65, na.rm = TRUE),
416 | Maximum = max(GNDHDD65, na.rm = TRUE)) %>%
417 | kbl() %>%
418 | kable_minimal()
419 | ```
420 |
421 | ### BTUEL
422 |
423 | Total site electricity usage, in thousand Btu, 2015
424 |
425 | ```{r variable-BTUEL}
426 | recs %>%
427 | summarize(Minimum = min(BTUEL, na.rm = TRUE),
428 | Median = median(BTUEL, na.rm = TRUE),
429 | Maximum = max(BTUEL, na.rm = TRUE)) %>%
430 | kbl() %>%
431 | kable_minimal()
432 | ```
433 |
434 | ### DOLLAREL
435 |
436 | Total electricity cost, in dollars, 2015
437 |
438 | ```{r variable-DOLLAREL}
439 | recs %>%
440 | summarize(Minimum = min(DOLLAREL, na.rm = TRUE),
441 | Median = median(DOLLAREL, na.rm = TRUE),
442 | Maximum = max(DOLLAREL, na.rm = TRUE)) %>%
443 | kbl() %>%
444 | kable_minimal()
445 | ```
446 |
447 | ### BTUNG
448 |
449 | Total natural gas usage, in thousand Btu, 2015
450 |
451 | ```{r variable-BTUNG}
452 | recs %>%
453 | summarize(Minimum = min(BTUNG, na.rm = TRUE),
454 | Median = median(BTUNG, na.rm = TRUE),
455 | Maximum = max(BTUNG, na.rm = TRUE)) %>%
456 | kbl() %>%
457 | kable_minimal()
458 | ```
459 |
460 | ### DOLLARNG
461 |
462 | Total natural gas cost, in dollars, 2015
463 |
464 | ```{r variable-DOLLARNG}
465 | recs %>% # minimum / median / maximum of DOLLARNG, each computed with missing values dropped
466 | summarize(Minimum = min(DOLLARNG, na.rm = TRUE),
467 | Median = median(DOLLARNG, na.rm = TRUE), # must be spelled na.rm: a misspelled name is absorbed by `...` and NAs are kept
468 | Maximum = max(DOLLARNG, na.rm = TRUE)) %>%
469 | kbl() %>%
470 | kable_minimal()
471 | ```
472 |
473 | ### BTULP
474 |
475 | Total propane usage, in thousand Btu, 2015
476 |
477 | ```{r variable-BTULP}
478 | recs %>%
479 | summarize(Minimum = min(BTULP, na.rm = TRUE),
480 | Median = median(BTULP, na.rm = TRUE),
481 | Maximum = max(BTULP, na.rm = TRUE)) %>%
482 | kbl() %>%
483 | kable_minimal()
484 | ```
485 |
486 | ### DOLLARLP
487 |
488 | Total cost of propane, in dollars, 2015
489 |
490 | ```{r variable-DOLLARLP}
491 | recs %>%
492 | summarize(Minimum = min(DOLLARLP, na.rm = TRUE),
493 | Median = median(DOLLARLP, na.rm = TRUE),
494 | Maximum = max(DOLLARLP, na.rm = TRUE)) %>%
495 | kbl() %>%
496 | kable_minimal()
497 | ```
498 |
499 | ### BTUFO
500 |
501 | Total fuel oil/kerosene usage, in thousand Btu, 2015
502 |
503 | ```{r variable-BTUFO}
504 | recs %>%
505 | summarize(Minimum = min(BTUFO, na.rm = TRUE),
506 | Median = median(BTUFO, na.rm = TRUE),
507 | Maximum = max(BTUFO, na.rm = TRUE)) %>%
508 | kbl() %>%
509 | kable_minimal()
510 | ```
511 |
512 | ### DOLLARFO
513 |
514 | Total cost of fuel oil/kerosene, in dollars, 2015
515 |
516 | ```{r variable-DOLLARFO}
517 | recs %>%
518 | summarize(Minimum = min(DOLLARFO, na.rm = TRUE),
519 | Median = median(DOLLARFO, na.rm = TRUE),
520 | Maximum = max(DOLLARFO, na.rm = TRUE)) %>%
521 | kbl() %>%
522 | kable_minimal()
523 | ```
524 |
525 | ### TOTALBTU
526 |
527 | Total usage, in thousand Btu, 2015
528 |
529 | ```{r variable-TOTALBTU}
530 | recs %>%
531 | summarize(Minimum = min(TOTALBTU, na.rm = TRUE),
532 | Median = median(TOTALBTU, na.rm = TRUE),
533 | Maximum = max(TOTALBTU, na.rm = TRUE)) %>%
534 | kbl() %>%
535 | kable_minimal()
536 | ```
537 |
538 | ### TOTALDOL
539 |
540 | Total cost, in dollars, 2015
541 |
542 | ```{r variable-TOTALDOL}
543 | recs %>%
544 | summarize(Minimum = min(TOTALDOL, na.rm = TRUE),
545 | Median = median(TOTALDOL, na.rm = TRUE),
546 | Maximum = max(TOTALDOL, na.rm = TRUE)) %>%
547 | kbl() %>%
548 | kable_minimal()
549 | ```
550 |
551 | ### BTUWOOD
552 |
553 | Total cordwood usage, in thousand Btu, 2015 (Wood consumption is not included in TOTALBTU or TOTALDOL)
554 |
555 | ```{r variable-BTUWOOD}
556 | recs %>%
557 | summarize(Minimum = min(BTUWOOD, na.rm = TRUE),
558 | Median = median(BTUWOOD, na.rm = TRUE),
559 | Maximum = max(BTUWOOD, na.rm = TRUE)) %>%
560 | kbl() %>%
561 | kable_minimal()
562 | ```
563 |
564 | ### BTUPELLET
565 |
566 | Total wood pellet usage, in thousand Btu, 2015 (Wood consumption is not included in TOTALBTU or TOTALDOL)
567 |
568 | ```{r variable-BTUPELLET}
569 | recs %>%
570 | summarize(Minimum = min(BTUPELLET, na.rm = TRUE),
571 | Median = median(BTUPELLET, na.rm = TRUE),
572 | Maximum = max(BTUPELLET, na.rm = TRUE)) %>%
573 | kbl() %>%
574 | kable_minimal()
575 | ```
--------------------------------------------------------------------------------
/Codebooks/style.css:
--------------------------------------------------------------------------------
1 |
2 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap);
3 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap);
4 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap);
5 | @import url('https://fonts.googleapis.com/css2?family=Telex&family=Ubuntu:wght@300&display=swap');
6 |
7 |
8 | :root {
9 | /* Fonts */
10 | --text-font-family: 'Noto Sans';
11 | --text-font-is-google: 1;
12 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial;
13 | --text-font-base: sans-serif;
14 | --header-font-family: Cabin;
15 | --header-font-is-google: 1;
16 | --header-font-family-fallback: Georgia, serif;
17 | --code-font-family: 'Source Code Pro';
18 | --code-font-is-google: 1;
19 | --base-font-size: 20px;
20 | --text-font-size: 1rem;
21 | --code-font-size: 0.9rem;
22 | --code-inline-font-size: 1em;
23 | --header-h1-font-size: 2.75rem;
24 | --header-h2-font-size: 2.25rem;
25 | --header-h3-font-size: 1.75rem;
26 |
27 | /* Colors */
28 | --text-color: #000000;
29 | --header-color: #1E4F96;
30 | --background-color: #FFFFFF;
31 | --link-color: #1E4F96;
32 | --text-bold-color: #1E4F96;
33 | --code-highlight-color: rgba(255,255,0,0.5);
34 | --inverse-text-color: #000000;
35 | --inverse-background-color: #00A3E0;
36 | --inverse-header-color: #FFFFFF;
37 | --inverse-link-color: #1E4F96;
38 | --title-slide-background-color: #1E4F96;
39 | --title-slide-text-color: #FFFFFF;
40 | --header-background-color: #1E4F96;
41 | --header-background-text-color: #FFFFFF;
42 | --primary: #1E4F96;
43 | --secondary: #00A3E0;
44 | --white: #FFFFFF;
45 | --black: #000000;
46 | }
47 |
48 | html {
49 | font-size: var(--base-font-size);
50 | }
51 |
52 | body {
53 | font-family: 'Telex', sans-serif;
54 | font-weight: normal;
55 | color: var(--text-color);
56 | }
57 | h1, h2, h3 {
58 | font-family: 'Ubuntu', sans-serif;
59 | font-weight: 600;
60 | color: var(--header-color);
61 | }
62 | .remark-slide-content {
63 | background-color: var(--background-color);
64 | font-size: 1rem;
65 | padding: 16px 64px 16px 64px;
66 | width: 100%;
67 | height: 100%;
68 | }
69 | .remark-slide-content h1 {
70 | font-size: var(--header-h1-font-size);
71 | }
72 | .remark-slide-content h2 {
73 | font-size: var(--header-h2-font-size);
74 | }
75 | .remark-slide-content h3 {
76 | font-size: var(--header-h3-font-size);
77 | }
78 | .remark-code, .remark-inline-code {
79 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace;
80 | }
81 | .remark-code {
82 | font-size: var(--code-font-size);
83 | }
84 | .remark-inline-code {
85 | font-size: var(--code-inline-font-size);
86 | color: #1E4F96;
87 | }
88 | .remark-slide-number {
89 | color: #1E4F96;
90 | opacity: 1;
91 | font-size: 0.9rem;
92 | }
93 | strong {
94 | font-weight: bold;
95 | color: var(--text-bold-color);
96 | }
97 | a, a > code {
98 | color: var(--link-color);
99 | text-decoration: none;
100 | }
101 | .footnote {
102 | position: absolute;
103 | bottom: 60px;
104 | padding-right: 4em;
105 | font-size: 0.9em;
106 | }
107 | .remark-code-line-highlighted {
108 | background-color: var(--code-highlight-color);
109 | }
110 | .inverse {
111 | background-color: var(--inverse-background-color);
112 | color: var(--inverse-text-color);
113 |
114 | }
115 | .inverse h1, .inverse h2, .inverse h3 {
116 | color: var(--inverse-header-color);
117 | }
118 | .inverse a, .inverse a > code {
119 | color: var(--inverse-link-color);
120 | }
121 | .title-slide, .title-slide h1, .title-slide h2, .title-slide h3 {
122 | color: var(--title-slide-text-color);
123 | }
124 | .title-slide {
125 | background-color: var(--title-slide-background-color);
126 | }
127 | .title-slide .remark-slide-number {
128 | display: none;
129 | }
130 | /* Two-column layout */
131 | .left-column {
132 | width: 20%;
133 | height: 92%;
134 | float: left;
135 | }
136 | .left-column h2, .left-column h3 {
137 | color: #1E4F9699;
138 | }
139 | .left-column h2:last-of-type, .left-column h3:last-child {
140 | color: #1E4F96;
141 | }
142 | .right-column {
143 | width: 75%;
144 | float: right;
145 | padding-top: 1em;
146 | }
147 | .pull-left {
148 | float: left;
149 | width: 47%;
150 | }
151 | .pull-right {
152 | float: right;
153 | width: 47%;
154 | }
155 | .pull-right + * {
156 | clear: both;
157 | }
/* Embedded media never overflows its container */
img,
video,
iframe {
  max-width: 100%;
}

blockquote {
  border-left: solid 5px #00A3E080;
  padding-left: 1em;
}

/* Slide tables: centred, with a rule above and below */
.remark-slide table {
  margin: auto;
  border-top: 1px solid #666;
  border-bottom: 1px solid #666;
}

/* Lighter rule under the header row */
.remark-slide table thead th {
  border-bottom: 1px solid #ddd;
}

th,
td {
  padding: 5px;
}

/* Header, footer and even rows share the same shaded background */
.remark-slide thead,
.remark-slide tfoot,
.remark-slide tr:nth-child(even) {
  background: #CCECF8;
}

/* DataTables widgets: follow the theme colours */
table.dataTable tbody {
  background-color: var(--background-color);
  color: var(--text-color);
}

table.dataTable.display tbody tr.odd {
  background-color: var(--background-color);
}

table.dataTable.display tbody tr.even {
  background-color: #CCECF8;
}

/* Row highlight on hover */
table.dataTable.hover tbody tr:hover,
table.dataTable.display tbody tr:hover {
  background-color: rgba(255, 255, 255, 0.5);
}

/* DataTables controls (length, filter, info, processing, pagination) */
.dataTables_wrapper .dataTables_length,
.dataTables_wrapper .dataTables_filter,
.dataTables_wrapper .dataTables_info,
.dataTables_wrapper .dataTables_processing,
.dataTables_wrapper .dataTables_paginate {
  color: var(--text-color);
}

.dataTables_wrapper .dataTables_paginate .paginate_button {
  color: var(--text-color) !important;
}
198 |
/* Horizontal alignment of code blocks */

/* Shared sizing: as wide as the content, never wider than the slide.
   The rule order below is kept as-is, because later rules win when a
   slide carries more than one alignment class. */
.remark-slide-content.left pre,
.remark-slide-content.center pre,
.remark-slide-content.right pre {
  text-align: start;
  width: max-content;
  max-width: 100%;
}

/* Minimum widths: the percentage is declared first as a fallback and is
   overridden by the min() declaration wherever min() is supported. */
.remark-slide-content.left pre,
.remark-slide-content.right pre {
  min-width: 50%;
  min-width: min(40ch, 100%);
}

.remark-slide-content.center pre {
  min-width: 66%;
  min-width: min(50ch, 100%);
}

/* Auto margins push the block towards the chosen side */
.remark-slide-content.left pre {
  margin-left: unset;
  margin-right: auto;
}

.remark-slide-content.center pre {
  margin-left: auto;
  margin-right: auto;
}

.remark-slide-content.right pre {
  margin-left: auto;
  margin-right: unset;
}
228 |
/* Slide Header Background for h1 elements */

/* The h1 becomes a full-width band pinned to the top of the slide */
.remark-slide-content.header_background > h1 {
  display: block;
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  background: var(--header-background-color);
  color: var(--header-background-text-color);
  padding: 2rem 64px 1.5rem 64px;
  margin-top: 0;
  box-sizing: border-box;
}

/* Extra top padding on the slide itself */
.remark-slide-content.header_background {
  padding-top: 7rem;
}

/* Printing: no page margin, and the slide scaler is forced to full size,
   unscaled, at the page origin */
@page {
  margin: 0;
}

@media print {
  .remark-slide-scaler {
    width: 100% !important;
    height: 100% !important;
    transform: scale(1) !important;
    top: 0 !important;
    left: 0 !important;
  }
}
256 |
/* Colour utilities: `.NAME` sets the text colour and `.bg-NAME` sets the
   background colour, each from the custom property of the same name. */

.primary {
  color: var(--primary);
}

.bg-primary {
  background-color: var(--primary);
}

.secondary {
  color: var(--secondary);
}

.bg-secondary {
  background-color: var(--secondary);
}

.white {
  color: var(--white);
}

.bg-white {
  background-color: var(--white);
}

.black {
  color: var(--black);
}

.bg-black {
  background-color: var(--black);
}
281 |
282 |
--------------------------------------------------------------------------------
/Data/anes.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Data/anes.rds
--------------------------------------------------------------------------------
/Data/anes_2020.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Data/anes_2020.rds
--------------------------------------------------------------------------------
/Data/recs.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Data/recs.rds
--------------------------------------------------------------------------------
/DataCleaningScripts/ANES_DataPrep.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "American National Election Studies (ANES) 2016 Time Series Study Data Prep"
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE)
8 | ```
9 |
10 | ## Data information
11 |
12 | All data and resources were downloaded from https://electionstudies.org/data-center/2016-time-series-study/ on April 3, 2021.
13 |
14 | American National Election Studies. 2019. ANES 2016 Time Series Study [dataset and documentation]. September 4, 2019 version. www.electionstudies.org
15 | ```{r loadpackageh, message=FALSE}
16 | library(here) #easy relative paths
17 | ```
18 |
19 |
20 |
21 | ```{r loadpackages}
22 | library(tidyverse) #data manipulation
23 | library(haven) #data import
24 | library(tidylog) #informative logging messages
25 | ```
26 | ## Import data and create derived variables
27 |
28 | ```{r derivedata}
anes_in <- read_sav(here("RawData", "ANES_2016", "anes_timeseries_2016.sav"))

# Build the 2016 analysis file: keep the raw ANES items used below and add
# derived variables next to them. In every case_when() recode, any code that
# is not matched explicitly falls through to NA.
anes <- anes_in %>%
  # Each raw item is listed exactly once.
  select(
    "V160102", "V160201", "V160202", "V160501", "V161004", "V161005",
    "V161006", "V161024x", "V161158x", "V161215", "V161219", "V161267",
    "V161270", "V161310x", "V161342", "V161361x", "V162031", "V162031x",
    "V162034", "V162034a", "V162062x"
  ) %>%
  mutate(
    InterviewMode = fct_recode(as.character(V160501), FTF = "1", Web = "2"),
    # Weight, stratum and variance unit under analysis-friendly names
    Weight = V160102,
    Stratum = as.factor(V160201),
    VarUnit = as.factor(V160202),
    # Non-positive age codes are treated as missing
    Age = if_else(V161267 > 0, as.numeric(V161267), NA_real_),
    AgeGroup = cut(
      Age, c(17, 29, 39, 49, 59, 69, 200),
      labels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")
    ),
    Gender = factor(
      case_when(
        V161342 == 1 ~ "Male",
        V161342 == 2 ~ "Female",
        V161342 == 3 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Male", "Female", "Other")
    ),
    # factor() never keeps NA as a level, so only real categories are listed
    RaceEth = factor(
      case_when(
        V161310x == 1 ~ "White",
        V161310x == 2 ~ "Black",
        V161310x == 5 ~ "Hispanic",
        V161310x == 3 ~ "Asian, NH/PI",
        V161310x == 4 ~ "AI/AN",
        V161310x == 6 ~ "Other/multiple race",
        TRUE ~ NA_character_
      ),
      levels = c(
        "White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN",
        "Other/multiple race"
      )
    ),
    PartyID = factor(
      case_when(
        V161158x == 1 ~ "Strong democrat",
        V161158x == 2 ~ "Not very strong democrat",
        V161158x == 3 ~ "Independent-democrat",
        V161158x == 4 ~ "Independent",
        V161158x == 5 ~ "Independent-republican",
        V161158x == 6 ~ "Not very strong republican",
        V161158x == 7 ~ "Strong republican",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Strong democrat", "Not very strong democrat", "Independent-democrat",
        "Independent", "Independent-republican", "Not very strong republican",
        "Strong republican"
      )
    ),
    # Order matters: each `<=` test only sees codes that the earlier
    # conditions did not already claim (e.g. `<= 12` effectively means 10-12).
    Education = factor(
      case_when(
        V161270 <= 0 ~ NA_character_,
        V161270 <= 8 ~ "Less than HS",
        V161270 == 9 | V161270 == 90 ~ "High school",
        V161270 <= 12 ~ "Post HS",
        V161270 == 13 ~ "Bachelor's",
        V161270 <= 16 ~ "Graduate",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Less than HS", "High school", "Post HS", "Bachelor's", "Graduate"
      )
    ),
    # Breaks are right-closed, so code k lands in the k-th label; values at
    # or below -5 fall outside the breaks and become NA.
    # NOTE(review): "$12.5-15" lacks the trailing "k" used by the other
    # labels; it is kept unchanged because the label is part of the saved data.
    Income = cut(
      V161361x, c(-5, 1:28),
      labels = c(
        "Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k",
        "$17.5-20k", "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k",
        "$30-35k", "$35-40k", "$40-45k", "$45-50k", "$50-55k", "$55-60k",
        "$60-65k", "$65-70k", "$70-75k", "$75-80k", "$80-90k", "$90-100k",
        "$100-110k", "$110-125k", "$125-150k", "$150-175k", "$175-250k",
        "$250k or more"
      )
    ),
    # Seven broader income bands built from the detailed categories
    Income7 = fct_collapse(
      Income,
      "Under $20k" = c(
        "Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k",
        "$17.5-20k"
      ),
      "$20-40k" = c(
        "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k",
        "$35-40k"
      ),
      "$40-60k" = c("$40-45k", "$45-50k", "$50-55k", "$55-60k"),
      "$60-80k" = c("$60-65k", "$65-70k", "$70-75k", "$75-80k"),
      "$80-100k" = c("$80-90k", "$90-100k"),
      "$100-125k" = c("$100-110k", "$110-125k"),
      "$125k or more" = c(
        "$125-150k", "$150-175k", "$175-250k", "$250k or more"
      )
    ),
    CampaignInterest = factor(
      case_when(
        V161004 == 1 ~ "Very much interested",
        V161004 == 2 ~ "Somewhat interested",
        V161004 == 3 ~ "Not much interested",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Very much interested", "Somewhat interested", "Not much interested"
      )
    ),
    TrustGovernment = factor(
      case_when(
        V161215 == 1 ~ "Always",
        V161215 == 2 ~ "Most of the time",
        V161215 == 3 ~ "About half the time",
        V161215 == 4 ~ "Some of the time",
        V161215 == 5 ~ "Never",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Always", "Most of the time", "About half the time",
        "Some of the time", "Never"
      )
    ),
    TrustPeople = factor(
      case_when(
        V161219 == 1 ~ "Always",
        V161219 == 2 ~ "Most of the time",
        V161219 == 3 ~ "About half the time",
        V161219 == 4 ~ "Some of the time",
        V161219 == 5 ~ "Never",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Always", "Most of the time", "About half the time",
        "Some of the time", "Never"
      )
    ),
    VotedPres2012 = factor(
      case_when(
        V161005 == 1 ~ "Yes",
        V161005 == 2 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    VotedPres2012_selection = factor(
      case_when(
        V161006 == 1 ~ "Obama",
        V161006 == 2 ~ "Romney",
        V161006 == 5 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Obama", "Romney", "Other")
    ),
    VotedPres2016 = factor(
      case_when(
        V162031x == 1 ~ "Yes",
        V162031x == 0 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    VotedPres2016_selection = factor(
      case_when(
        V162062x == 1 ~ "Clinton",
        V162062x == 2 ~ "Trump",
        V162062x >= 3 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Clinton", "Trump", "Other")
    ),
    # "Yes" when V161024x is 4; otherwise "No" for respondents recorded as
    # having voted in 2016; NA for everyone else.
    # NOTE(review): unlike the 2020 prep script, negative V161024x codes are
    # not set to NA before the "No" fallback - confirm none occur for voters.
    EarlyVote2016 = factor(
      case_when(
        V161024x == 4 ~ "Yes",
        VotedPres2016 == "Yes" ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    )
  )

summary(anes)
176 | ```
177 |
178 |
179 | ## Check derived variables for correct coding
180 |
181 | ```{r checkvars}
182 |
# Cross-tabulate every derived variable against its source item(s) so each
# recode can be checked by eye.
anes %>%
  count(InterviewMode, V160501)

# Age range, derived and raw, inside each age group
anes %>%
  group_by(AgeGroup) %>%
  summarise(
    minAge = min(Age),
    maxAge = max(Age),
    minV = min(V161267),
    maxV = max(V161267)
  )

anes %>%
  count(Gender, V161342)

anes %>%
  count(RaceEth, V161310x)

anes %>%
  count(PartyID, V161158x)

anes %>%
  count(Education, V161270)

# Print up to 30 rows so that every income row is shown
anes %>%
  count(Income, Income7, V161361x) %>%
  print(n = 30)

anes %>%
  count(CampaignInterest, V161004)

anes %>%
  count(TrustGovernment, V161215)

anes %>%
  count(TrustPeople, V161219)

anes %>%
  count(VotedPres2012, V161005)

anes %>%
  count(VotedPres2012_selection, V161006)

anes %>%
  count(VotedPres2016, V162031x)

anes %>%
  count(VotedPres2016_selection, V162062x)

anes %>%
  count(EarlyVote2016, V161024x, VotedPres2016)

# Sum of the weights
anes %>%
  summarise(WtSum = sum(Weight)) %>%
  pull(WtSum)
202 |
203 | ```
204 | ## Save data
205 |
206 | ```{r savedat}
# Save the derived 2016 file as a gzip-compressed RDS under Data/
write_rds(
  x = anes,
  file = here("Data", "anes.rds"),
  compress = "gz"
)
208 | ```
209 |
210 |
211 |
--------------------------------------------------------------------------------
/DataCleaningScripts/ANES_DataPrep_2020.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "American National Election Studies (ANES) 2020 Time Series Study Data Prep"
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE)
8 | ```
9 |
10 | ## Data information
11 |
12 | All data and resources were downloaded from https://electionstudies.org/data-center/2020-time-series-study/ on February 28, 2022.
13 |
14 | American National Election Studies. 2021. ANES 2020 Time Series Study Full Release [dataset and documentation]. www.electionstudies.org
15 | ```{r loadpackageh, message=FALSE}
16 | library(here) # easy relative paths
17 | ```
18 |
19 |
20 |
21 | ```{r loadpackages}
22 | library(tidyverse) # data manipulation
23 | library(haven) # data import
24 | library(tidylog) # informative logging messages
25 | ```
26 | ## Import data and create derived variables
27 |
28 | ```{r derivedata}
anes_in_2020 <- read_sav(
  here("RawData", "ANES_2020", "anes_timeseries_2020_spss_20220210.sav")
)

# weight validity for post-election survey
# (per respondent type: how many cases have a positive pre / post weight)
anes_in_2020 %>%
  select(V200004, V200010a, V200010b) %>%
  group_by(V200004) %>% # type of respondent
  summarise(
    n = n(),
    nvalidwt_pre = sum(!is.na(V200010a) & V200010a > 0),
    nvalidwt_post = sum(!is.na(V200010b) & V200010b > 0)
  )

# Are all PSU/Stratum represented in post-weight? If so, we can drop pre-only
# cases later. Pct is the share of each stratum/unit cell that has
# V200004 == 3, sorted so the smallest shares come first.
anes_in_2020 %>%
  count(V200010d, V200010c, V200004) %>%
  group_by(V200010d, V200010c) %>%
  mutate(
    Pct = n / sum(n)
  ) %>%
  filter(V200004 == 3) %>%
  arrange(Pct)

# Build the 2020 analysis file from the V200004 == 3 cases only: keep the raw
# items listed below and add derived variables next to them. In every
# case_when() recode, any code that is not matched explicitly becomes NA.
anes_2020 <- anes_in_2020 %>%
  filter(V200004 == 3) %>%
  select(
    "V200010b", # FULL SAMPLE POST-ELECTION WEIGHT
    "V200010d", # FULL SAMPLE VARIANCE STRATUM
    "V200010c", # FULL SAMPLE VARIANCE UNIT
    "V200002", # MODE OF INTERVIEW: PRE-ELECTION INTERVIEW
    "V201006", # PRE: HOW INTERESTED IN FOLLOWING CAMPAIGNS
    "V201102", # PRE: DID R VOTE FOR PRESIDENT IN 2016
    "V201101", # PRE: DID R VOTE FOR PRESIDENT IN 2016 [REVISED]
    "V201103", # PRE: RECALL OF LAST (2016) PRESIDENTIAL VOTE CHOICE
    "V201025x", # PRE: SUMMARY: REGISTRATION AND EARLY VOTE STATUS
    "V201231x", # PRE: SUMMARY: PARTY ID
    "V201233", # PRE: HOW OFTEN TRUST GOVERNMENT IN WASHINGTON TO DO WHAT IS RIGHT [REVISED]
    "V201237", # PRE: HOW OFTEN CAN PEOPLE BE TRUSTED
    "V201507x", # PRE: SUMMARY: RESPONDENT AGE
    "V201510", # PRE: HIGHEST LEVEL OF EDUCATION
    "V201549x", # PRE: SUMMARY: R SELF-IDENTIFIED RACE/ETHNICITY
    "V201600", # PRE: WHAT IS YOUR (R) SEX? [REVISED]
    "V201617x", # PRE: SUMMARY: TOTAL (FAMILY) INCOME
    "V202066", # POST: DID R VOTE IN NOVEMBER 2020 ELECTION
    "V202109x", # PRE-POST: SUMMARY: VOTER TURNOUT IN 2020
    "V202072", # POST: DID R VOTE FOR PRESIDENT
    "V202073", # POST: FOR WHOM DID R VOTE FOR PRESIDENT
    "V202110x" # PRE-POST: SUMMARY: 2020 PRESIDENTIAL VOTE
  ) %>%
  mutate(
    InterviewMode = fct_recode(
      as.character(V200002),
      Video = "1", Telephone = "2", Web = "3"
    ),
    Weight = V200010b,
    Stratum = as.factor(V200010d),
    VarUnit = as.factor(V200010c),
    # Non-positive age codes are treated as missing
    Age = if_else(V201507x > 0, as.numeric(V201507x), NA_real_),
    AgeGroup = cut(
      Age, c(17, 29, 39, 49, 59, 69, 200),
      labels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")
    ),
    Gender = factor(
      case_when(
        V201600 == 1 ~ "Male",
        V201600 == 2 ~ "Female",
        TRUE ~ NA_character_
      ),
      levels = c("Male", "Female")
    ),
    # factor() never keeps NA as a level, so only real categories are listed
    RaceEth = factor(
      case_when(
        V201549x == 1 ~ "White",
        V201549x == 2 ~ "Black",
        V201549x == 3 ~ "Hispanic",
        V201549x == 4 ~ "Asian, NH/PI",
        V201549x == 5 ~ "AI/AN",
        V201549x == 6 ~ "Other/multiple race",
        TRUE ~ NA_character_
      ),
      levels = c(
        "White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN",
        "Other/multiple race"
      )
    ),
    PartyID = factor(
      case_when(
        V201231x == 1 ~ "Strong democrat",
        V201231x == 2 ~ "Not very strong democrat",
        V201231x == 3 ~ "Independent-democrat",
        V201231x == 4 ~ "Independent",
        V201231x == 5 ~ "Independent-republican",
        V201231x == 6 ~ "Not very strong republican",
        V201231x == 7 ~ "Strong republican",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Strong democrat", "Not very strong democrat", "Independent-democrat",
        "Independent", "Independent-republican", "Not very strong republican",
        "Strong republican"
      )
    ),
    # Order matters: each `<=` test only sees codes that the earlier
    # conditions did not already claim (e.g. `<= 5` effectively means 3-5).
    Education = factor(
      case_when(
        V201510 <= 0 ~ NA_character_,
        V201510 == 1 ~ "Less than HS",
        V201510 == 2 ~ "High school",
        V201510 <= 5 ~ "Post HS",
        V201510 == 6 ~ "Bachelor's",
        V201510 <= 8 ~ "Graduate",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Less than HS", "High school", "Post HS", "Bachelor's", "Graduate"
      )
    ),
    # Breaks are right-closed, so code k lands in the k-th label; values at
    # or below -5 fall outside the breaks and become NA.
    Income = cut(
      V201617x, c(-5, 1:22),
      labels = c(
        "Under $9,999",
        "$10,000-14,999",
        "$15,000-19,999",
        "$20,000-24,999",
        "$25,000-29,999",
        "$30,000-34,999",
        "$35,000-39,999",
        "$40,000-44,999",
        "$45,000-49,999",
        "$50,000-59,999",
        "$60,000-64,999",
        "$65,000-69,999",
        "$70,000-74,999",
        "$75,000-79,999",
        "$80,000-89,999",
        "$90,000-99,999",
        "$100,000-109,999",
        "$110,000-124,999",
        "$125,000-149,999",
        "$150,000-174,999",
        "$175,000-249,999",
        "$250,000 or more"
      )
    ),
    # Seven broader income bands built from the detailed categories
    Income7 = fct_collapse(
      Income,
      "Under $20k" = c("Under $9,999", "$10,000-14,999", "$15,000-19,999"),
      "$20-40k" = c(
        "$20,000-24,999", "$25,000-29,999", "$30,000-34,999", "$35,000-39,999"
      ),
      "$40-60k" = c("$40,000-44,999", "$45,000-49,999", "$50,000-59,999"),
      "$60-80k" = c(
        "$60,000-64,999", "$65,000-69,999", "$70,000-74,999", "$75,000-79,999"
      ),
      "$80-100k" = c("$80,000-89,999", "$90,000-99,999"),
      "$100-125k" = c("$100,000-109,999", "$110,000-124,999"),
      "$125k or more" = c(
        "$125,000-149,999", "$150,000-174,999", "$175,000-249,999",
        "$250,000 or more"
      )
    ),
    CampaignInterest = factor(
      case_when(
        V201006 == 1 ~ "Very much interested",
        V201006 == 2 ~ "Somewhat interested",
        V201006 == 3 ~ "Not much interested",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Very much interested", "Somewhat interested", "Not much interested"
      )
    ),
    TrustGovernment = factor(
      case_when(
        V201233 == 1 ~ "Always",
        V201233 == 2 ~ "Most of the time",
        V201233 == 3 ~ "About half the time",
        V201233 == 4 ~ "Some of the time",
        V201233 == 5 ~ "Never",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Always", "Most of the time", "About half the time",
        "Some of the time", "Never"
      )
    ),
    TrustPeople = factor(
      case_when(
        V201237 == 1 ~ "Always",
        V201237 == 2 ~ "Most of the time",
        V201237 == 3 ~ "About half the time",
        V201237 == 4 ~ "Some of the time",
        V201237 == 5 ~ "Never",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Always", "Most of the time", "About half the time",
        "Some of the time", "Never"
      )
    ),
    # Either version of the 2016 turnout item can supply the answer; a "Yes"
    # on either one takes precedence over a "No".
    VotedPres2016 = factor(
      case_when(
        V201101 == 1 | V201102 == 1 ~ "Yes",
        V201101 == 2 | V201102 == 2 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    VotedPres2016_selection = factor(
      case_when(
        V201103 == 1 ~ "Clinton",
        V201103 == 2 ~ "Trump",
        V201103 == 5 ~ "Other",
        TRUE ~ NA_character_
      ),
      levels = c("Clinton", "Trump", "Other")
    ),
    VotedPres2020 = factor(
      case_when(
        V202109x == 1 ~ "Yes",
        V202109x == 0 ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    ),
    VotedPres2020_selection = factor(
      case_when(
        V202073 == 1 ~ "Biden",
        V202073 == 2 ~ "Trump",
        V202073 >= 3 & V202073 <= 8 ~ "Other",
        # every other code, including 11 and 12, is left missing
        TRUE ~ NA_character_
      ),
      levels = c("Biden", "Trump", "Other")
    ),
    # Negative V201025x codes are set to NA first so they cannot fall through
    # to the "No" branch. Then "Yes" when V201025x is 4, and "No" for the
    # remaining respondents recorded as having voted in 2020.
    EarlyVote2020 = factor(
      case_when(
        V201025x < 0 ~ NA_character_,
        V201025x == 4 ~ "Yes",
        VotedPres2020 == "Yes" ~ "No",
        TRUE ~ NA_character_
      ),
      levels = c("Yes", "No")
    )
  )

summary(anes_2020)
247 | ```
248 |
249 | ## Check derived variables for correct coding
250 |
251 | ```{r checkvars}
252 |
# Cross-tabulate every derived variable against its source item(s) so each
# recode can be checked by eye.
count(anes_2020, InterviewMode, V200002)

# Age range, derived and raw, inside each age group
summarise(
  group_by(anes_2020, AgeGroup),
  minAge = min(Age),
  maxAge = max(Age),
  minV = min(V201507x),
  maxV = max(V201507x)
)

count(anes_2020, Gender, V201600)

count(anes_2020, RaceEth, V201549x)

count(anes_2020, PartyID, V201231x)

count(anes_2020, Education, V201510)

# Print up to 30 rows so that every income row is shown
print(count(anes_2020, Income, Income7, V201617x), n = 30)

count(anes_2020, CampaignInterest, V201006)

count(anes_2020, TrustGovernment, V201233)

count(anes_2020, TrustPeople, V201237)

count(anes_2020, VotedPres2016, V201101, V201102)

count(anes_2020, VotedPres2016_selection, V201103)

count(anes_2020, VotedPres2020, V202109x)

count(anes_2020, VotedPres2020_selection, V202073)

count(anes_2020, EarlyVote2020, V201025x, VotedPres2020)

# Sum of the weights, ignoring missing values
pull(
  summarise(anes_2020, WtSum = sum(Weight, na.rm = TRUE)),
  WtSum
)
295 | ```
296 |
297 | ## Save data
298 |
299 | ```{r savedat}
# Save the derived 2020 file as a gzip-compressed RDS under Data/
write_rds(
  x = anes_2020,
  file = here("Data", "anes_2020.rds"),
  compress = "gz"
)
301 | ```
302 |
--------------------------------------------------------------------------------
/DataCleaningScripts/RECS_DataPrep.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Residential Energy Consumption Survey (RECS) 2015 Data Prep"
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE)
8 | ```
9 |
10 | ## Data information
11 |
12 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021.
13 |
14 | ```{r loadpackageh, message=FALSE}
15 | library(here) #easy relative paths
16 | ```
17 |
18 | ```{r loadpackages}
19 | library(tidyverse) #data manipulation
20 | library(haven) #data import
21 | library(tidylog) #informative logging messages
22 | ```
23 | ## Import data and create derived variables
24 |
25 | ```{r derivedata}
recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv"))

# Keep the raw RECS columns needed downstream (renaming the two wood-fuel
# BTU columns to match the other BTU* names) and add labelled versions of
# the coded items.
# NOTE(review): parse_factor() defaults to include_na = TRUE, so the NA values
# produced by the recodes below may become an explicit NA factor level -
# confirm this is intended.
recs <- recs_in %>%
  select(
    DOEID, REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, YEARMADERANGE,
    HEATHOME, EQUIPMUSE, TEMPHOME, TEMPGONE, TEMPNITE,
    AIRCOND, USECENAC, TEMPHOMEAC, TEMPGONEAC, TEMPNITEAC,
    TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT,
    NWEIGHT, starts_with("BRRWT"),
    CDD30YR, CDD65, CDD80, CLIMATE_REGION_PUB, IECC_CLIMATE_PUB,
    HDD30YR, HDD65, HDD50, GNDHDD65,
    BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO,
    TOTALBTU, TOTALDOL,
    BTUWOOD = WOODBTU, BTUPELLET = PELLETBTU
  ) %>%
  mutate(
    Region = parse_factor(
      case_when(
        REGIONC == 1 ~ "Northeast",
        REGIONC == 2 ~ "Midwest",
        REGIONC == 3 ~ "South",
        REGIONC == 4 ~ "West"
      ),
      levels = c("Northeast", "Midwest", "South", "West")
    ),
    Division = parse_factor(
      case_when(
        DIVISION == 1 ~ "New England",
        DIVISION == 2 ~ "Middle Atlantic",
        DIVISION == 3 ~ "East North Central",
        DIVISION == 4 ~ "West North Central",
        DIVISION == 5 ~ "South Atlantic",
        DIVISION == 6 ~ "East South Central",
        DIVISION == 7 ~ "West South Central",
        DIVISION == 8 ~ "Mountain North",
        DIVISION == 9 ~ "Mountain South",
        DIVISION == 10 ~ "Pacific"
      ),
      levels = c(
        "New England", "Middle Atlantic", "East North Central",
        "West North Central", "South Atlantic", "East South Central",
        "West South Central", "Mountain North", "Mountain South", "Pacific"
      )
    ),
    MSAStatus = fct_recode(
      METROMICRO,
      "Metropolitan Statistical Area" = "METRO",
      "Micropolitan Statistical Area" = "MICRO",
      "None" = "NONE"
    ),
    Urbanicity = parse_factor(
      case_when(
        UATYP10 == "U" ~ "Urban Area",
        UATYP10 == "C" ~ "Urban Cluster",
        UATYP10 == "R" ~ "Rural"
      ),
      levels = c("Urban Area", "Urban Cluster", "Rural")
    ),
    HousingUnitType = parse_factor(
      case_when(
        TYPEHUQ == 1 ~ "Mobile home",
        TYPEHUQ == 2 ~ "Single-family detached",
        TYPEHUQ == 3 ~ "Single-family attached",
        TYPEHUQ == 4 ~ "Apartment: 2-4 Units",
        TYPEHUQ == 5 ~ "Apartment: 5 or more units"
      ),
      levels = c(
        "Mobile home", "Single-family detached", "Single-family attached",
        "Apartment: 2-4 Units", "Apartment: 5 or more units"
      )
    ),
    # Ordered factor: construction periods run from oldest to newest
    YearMade = parse_factor(
      case_when(
        YEARMADERANGE == 1 ~ "Before 1950",
        YEARMADERANGE == 2 ~ "1950-1959",
        YEARMADERANGE == 3 ~ "1960-1969",
        YEARMADERANGE == 4 ~ "1970-1979",
        YEARMADERANGE == 5 ~ "1980-1989",
        YEARMADERANGE == 6 ~ "1990-1999",
        YEARMADERANGE == 7 ~ "2000-2009",
        YEARMADERANGE == 8 ~ "2010-2015"
      ),
      levels = c(
        "Before 1950", "1950-1959", "1960-1969", "1970-1979", "1980-1989",
        "1990-1999", "2000-2009", "2010-2015"
      ),
      ordered = TRUE
    ),
    SpaceHeatingUsed = as.logical(HEATHOME),
    # Code -9 is mapped to missing explicitly
    HeatingBehavior = parse_factor(
      case_when(
        EQUIPMUSE == 1 ~ "Set one temp and leave it",
        EQUIPMUSE == 2 ~ "Manually adjust at night/no one home",
        EQUIPMUSE == 3 ~ "Program thermostat to change at certain times",
        EQUIPMUSE == 4 ~ "Turn on or off as needed",
        EQUIPMUSE == 5 ~ "No control",
        EQUIPMUSE == 9 ~ "Other",
        EQUIPMUSE == -9 ~ NA_character_
      ),
      levels = c(
        "Set one temp and leave it", "Manually adjust at night/no one home",
        "Program thermostat to change at certain times",
        "Turn on or off as needed", "No control", "Other"
      )
    ),
    # Non-positive temperature codes are treated as missing
    WinterTempDay = if_else(TEMPHOME > 0, TEMPHOME, NA_real_),
    WinterTempAway = if_else(TEMPGONE > 0, TEMPGONE, NA_real_),
    WinterTempNight = if_else(TEMPNITE > 0, TEMPNITE, NA_real_),
    ACUsed = as.logical(AIRCOND),
    # Code -9 is mapped to missing explicitly
    ACBehavior = parse_factor(
      case_when(
        USECENAC == 1 ~ "Set one temp and leave it",
        USECENAC == 2 ~ "Manually adjust at night/no one home",
        USECENAC == 3 ~ "Program thermostat to change at certain times",
        USECENAC == 4 ~ "Turn on or off as needed",
        USECENAC == 5 ~ "No control",
        USECENAC == -9 ~ NA_character_
      ),
      levels = c(
        "Set one temp and leave it", "Manually adjust at night/no one home",
        "Program thermostat to change at certain times",
        "Turn on or off as needed", "No control"
      )
    ),
    # Non-positive temperature codes are treated as missing
    SummerTempDay = if_else(TEMPHOMEAC > 0, TEMPHOMEAC, NA_real_),
    SummerTempAway = if_else(TEMPGONEAC > 0, TEMPGONEAC, NA_real_),
    SummerTempNight = if_else(TEMPNITEAC > 0, TEMPNITEAC, NA_real_),
    ClimateRegion_BA = parse_factor(CLIMATE_REGION_PUB),
    ClimateRegion_IECC = factor(IECC_CLIMATE_PUB)
  )
115 |
116 | ```
117 |
118 |
119 | ## Check derived variables for correct coding
120 |
121 | ```{r checkvars}
# Cross-tabulate every derived variable against its source column so each
# recode can be checked by eye.
count(recs, Region, REGIONC)
count(recs, Division, DIVISION)
count(recs, MSAStatus, METROMICRO)
count(recs, Urbanicity, UATYP10)
count(recs, HousingUnitType, TYPEHUQ)
count(recs, YearMade, YEARMADERANGE)
count(recs, SpaceHeatingUsed, HEATHOME)
count(recs, HeatingBehavior, EQUIPMUSE)
count(recs, ACUsed, AIRCOND)
count(recs, ACBehavior, USECENAC)
count(recs, ClimateRegion_BA, CLIMATE_REGION_PUB)
count(recs, ClimateRegion_IECC, IECC_CLIMATE_PUB)
134 |
135 | ```
136 | ## Save data
137 |
138 | ```{r savedat}
# Output file: the ID, the derived variables, and the raw size, weight,
# degree-day and energy columns. The coded source columns that were recoded
# into derived variables are left out.
recs_out <- recs %>%
  select(
    DOEID, Region, Division, MSAStatus, Urbanicity, HousingUnitType, YearMade,
    SpaceHeatingUsed, HeatingBehavior,
    WinterTempDay, WinterTempAway, WinterTempNight,
    ACUsed, ACBehavior,
    SummerTempDay, SummerTempAway, SummerTempNight,
    TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT,
    NWEIGHT, starts_with("BRRWT"),
    CDD30YR, CDD65, CDD80, ClimateRegion_BA, ClimateRegion_IECC,
    HDD30YR, HDD65, HDD50, GNDHDD65,
    BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO,
    TOTALBTU, TOTALDOL, BTUWOOD, BTUPELLET
  )

summary(recs_out)

# Save as a gzip-compressed RDS under Data/
write_rds(
  x = recs_out,
  file = here("Data", "recs.rds"),
  compress = "gz"
)
144 | ```
145 |
146 |
147 |
--------------------------------------------------------------------------------
/DataCleaningScripts/TargetPopulation.Rmd:
--------------------------------------------------------------------------------
1 | ---
title: "Target Population for the 2020 and 2016 ANES"
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE)
8 | ```
9 |
10 | ## Target pop 2020 ANES
11 | From the User Guide: "The target population for the fresh cross-section was the 231 million non-institutional U.S. citizens aged 18 or older living in the 50 US states or the District of Columbia."
12 |
13 | - We will use Current Population Survey (CPS) to find this total from November 2020
14 | - Relevant data dictionary: https://www2.census.gov/programs-surveys/cps/datasets/2020/basic/2020_Basic_CPS_Public_Use_Record_Layout_plus_IO_Code_list.txt
15 |
16 | ```{r}
17 | library(censusapi)
18 | library(tidyverse)
19 |
# Request the November 2020 CPS basic monthly data by state.
# The Census API key is read from the CENSUS_API_KEY environment variable.
cps_state_in <- getCensus(
  name = "cps/basic/nov",
  vintage = 2020,
  region = "state",
  vars = c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"),
  key = Sys.getenv("CENSUS_API_KEY")
)

# Convert every column to numeric so age, citizenship and weight can be
# filtered and summed. `.cols` is given explicitly because calling across()
# without it is deprecated as of dplyr 1.1.0.
cps_state <- cps_state_in %>%
  as_tibble() %>%
  mutate(across(everything(), as.numeric))

# confirm this doesn't include territories
cps_state %>%
  count(state)

# confirm this is only November 2020
cps_state %>%
  count(HRMONTH, HRYEAR4)

# voting age citizen population: sum of the PWSSWGT weights over records
# aged 18 or older with a citizenship code (PRCITSHP) of 1-4
targetpop <- cps_state %>%
  filter(
    PRTAGE >= 18,
    PRCITSHP %in% 1:4
  ) %>%
  pull(PWSSWGT) %>%
  sum()
50 |
51 |
52 |
53 | ```
54 |
55 | The target population in 2020 is: `r prettyNum(targetpop, big.mark=",")`.
56 |
57 |
58 | ## Target pop 2016 ANES
59 | From the User Guide: "...and the target population for the Internet mode was 224.1 million U.S. citizens age 18 or older living in the 50 US states or the District of Columbia"
60 |
61 | - We will use Current Population Survey (CPS) to find this total from November 2016
62 |
63 | ```{r}
64 |
# Request the November 2016 CPS basic monthly data by state.
# The Census API key is read from the CENSUS_API_KEY environment variable.
cps_state_in <- getCensus(
  name = "cps/basic/nov",
  vintage = 2016,
  region = "state",
  vars = c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"),
  key = Sys.getenv("CENSUS_API_KEY")
)

# Convert every column to numeric so age, citizenship and weight can be
# filtered and summed. `.cols` is given explicitly because calling across()
# without it is deprecated as of dplyr 1.1.0.
cps_state <- cps_state_in %>%
  as_tibble() %>%
  mutate(across(everything(), as.numeric))

# confirm this doesn't include territories
cps_state %>%
  count(state)

# confirm this is only November 2016
cps_state %>%
  count(HRMONTH, HRYEAR4)

# voting age citizen population: sum of the PWSSWGT weights over records
# aged 18 or older with a citizenship code (PRCITSHP) of 1-4
targetpop <- cps_state %>%
  filter(
    PRTAGE >= 18,
    PRCITSHP %in% 1:4
  ) %>%
  pull(PWSSWGT) %>%
  sum()
95 |
96 |
97 |
98 | ```
99 |
100 | The target population in 2016 is: `r prettyNum(targetpop, big.mark=",")`.
101 |
102 |
--------------------------------------------------------------------------------
/DataCleaningScripts/TargetPopulation.md:
--------------------------------------------------------------------------------
1 | Target Population 2020 ANES
2 | ================
3 |
4 | ## Target pop 2020 ANES
5 |
6 | From the User Guide: “The target population for the fresh cross-section
7 | was the 231 million non-institutional U.S. citizens aged 18 or older
8 | living in the 50 US states or the District of Columbia.”
9 |
10 | - We will use Current Population Survey (CPS) to find this total from
11 | November 2020
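-   Relevant data dictionary:
    <https://www2.census.gov/programs-surveys/cps/datasets/2020/basic/2020_Basic_CPS_Public_Use_Record_Layout_plus_IO_Code_list.txt>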
14 |
15 | ``` r
16 | library(censusapi)
17 | ```
18 |
19 | ##
20 | ## Attaching package: 'censusapi'
21 |
22 | ## The following object is masked from 'package:methods':
23 | ##
24 | ## getFunction
25 |
26 | ``` r
27 | library(tidyverse)
28 | ```
29 |
30 | ## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
31 |
32 | ## v ggplot2 3.3.5 v purrr 0.3.4
33 | ## v tibble 3.1.6 v dplyr 1.0.8
34 | ## v tidyr 1.2.0 v stringr 1.4.0
35 | ## v readr 2.1.2 v forcats 0.5.1
36 |
37 | ## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
38 | ## x dplyr::filter() masks stats::filter()
39 | ## x dplyr::lag() masks stats::lag()
40 |
41 | ``` r
42 | cps_state_in <- getCensus(
43 | name="cps/basic/nov",
44 | vintage=2020,
45 | region="state",
46 | vars=c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"),
47 | key = Sys.getenv("CENSUS_API_KEY")
48 | )
49 |
50 | cps_state <- cps_state_in %>%
51 | as_tibble() %>%
52 | mutate(across(.fns=as.numeric))
53 |
54 | # confirm this doesn't include territories
55 | cps_state %>%
56 | count(state)
57 | ```
58 |
59 | ## # A tibble: 51 x 2
60 | ## state n
61 | ##
62 | ## 1 1 2406
63 | ## 2 2 1289
64 | ## 3 4 1969
65 | ## 4 5 1988
66 | ## 5 6 9574
67 | ## 6 8 1365
68 | ## 7 9 1157
69 | ## 8 10 1285
70 | ## 9 11 1622
71 | ## 10 12 5055
72 | ## # ... with 41 more rows
73 |
74 | ``` r
75 | # sanity check: all records should fall in month 11 of 2020
76 | count(
77 |   cps_state, HRMONTH, HRYEAR4)
78 | ```
79 |
80 | ## # A tibble: 1 x 3
81 | ## HRMONTH HRYEAR4 n
82 | ##
83 | ## 1 11 2020 112037
84 |
85 | ``` r
86 | # voting-age citizen population: keep ages 18+ with PRCITSHP codes 1-4, then sum the PWSSWGT weights
87 |
88 | targetpop <- cps_state %>%
89 |   as_tibble() %>%
90 |   filter(
91 |     PRTAGE >= 18 &
92 |       PRCITSHP %in% 1:4
93 |   ) %>%
94 |   pull(PWSSWGT) %>%
95 |   sum()
96 | ```
97 |
98 | The target population in 2020 is: 231,592,693.
99 |
100 | ## Target pop 2016 ANES
101 |
102 | From the User Guide: “…and the target population for the Internet mode
103 | was 224.1 million U.S. citizens age 18 or older living in the 50 US
104 | states or the District of Columbia”
105 |
106 | - We will use Current Population Survey (CPS) to find this total from
107 | November 2016
108 |
109 | ``` r
110 | cps_state_in <- getCensus(             # November 2016 CPS basic file, state-level records
111 |   name = "cps/basic/nov",
112 |   vintage = 2016,
113 |   region = "state",
114 |   vars = c("HRHHID", "HRMONTH", "HRYEAR4", "PRTAGE", "PRCITSHP", "PWSSWGT"),
115 |   key = Sys.getenv("CENSUS_API_KEY")   # API key is read from the environment
116 | )
117 |
118 | cps_state <- cps_state_in %>%
119 |   as_tibble() %>%
120 |   mutate(across(everything(), as.numeric))  # explicit .cols: across() without it is deprecated in dplyr >= 1.1
121 |
122 | # confirm this doesn't include territories
123 | cps_state %>%
124 |   count(state)
125 | ```
126 |
127 | ## # A tibble: 51 x 2
128 | ## state n
129 | ##
130 | ## 1 1 2651
131 | ## 2 2 1720
132 | ## 3 4 2145
133 | ## 4 5 2342
134 | ## 5 6 11200
135 | ## 6 8 1551
136 | ## 7 9 1228
137 | ## 8 10 1508
138 | ## 9 11 2094
139 | ## 10 12 5777
140 | ## # ... with 41 more rows
141 |
142 | ``` r
143 | # sanity check: all records should fall in month 11 of 2016
144 | count(
145 |   cps_state, HRMONTH, HRYEAR4)
146 | ```
147 |
148 | ## # A tibble: 1 x 3
149 | ## HRMONTH HRYEAR4 n
150 | ##
151 | ## 1 11 2016 131389
152 |
153 | ``` r
154 | # voting-age citizen population: keep ages 18+ with PRCITSHP codes 1-4, then sum the PWSSWGT weights
155 |
156 | targetpop <- cps_state %>%
157 |   as_tibble() %>%
158 |   filter(
159 |     PRTAGE >= 18 &
160 |       PRCITSHP %in% 1:4
161 |   ) %>%
162 |   pull(PWSSWGT) %>%
163 |   sum()
164 | ```
165 |
166 | The target population in 2016 is: 224,059,005.
167 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Categorical Data Analysis Exercises"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## ----setup---------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- here("Data", "anes_2020.rds") %>% read_rds() %>%
16 |   mutate(Weight = Weight / sum(Weight) * 231592693)
17 | # weights rescaled to total the 18+ citizen population in Nov 2020 (per ANES methodology documentation)
18 | anes_des <- anes %>%
19 |   as_survey_design(
20 |     weights = Weight, strata = Stratum,
21 |     ids = VarUnit,
22 |     nest = TRUE)
23 |
24 | #'
25 | #' # Part 1
26 | #'
27 | #' 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful.
28 | #'
29 | ## ----ex1_1---------------------------------------------------------------
30 |
31 |
32 |
33 | #'
34 | #'
35 | #' 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with.
36 | #'
37 | ## ----ex1_2---------------------------------------------------------------
38 |
39 |
40 | #'
41 | #'
42 | #'
43 | #' 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020.
44 | #'
45 | ## ----ex1_3---------------------------------------------------------------
46 |
47 |
48 | #'
49 | #' 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016.
50 | #'
51 | ## ----ex1_4---------------------------------------------------------------
52 |
53 |
54 | #'
55 | #' 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020.
56 | #'
57 | ## ----ex1_5---------------------------------------------------------------
58 |
59 |
60 | #'
61 | #' # Part 2
62 | #'
63 | #' 1. Is there a relationship between PartyID and whether people voted early?
64 | #'
65 | ## ----ex2_1---------------------------------------------------------------
66 |
67 |
68 | #'
69 | #'
70 | #' 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option.
71 | #'
72 | ## ----ex2_2---------------------------------------------------------------
73 |
74 |
75 | #'
76 | #'
77 | #' # Bonus
78 | #'
79 | #' 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate.
80 | #'
81 | ## ----exb_1---------------------------------------------------------------
82 |
83 |
84 | #'
85 | #' 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model.
86 | #'
87 | ## ----exb_2---------------------------------------------------------------
88 |
89 |
90 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Categorical Data Analysis Exercises"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r setup}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- here("Data", "anes_2020.rds") %>% read_rds() %>%
16 |   mutate(Weight = Weight / sum(Weight) * 231592693)
17 | # weights rescaled to total the 18+ citizen population in Nov 2020 (per ANES methodology documentation)
18 | anes_des <- anes %>%
19 |   as_survey_design(
20 |     weights = Weight, strata = Stratum,
21 |     ids = VarUnit,
22 |     nest = TRUE)
23 | ```
24 |
25 | # Part 1
26 |
27 | 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful.
28 |
29 | ```{r ex1_1}
30 |
31 |
32 | ```
33 |
34 |
35 | 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with.
36 |
37 | ```{r ex1_2}
38 |
39 | ```
40 |
41 |
42 |
43 | 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020.
44 |
45 | ```{r ex1_3}
46 |
47 | ```
48 |
49 | 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016.
50 |
51 | ```{r ex1_4}
52 |
53 | ```
54 |
55 | 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020.
56 |
57 | ```{r ex1_5}
58 |
59 | ```
60 |
61 | # Part 2
62 |
63 | 1. Is there a relationship between PartyID and whether people voted early?
64 |
65 | ```{r ex2_1}
66 |
67 | ```
68 |
69 |
70 | 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option.
71 |
72 | ```{r ex2_2}
73 |
74 | ```
75 |
76 |
77 | # Bonus
78 |
79 | 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate.
80 |
81 | ```{r exb_1}
82 |
83 | ```
84 |
85 | 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model.
86 |
87 | ```{r exb_2}
88 |
89 | ```
90 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Categorical Data Analysis Exercise Solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## ----setup---------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- here("Data", "anes_2020.rds") %>% read_rds() %>%
16 |   mutate(Weight = Weight / sum(Weight) * 231592693)
17 | # weights rescaled to total the 18+ citizen population in Nov 2020 (per ANES methodology documentation)
18 | anes_des <- anes %>%
19 |   as_survey_design(
20 |     weights = Weight, strata = Stratum,
21 |     ids = VarUnit,
22 |     nest = TRUE)
23 |
24 | #'
25 | #' # Part 1
26 | #'
27 | #' 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful.
28 | #'
29 | ## ----ex1_1---------------------------------------------------------------
30 | # Option 1: weighted count via survey_count()
31 | femgd <- anes_des %>%
32 |   filter(Gender == "Female" & Education == "Graduate") %>%
33 |   survey_count(name = "n")
34 | # Option 2: survey_total() inside summarise(); this result (column N) overwrites Option 1's
35 | femgd <- anes_des %>%
36 |   filter(Gender == "Female" & Education == "Graduate") %>%
37 |   summarise(
38 |     N = survey_total(), .groups = "drop"
39 |   )
40 |
41 |
42 | #'
43 | #' There are `r formatC(pull(femgd, N), format="d", big.mark=",")` females with a graduate degree.
44 | #'
45 | #'
46 | #' 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with.
47 | #'
48 | ## ----ex1_2---------------------------------------------------------------
49 | (psd <- anes_des %>%                 # outer parentheses print the result as well as storing it
50 |   group_by(PartyID) %>%
51 |   summarise(
52 |     p = survey_mean()
53 |   ) %>%
54 |   filter(PartyID == "Strong democrat"))
55 |
56 | #'
57 | #' `r str_c(round(pull(psd, p)*100, 1), "%")` of people identify as a strong democrat.
58 | #'
59 | #'
60 | #' 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020.
61 | #'
62 | ## ----ex1_3---------------------------------------------------------------
63 | (psr <- anes_des %>%
64 |   filter(VotedPres2020 == "Yes") %>%   # restrict to 2020 voters before estimating
65 |   group_by(PartyID) %>%
66 |   summarise(
67 |     p = survey_mean()
68 |   ) %>%
69 |   filter(PartyID == "Strong republican"))
70 |
71 | #'
72 | #' `r str_c(round(pull(psr, p)*100, 1), "%")` of people identify as a strong republican among those who voted in 2020.
73 | #'
74 | #' 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016.
75 | #'
76 | ## ----ex1_4---------------------------------------------------------------
77 | (pvb <- anes_des %>%
78 |   filter(!is.na(VotedPres2016), !is.na(VotedPres2020)) %>%   # complete cases on both vote items
79 |   group_by(interact(VotedPres2016, VotedPres2020)) %>%       # proportions over the joint distribution
80 |   summarize(  # full argument names: `var` only partially matched `vartype`, and `method` is not a survey_prop() argument
81 |     p = survey_prop(vartype = "ci", proportion = TRUE, prop_method = "logit")
82 |   ) %>%
83 |   filter(VotedPres2016 == "Yes", VotedPres2020 == "Yes"))
84 |
85 | #'
86 | #' `r str_c(round(pull(pvb, p)*100, 1), "%")` (`r round(pull(pvb, p_low)*100, 1)`-`r str_c(round(pull(pvb, p_upp)*100, 1), "%")`) voted in both the 2016 and 2020 elections.
87 | #'
88 | #'
89 | #'
90 | #' 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020.
91 | #'
92 | ## ----ex1_5---------------------------------------------------------------
93 | (pdeff <- anes_des %>%
94 |   filter(!is.na(EarlyVote2020)) %>%   # drop respondents with a missing early-vote value
95 |   group_by(EarlyVote2020) %>%
96 |   summarise(
97 |     p = survey_mean(deff = TRUE)       # deff = TRUE adds the p_deff column
98 |   ) %>%
99 |   filter(EarlyVote2020 == "Yes"))
100 |
101 | #'
102 | #' The design effect is `r round(pull(pdeff, p_deff), 2)`.
103 | #'
104 | #' # Part 2
105 | #'
106 | #' 1. Is there a relationship between PartyID and whether people voted early?
107 | #'
108 | ## ----ex2_1---------------------------------------------------------------
109 | anes_des %>%                          # descriptive look: share voting early within each PartyID
110 |   filter(!is.na(PartyID) & !is.na(EarlyVote2020)) %>%
111 |   group_by(PartyID, EarlyVote2020) %>%
112 |   summarise(
113 |     p = survey_mean(),
114 |     .groups = "drop"
115 |   ) %>%
116 |   filter(EarlyVote2020 == "Yes")
117 |
118 | (pid_vote <- anes_des %>%             # chi-squared test of association; printed and stored
119 |   svychisq(design = .,
120 |            formula = ~PartyID + EarlyVote2020))
121 |
122 | #'
123 | #' There is a strong association between people's party and whether they voted early, p-value=`r pluck(pid_vote, "p.value") %>% round(5)`
124 | #'
125 | #' 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option.
126 | #'
127 | ## ----ex2_2---------------------------------------------------------------
128 | anes_des %>%                          # descriptive look: trust distribution within each PartyID
129 |   filter(!is.na(PartyID) & !is.na(TrustGovernment)) %>%
130 |   group_by(PartyID, TrustGovernment) %>%
131 |   summarise(
132 |     p = survey_mean(),
133 |     .groups = "drop"
134 |   ) %>%
135 |   pivot_wider(id_cols = PartyID, names_from = "TrustGovernment", values_from = "p")
136 |
137 | (pid_trust <- anes_des %>%            # Wald version of the chi-squared test; printed and stored
138 |   svychisq(design = .,
139 |            formula = ~PartyID + TrustGovernment,
140 |            statistic = "Wald"))
141 |
142 | #'
143 | #' There is a strong association between how much people trust the government and their party, p-value=`r pluck(pid_trust, "p.value") %>% round(5)`
144 | #'
145 | #' # Bonus
146 | #'
147 | #' 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate.
148 | #'
149 | ## ----exb_1---------------------------------------------------------------
150 |
151 | # Solution 1: collapse the PartyID factor with forcats::fct_collapse()
152 | anes_des %>%
153 |   filter(!is.na(PartyID)) %>%
154 |   mutate(PartyID3 = fct_collapse(PartyID,
155 |     LeanDem = c("Strong democrat",
156 |                 "Not very strong democrat",
157 |                 "Independent-democrat"),
158 |     LeanRep = c("Strong republican",
159 |                 "Not very strong republican",
160 |                 "Independent-republican"),
161 |     other_level = "Other")) %>%
162 |   group_by(PartyID3) %>%
163 |   summarise(p = survey_prop(vartype = "ci", proportion = TRUE))
164 |
165 | # Solution 2: build the same 3-level variable as character values with case_when()
166 | anes_des %>%
167 |   filter(!is.na(PartyID)) %>%
168 |   mutate(PartyID3 = case_when(PartyID %in% c("Strong democrat",
169 |                                              "Not very strong democrat",
170 |                                              "Independent-democrat") ~ "LeanDem",
171 |                               PartyID %in% c("Strong republican",
172 |                                              "Not very strong republican",
173 |                                              "Independent-republican") ~ "LeanRep",
174 |                               TRUE ~ "Other")) %>%
175 |   group_by(PartyID3) %>%
176 |   summarise(p = survey_prop(vartype = "ci", proportion = TRUE))
177 |
178 |
179 | #'
180 | #' 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model.
181 | #'
182 | ## ----exb_2---------------------------------------------------------------
183 | anes_des %>%                          # descriptive look: share voting early by 3-level party
184 |   mutate(PartyID3 = fct_collapse(PartyID,
185 |     LeanDem = c("Strong democrat",
186 |                 "Not very strong democrat",
187 |                 "Independent-democrat"),
188 |     LeanRep = c("Strong republican",
189 |                 "Not very strong republican",
190 |                 "Independent-republican"),
191 |     other_level = "Other")) %>%
192 |   filter(!is.na(PartyID3) & !is.na(EarlyVote2020)) %>%
193 |   group_by(PartyID3, EarlyVote2020) %>%
194 |   summarise(
195 |     p = survey_prop(proportion = TRUE)
196 |   ) %>% filter(EarlyVote2020 == "Yes")
197 |
198 | earlyv_glm <- anes_des %>%            # logistic model of voting early on the 3-level party variable
199 |   mutate(PartyID3 = fct_collapse(PartyID,
200 |     LeanDem = c("Strong democrat",
201 |                 "Not very strong democrat",
202 |                 "Independent-democrat"),
203 |     LeanRep = c("Strong republican",
204 |                 "Not very strong republican",
205 |                 "Independent-republican"),
206 |     other_level = "Other")) %>%
207 |   svyglm(design = .,
208 |          formula = (EarlyVote2020 == "Yes") ~ PartyID3,
209 |          family = quasibinomial(),
210 |          na.action = na.omit)
211 |
212 | summary(earlyv_glm)
213 |
214 | #'
215 | #' Yes, there is evidence that those leaning democrat were more likely to vote early. They are the reference level in the model and the other coefficients are negative and significant.
216 | #'
217 | #' # Session information
218 | #'
219 | ## ----si------------------------------------------------------------------
220 | devtools::session_info(pkgs = "attached")  # session details limited to attached packages
221 |
222 | #'
223 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Categorical Data Analysis Exercise Solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r setup}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- here("Data", "anes_2020.rds") %>% read_rds() %>%
16 |   mutate(Weight = Weight / sum(Weight) * 231592693)
17 | # weights rescaled to total the 18+ citizen population in Nov 2020 (per ANES methodology documentation)
18 | anes_des <- anes %>%
19 |   as_survey_design(
20 |     weights = Weight, strata = Stratum,
21 |     ids = VarUnit,
22 |     nest = TRUE)
23 | ```
24 |
25 | # Part 1
26 |
27 | 1. How many females have a graduate degree? Hint: the variables `Gender` and `Education` will be useful.
28 |
29 | ```{r ex1_1}
30 | # Option 1: weighted count via survey_count()
31 | femgd <- anes_des %>%
32 |   filter(Gender == "Female" & Education == "Graduate") %>%
33 |   survey_count(name = "n")
34 | # Option 2: survey_total() inside summarise(); this result (column N) overwrites Option 1's
35 | femgd <- anes_des %>%
36 |   filter(Gender == "Female" & Education == "Graduate") %>%
37 |   summarise(
38 |     N = survey_total(), .groups = "drop"
39 |   )
40 |
41 | ```
42 |
43 | There are `r formatC(pull(femgd, N), format="d", big.mark=",")` females with a graduate degree.
44 |
45 |
46 | 2. What percentage of people identify as "Strong democrat"? Hint: The variable `PartyID` indicates what party people identify with.
47 |
48 | ```{r ex1_2}
49 | (psd <- anes_des %>%                 # outer parentheses print the result as well as storing it
50 |   group_by(PartyID) %>%
51 |   summarise(
52 |     p = survey_mean()
53 |   ) %>%
54 |   filter(PartyID == "Strong democrat"))
55 | ```
56 |
57 | `r str_c(round(pull(psd, p)*100, 1), "%")` of people identify as a strong democrat.
58 |
59 |
60 | 3. What percentage of people who voted in the 2020 election identify as "Strong republican"? Hint: The variable `VotedPres2020` indicates whether someone voted in 2020.
61 |
62 | ```{r ex1_3}
63 | (psr <- anes_des %>%
64 |   filter(VotedPres2020 == "Yes") %>%   # restrict to 2020 voters before estimating
65 |   group_by(PartyID) %>%
66 |   summarise(
67 |     p = survey_mean()
68 |   ) %>%
69 |   filter(PartyID == "Strong republican"))
70 | ```
71 |
72 | `r str_c(round(pull(psr, p)*100, 1), "%")` of people identify as a strong republican among those who voted in 2020.
73 |
74 | 4. What percentage of people voted in both the 2016 election and in the 2020 election? Include the logit confidence interval. Hint: The variable `VotedPres2016` indicates whether someone voted in 2016.
75 |
76 | ```{r ex1_4}
77 | (pvb <- anes_des %>%
78 |   filter(!is.na(VotedPres2016), !is.na(VotedPres2020)) %>%   # complete cases on both vote items
79 |   group_by(interact(VotedPres2016, VotedPres2020)) %>%       # proportions over the joint distribution
80 |   summarize(  # full argument names: `var` only partially matched `vartype`, and `method` is not a survey_prop() argument
81 |     p = survey_prop(vartype = "ci", proportion = TRUE, prop_method = "logit")
82 |   ) %>%
83 |   filter(VotedPres2016 == "Yes", VotedPres2020 == "Yes"))
84 | ```
85 |
86 | `r str_c(round(pull(pvb, p)*100, 1), "%")` (`r round(pull(pvb, p_low)*100, 1)`-`r str_c(round(pull(pvb, p_upp)*100, 1), "%")`) voted in both the 2016 and 2020 elections.
87 |
88 |
89 |
90 | 5. What is the design effect for the proportion of people who voted early? Hint: The variable `EarlyVote2020` indicates whether someone voted early in 2020.
91 |
92 | ```{r ex1_5}
93 | (pdeff <- anes_des %>%
94 |   filter(!is.na(EarlyVote2020)) %>%   # drop respondents with a missing early-vote value
95 |   group_by(EarlyVote2020) %>%
96 |   summarise(
97 |     p = survey_mean(deff = TRUE)       # deff = TRUE adds the p_deff column
98 |   ) %>%
99 |   filter(EarlyVote2020 == "Yes"))
100 | ```
101 |
102 | The design effect is `r round(pull(pdeff, p_deff), 2)`.
103 |
104 | # Part 2
105 |
106 | 1. Is there a relationship between PartyID and whether people voted early?
107 |
108 | ```{r ex2_1}
109 | anes_des %>%                          # descriptive look: share voting early within each PartyID
110 |   filter(!is.na(PartyID) & !is.na(EarlyVote2020)) %>%
111 |   group_by(PartyID, EarlyVote2020) %>%
112 |   summarise(
113 |     p = survey_mean(),
114 |     .groups = "drop"
115 |   ) %>%
116 |   filter(EarlyVote2020 == "Yes")
117 |
118 | (pid_vote <- anes_des %>%             # chi-squared test of association; printed and stored
119 |   svychisq(design = .,
120 |            formula = ~PartyID + EarlyVote2020))
121 | ```
122 |
123 | There is a strong association between people's party and whether they voted early, p-value=`r pluck(pid_vote, "p.value") %>% round(5)`
124 |
125 | 2. Is there a relationship between PartyID and trust in the government? Hints: `TrustGovernment` indicates how strongly people trust the government. Use Wald as the `statistic` option.
126 |
127 | ```{r ex2_2}
128 | anes_des %>%                          # descriptive look: trust distribution within each PartyID
129 |   filter(!is.na(PartyID) & !is.na(TrustGovernment)) %>%
130 |   group_by(PartyID, TrustGovernment) %>%
131 |   summarise(
132 |     p = survey_mean(),
133 |     .groups = "drop"
134 |   ) %>%
135 |   pivot_wider(id_cols = PartyID, names_from = "TrustGovernment", values_from = "p")
136 |
137 | (pid_trust <- anes_des %>%            # Wald version of the chi-squared test; printed and stored
138 |   svychisq(design = .,
139 |            formula = ~PartyID + TrustGovernment,
140 |            statistic = "Wald"))
141 | ```
142 |
143 | There is a strong association between how much people trust the government and their party, p-value=`r pluck(pid_trust, "p.value") %>% round(5)`
144 |
145 | # Bonus
146 |
147 | 1. What percentage of people lean republican? These are individuals that are strong republicans, not very strong republicans and are independent-republicans. Include an appropriate confidence interval. Hint: to get the correct confidence interval, create a new variable BEFORE calculating the estimate.
148 |
149 | ```{r exb_1}
150 |
151 | # Solution 1: collapse the PartyID factor with forcats::fct_collapse()
152 | anes_des %>%
153 |   filter(!is.na(PartyID)) %>%
154 |   mutate(PartyID3 = fct_collapse(PartyID,
155 |     LeanDem = c("Strong democrat",
156 |                 "Not very strong democrat",
157 |                 "Independent-democrat"),
158 |     LeanRep = c("Strong republican",
159 |                 "Not very strong republican",
160 |                 "Independent-republican"),
161 |     other_level = "Other")) %>%
162 |   group_by(PartyID3) %>%
163 |   summarise(p = survey_prop(vartype = "ci", proportion = TRUE))
164 |
165 | # Solution 2: build the same 3-level variable as character values with case_when()
166 | anes_des %>%
167 |   filter(!is.na(PartyID)) %>%
168 |   mutate(PartyID3 = case_when(PartyID %in% c("Strong democrat",
169 |                                              "Not very strong democrat",
170 |                                              "Independent-democrat") ~ "LeanDem",
171 |                               PartyID %in% c("Strong republican",
172 |                                              "Not very strong republican",
173 |                                              "Independent-republican") ~ "LeanRep",
174 |                               TRUE ~ "Other")) %>%
175 |   group_by(PartyID3) %>%
176 |   summarise(p = survey_prop(vartype = "ci", proportion = TRUE))
177 |
178 | ```
179 |
180 | 2. Were people who lean democrat more likely to vote early in the 2020 election? Hint: use a logistic model and 3-level party variable to use in the model.
181 |
182 | ```{r exb_2}
183 | anes_des %>%                          # descriptive look: share voting early by 3-level party
184 |   mutate(PartyID3 = fct_collapse(PartyID,
185 |     LeanDem = c("Strong democrat",
186 |                 "Not very strong democrat",
187 |                 "Independent-democrat"),
188 |     LeanRep = c("Strong republican",
189 |                 "Not very strong republican",
190 |                 "Independent-republican"),
191 |     other_level = "Other")) %>%
192 |   filter(!is.na(PartyID3) & !is.na(EarlyVote2020)) %>%
193 |   group_by(PartyID3, EarlyVote2020) %>%
194 |   summarise(
195 |     p = survey_prop(proportion = TRUE)
196 |   ) %>% filter(EarlyVote2020 == "Yes")
197 |
198 | earlyv_glm <- anes_des %>%            # logistic model of voting early on the 3-level party variable
199 |   mutate(PartyID3 = fct_collapse(PartyID,
200 |     LeanDem = c("Strong democrat",
201 |                 "Not very strong democrat",
202 |                 "Independent-democrat"),
203 |     LeanRep = c("Strong republican",
204 |                 "Not very strong republican",
205 |                 "Independent-republican"),
206 |     other_level = "Other")) %>%
207 |   svyglm(design = .,
208 |          formula = (EarlyVote2020 == "Yes") ~ PartyID3,
209 |          family = quasibinomial(),
210 |          na.action = na.omit)
211 |
212 | summary(earlyv_glm)
213 | ```
214 |
215 | Yes, there is evidence that those leaning democrat were more likely to vote early. They are the reference level in the model and the other coefficients are negative and significant.
216 |
217 | # Session information
218 |
219 | ```{r si}
220 | devtools::session_info(pkgs = "attached")  # session details limited to attached packages
221 | ```
222 |
223 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Continuous Data Analysis Exercises"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## ------------------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- here("Data", "recs.rds") %>% read_rds()
16 | # replicate-weight design: NWEIGHT is the main weight, columns starting with BRRWT are the replicates
17 | recs_des <- recs %>%
18 |   as_survey_rep(
19 |     weights = NWEIGHT,
20 |     repweights = starts_with("BRRWT"),
21 |     type = "Fay", rho = 0.5,
22 |     mse = TRUE)
23 |
24 | #'
25 | #' # Part 1
26 | #'
27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 | #'
29 | ## ------------------------------------------------------------------------
30 |
31 |
32 | #'
33 | #' 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
34 | #'
35 | ## ------------------------------------------------------------------------
36 |
37 |
38 | #'
39 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
40 | #'
41 | ## ------------------------------------------------------------------------
42 |
43 |
44 | #'
45 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
46 | #'
47 | ## ------------------------------------------------------------------------
48 |
49 |
50 | #'
51 | #' # Part 2
52 | #'
53 | #' 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
54 | #'
55 | ## ------------------------------------------------------------------------
56 |
57 |
58 | #'
59 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
60 | #'
61 | ## ------------------------------------------------------------------------
62 |
63 |
64 | #'
65 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
66 | #'
67 | ## ------------------------------------------------------------------------
68 |
69 |
70 | #'
71 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
72 | #'
73 | ## ------------------------------------------------------------------------
74 |
75 |
76 | #'
77 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
78 | #'
79 | ## ------------------------------------------------------------------------
80 |
81 |
82 | #'
83 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Continuous Data Analysis Exercises"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- here("Data", "recs.rds") %>% read_rds()
16 | # replicate-weight design: NWEIGHT is the main weight, columns starting with BRRWT are the replicates
17 | recs_des <- recs %>%
18 |   as_survey_rep(
19 |     weights = NWEIGHT,
20 |     repweights = starts_with("BRRWT"),
21 |     type = "Fay", rho = 0.5,
22 |     mse = TRUE)
23 | ```
24 |
25 | # Part 1
26 |
27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 |
29 | ```{r}
30 |
31 | ```
32 |
33 | 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
34 |
35 | ```{r}
36 |
37 | ```
38 |
39 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
40 |
41 | ```{r}
42 |
43 | ```
44 |
45 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
46 |
47 | ```{r}
48 |
49 | ```
50 |
51 | # Part 2
52 |
53 | 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
54 |
55 | ```{r}
56 |
57 | ```
58 |
59 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
60 |
61 | ```{r}
62 |
63 | ```
64 |
65 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
66 |
67 | ```{r}
68 |
69 | ```
70 |
71 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
72 |
73 | ```{r}
74 |
75 | ```
76 |
77 | 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
78 |
79 | ```{r}
80 |
81 | ```
82 |
83 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Continuous Data Analysis Exercise Solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## -------------------------------------------------------------------------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- here("Data", "recs.rds") %>% read_rds()
16 | # replicate-weight design: NWEIGHT is the main weight, columns starting with BRRWT are the replicates
17 | recs_des <- recs %>%
18 |   as_survey_rep(
19 |     weights = NWEIGHT,
20 |     repweights = starts_with("BRRWT"),
21 |     type = "Fay", rho = 0.5,
22 |     mse = TRUE)
23 |
24 | #'
25 | #' # Part 1
26 | #'
27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 | #'
29 | ## ----ex1_1----------------------------------------------------------------------------------------------------------------------
30 | avg_sqci <- recs_des %>%              # mean of TOTSQFT_EN with a 90% confidence interval
31 |   summarise(
32 |     SF_HU = survey_mean(TOTSQFT_EN,
33 |                         vartype = "ci",
34 |                         level = 0.9)
35 |   )
36 |
37 | #'
38 | #' On average US households have `r formatC(pull(avg_sqci, SF_HU), format="d", big.mark=",")` square feet, with a 90% CI of (`r formatC(pull(avg_sqci, SF_HU_low), format="d", big.mark=",")` sq ft, `r formatC(pull(avg_sqci, SF_HU_upp), format="d", big.mark=",")` sq ft).
39 | #'
40 | #'
41 | #' 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
42 | #'
43 | ## ----ex1_2----------------------------------------------------------------------------------------------------------------------
44 | cool_totratio<-recs_des %>%
45 | summarize(
46 | PropCooled=survey_ratio(
47 | numerator = TOTCSQFT,
48 | denominator = TOTSQFT_EN,
49 | vartype = "se")
50 | )
51 |
52 | #'
53 | #' On average US households have a ratio of `r round(pull(cool_totratio, PropCooled), 2)` square feet cooled per total square feet.
54 | #'
55 | #'
56 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
57 | #'
58 | ## ----ex1_3----------------------------------------------------------------------------------------------------------------------
59 | med_wintertemp<-recs_des %>%
60 | summarize(
61 | temp_winter=survey_median(WinterTempNight,
62 | vartype = "se",
63 | na.rm = TRUE)
64 | )
65 |
66 | #'
67 | #' The median temperature housing units are set to during the night in the winter is `r round(pull(med_wintertemp, temp_winter), 2)` degrees Fahrenheit.
68 | #'
69 | #'
70 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
71 | #'
72 | ## ----ex1_4----------------------------------------------------------------------------------------------------------------------
73 | quant_wintertemp<-recs_des %>% # store the estimate (as ex1_1-ex1_3 do) so the text below reports this result
74 | summarize(
75 | WinterNightTemp=survey_quantile(WinterTempNight,
76 | quantiles = 0.5, # 0.5 = 50th percentile; the estimate column is named WinterNightTemp_q50
77 | vartype = "se",
78 | na.rm = TRUE)
79 | )
80 |
81 | #'
82 | #' The 50th percentile (median) temperature housing units are set to during the night in the winter is `r round(pull(quant_wintertemp, WinterNightTemp_q50), 2)` degrees Fahrenheit.
83 | #'
84 | #'
85 | #' # Part 2
86 | #'
87 | #' 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
88 | #'
89 | ## ----ex2_1----------------------------------------------------------------------------------------------------------------------
90 | # option 1: cascade() reports the mean for the full grouping plus each coarser roll-up level
91 | recs_des %>%
92 | group_by(Region, Division, Urbanicity) %>%
93 | cascade(
94 | EnergyCost=survey_mean(TOTALDOL)
95 | )
96 |
97 | # option 2: summarize() reports the mean only for each
98 | # Region x Division x Urbanicity combination
99 | recs_des %>%
100 | group_by(Region, Division, Urbanicity) %>%
101 | summarize(
102 | EnergyCost=survey_mean(TOTALDOL)
103 | )
104 |
105 | #'
106 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
107 | #'
108 | ## ----ex2_2----------------------------------------------------------------------------------------------------------------------
109 | med_billsouth<-recs_des %>%
110 | filter(Region=="South") %>%
111 | summarize(
112 | MedElBill=survey_median(DOLLAREL,
113 | vartype="ci")
114 | )
115 |
116 | #'
117 | #' The median electric cost for housing units in the South is \$`r formatC(pull(med_billsouth, MedElBill), format="d", big.mark=",")` (\$`r formatC(pull(med_billsouth, MedElBill_low), format="d", big.mark=",")`, \$`r formatC(pull(med_billsouth, MedElBill_upp), format="d", big.mark=",")`).
118 | #'
119 | #'
120 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
121 | #'
122 | ## ----ex2_3----------------------------------------------------------------------------------------------------------------------
123 | daytemp_ttest<-recs_des %>%
124 | svyttest(design=., # "." passes the piped-in design object to the design argument
125 | formula = I(WinterTempDay-SummerTempDay)~0, # per-unit winter-minus-summer difference; "~0" requests a one-sample test that its mean is 0
126 | na.rm = TRUE)
127 |
128 | #'
129 | #' On average housing units have set the temperature lower in the winter than the summer, p-value=`r pluck(daytemp_ttest, "p.value") %>% round(5)`.
130 | #'
131 | #'
132 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
133 | #'
134 | ## ----ex2_4----------------------------------------------------------------------------------------------------------------------
135 | m1 <- recs_des %>%
136 | svyglm(design=.,
137 | formula=DOLLAREL~Region,
138 | na.action=na.omit)
139 | summary(m1)
140 |
141 | #'
142 | #' Yes, there is evidence that the average electric bill varies by region.
143 | #'
144 | #'
145 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
146 | #'
147 | ## ----ex2_5----------------------------------------------------------------------------------------------------------------------
148 | m2 <- recs_des %>%
149 | svyglm(design=.,
150 | formula=TOTALDOL~TOTCSQFT,
151 | na.action=na.omit)
152 | summary(m2)
153 |
154 | #'
155 | #' For each additional cooled square foot, the total energy cost increases by \$`r round(pluck(m2$coefficients,"TOTCSQFT"),2)`.
156 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Continuous Data Analysis Exercise Solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- read_rds(here("Data", "recs.rds"))
16 |
17 | recs_des <- recs %>% # replicate-weight design object used by every exercise below
18 | as_survey_rep(weights=NWEIGHT, # main analysis weight
19 | repweights=starts_with("BRRWT"), # every column whose name starts with "BRRWT" is a replicate weight
20 | type="Fay", # Fay's variant of balanced repeated replication
21 | rho=0.5, # Fay perturbation factor passed to the variance scaling
22 | mse=TRUE) # variances are computed around the full-sample estimate rather than the mean of the replicates
23 | ```
24 |
25 | # Part 1
26 |
27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 |
29 | ```{r ex1_1}
30 | avg_sqci<-recs_des %>%
31 | summarize(
32 | SF_HU=survey_mean(TOTSQFT_EN,
33 | vartype = "ci",
34 | level = 0.9)
35 | )
36 | ```
37 |
38 | On average US households have `r formatC(pull(avg_sqci, SF_HU), format="d", big.mark=",")` square feet, with a 90% CI of (`r formatC(pull(avg_sqci, SF_HU_low), format="d", big.mark=",")` sq ft, `r formatC(pull(avg_sqci, SF_HU_upp), format="d", big.mark=",")` sq ft).
39 |
40 |
41 | 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
42 |
43 | ```{r ex1_2}
44 | cool_totratio<-recs_des %>%
45 | summarize(
46 | PropCooled=survey_ratio(
47 | numerator = TOTCSQFT,
48 | denominator = TOTSQFT_EN,
49 | vartype = "se")
50 | )
51 | ```
52 |
53 | On average US households have a ratio of `r round(pull(cool_totratio, PropCooled), 2)` square feet cooled per total square feet.
54 |
55 |
56 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
57 |
58 | ```{r ex1_3}
59 | med_wintertemp<-recs_des %>%
60 | summarize(
61 | temp_winter=survey_median(WinterTempNight,
62 | vartype = "se",
63 | na.rm = TRUE)
64 | )
65 | ```
66 |
67 | The median temperature housing units are set to during the night in the winter is `r round(pull(med_wintertemp, temp_winter), 2)` degrees Fahrenheit.
68 |
69 |
70 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
71 |
72 | ```{r ex1_4}
73 | quant_wintertemp<-recs_des %>% # store the estimate (as ex1_1-ex1_3 do) so the text below reports this result
74 | summarize(
75 | WinterNightTemp=survey_quantile(WinterTempNight,
76 | quantiles = 0.5, # 0.5 = 50th percentile; the estimate column is named WinterNightTemp_q50
77 | vartype = "se",
78 | na.rm = TRUE)
79 | )
80 | ```
81 |
82 | The 50th percentile (median) temperature housing units are set to during the night in the winter is `r round(pull(quant_wintertemp, WinterNightTemp_q50), 2)` degrees Fahrenheit.
83 |
84 |
85 | # Part 2
86 |
87 | 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
88 |
89 | ```{r ex2_1}
90 | # option 1: cascade() reports the mean for the full grouping plus each coarser roll-up level
91 | recs_des %>%
92 | group_by(Region, Division, Urbanicity) %>%
93 | cascade(
94 | EnergyCost=survey_mean(TOTALDOL)
95 | )
96 |
97 | # option 2: summarize() reports the mean only for each
98 | # Region x Division x Urbanicity combination
99 | recs_des %>%
100 | group_by(Region, Division, Urbanicity) %>%
101 | summarize(
102 | EnergyCost=survey_mean(TOTALDOL)
103 | )
104 | ```
105 |
106 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
107 |
108 | ```{r ex2_2}
109 | med_billsouth<-recs_des %>%
110 | filter(Region=="South") %>%
111 | summarize(
112 | MedElBill=survey_median(DOLLAREL,
113 | vartype="ci")
114 | )
115 | ```
116 |
117 | The median electric cost for housing units in the South is \$`r formatC(pull(med_billsouth, MedElBill), format="d", big.mark=",")` (\$`r formatC(pull(med_billsouth, MedElBill_low), format="d", big.mark=",")`, \$`r formatC(pull(med_billsouth, MedElBill_upp), format="d", big.mark=",")`).
118 |
119 |
120 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
121 |
122 | ```{r ex2_3}
123 | daytemp_ttest<-recs_des %>%
124 | svyttest(design=., # "." passes the piped-in design object to the design argument
125 | formula = I(WinterTempDay-SummerTempDay)~0, # per-unit winter-minus-summer difference; "~0" requests a one-sample test that its mean is 0
126 | na.rm = TRUE)
127 | ```
128 |
129 | On average housing units have set the temperature lower in the winter than the summer, p-value=`r pluck(daytemp_ttest, "p.value") %>% round(5)`.
130 |
131 |
132 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
133 |
134 | ```{r ex2_4}
135 | m1 <- recs_des %>%
136 | svyglm(design=.,
137 | formula=DOLLAREL~Region,
138 | na.action=na.omit)
139 | summary(m1)
140 | ```
141 |
142 | Yes, there is evidence that the average electric bill varies by region.
143 |
144 |
145 | 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
146 |
147 | ```{r ex2_5}
148 | m2 <- recs_des %>%
149 | svyglm(design=.,
150 | formula=TOTALDOL~TOTCSQFT,
151 | na.action=na.omit)
152 | summary(m2)
153 | ```
154 |
155 | For each additional cooled square foot, the total energy cost increases by \$`r round(pluck(m2$coefficients,"TOTCSQFT"),2)`.
156 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Design objects and derived variables exercise"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Course set-up
9 | #'
10 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
11 | #'
12 | ## ----setup---------------------------------------------------------------
13 | # install.packages("tidyverse")
14 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
15 | # install.packages("srvyr")
16 | # install.packages("here")
17 | # install.packages("palmerpenguins")
18 | # install.packages("remotes")
19 |
20 | library(tidyverse) # for tidyverse
21 | library(here) # for file paths
22 | library(srvyr)
23 |
24 |
25 |
26 | #'
27 | #' # Part 1 - Design Objects
28 | #'
29 | #' In these exercises, you will be given a study and assume you have the data. How would you create the design object?
30 | #'
31 | #' 1. California Health Interview Survey - 2019-2020
32 | #' - Note that you can do this with either the design variables (Taylor series linearization) or the replicate weights.
33 | #' - Useful links:
34 | #' -
35 | #' -
36 | #' -
37 | #' - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`.
38 | #'
39 | ## ----chis, eval=FALSE----------------------------------------------------
40 | ## chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat"))
41 | ##
42 |
43 | #'
44 | #' 2. National Survey on Drug Use and Health - 2019
45 | #' - Useful links:
46 | #' -
47 | #' -
48 | #' - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`.
49 | #'
50 | ## ----nsduh, eval=FALSE---------------------------------------------------
51 | ## nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV"))
52 | ##
53 |
54 | #'
55 | #'
56 | #' # Part 2 - Derived variables
57 | #'
58 | #' Before exercises, read the data in
59 | ## ----datin, cache=TRUE---------------------------------------------------
60 | anes <- read_rds(here("Data", "anes_2020.rds"))
61 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv"))
62 |
63 | #'
64 | #' In these exercises, you will be given specifications for a derived variable. Create the variable and check your work. Useful resources:
65 | #'
66 | #' - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf)
67 | #' - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md)
68 | #' - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet
69 | #'
70 | #' 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x`
71 | #'
72 | #' - Under $25,000
73 | #' - $25,000-49,999
74 | #' - $50,000-74,999
75 | #' - $75,000-99,999
76 | #' - $100,000 or more
77 | #'
78 | ## ----income5-------------------------------------------------------------
79 |
80 |
81 | #'
82 | #' 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x`
83 | #'
84 | #' - 18-24
85 | #' - 25-44
86 | #' - 45-64
87 | #' - 65-74
88 | #' - 75 or older
89 | #'
90 | #'
91 | ## ----age5----------------------------------------------------------------
92 |
93 |
94 | #'
95 | #' 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once:
96 | #'
97 | #' - Reducing or forgoing basic necessities to pay energy costs (SCALEB)
98 | #' - Leaving home at unhealthy temperature (SCALEG)
99 | #' - Receiving disconnect or delivery stop notice (SCALEE)
100 | #' - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK)
101 | #' - Unable to use cooling equipment (NOACBROKE, NOACEL)
102 | #'
103 | #' The relevant variables that should be used are included in parentheses.
104 | #'
105 | ## ----energyinsec---------------------------------------------------------
106 |
107 |
108 | #'
109 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Design objects and derived variables exercise"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Course set-up
9 |
10 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
11 |
12 | ```{r setup}
13 | # install.packages("tidyverse")
14 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
15 | # install.packages("srvyr")
16 | # install.packages("here")
17 | # install.packages("palmerpenguins")
18 | # install.packages("remotes")
19 |
20 | library(tidyverse) # for tidyverse
21 | library(here) # for file paths
22 | library(srvyr)
23 |
24 |
25 | ```
26 |
27 | # Part 1 - Design Objects
28 |
29 | In these exercises, you will be given a study and assume you have the data. How would you create the design object?
30 |
31 | 1. California Health Interview Survey - 2019-2020
32 | - Note that you can do this with either the design variables (Taylor series linearization) or the replicate weights.
33 | - Useful links:
34 | -
35 | -
36 | -
37 | - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`.
38 |
39 | ```{r chis, eval=FALSE}
40 | chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat"))
41 |
42 | ```
43 |
44 | 2. National Survey on Drug Use and Health - 2019
45 | - Useful links:
46 | -
47 | -
48 | - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`.
49 |
50 | ```{r nsduh, eval=FALSE}
51 | nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV"))
52 |
53 | ```
54 |
55 |
56 | # Part 2 - Derived variables
57 |
58 | Before exercises, read the data in
59 | ```{r datin, cache=TRUE}
60 | anes <- read_rds(here("Data", "anes_2020.rds"))
61 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv"))
62 | ```
63 |
64 | In these exercises, you will be given specifications for a derived variable. Create the variable and check your work. Useful resources:
65 |
66 | - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf)
67 | - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md)
68 | - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet
69 |
70 | 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x`
71 |
72 | - Under $25,000
73 | - $25,000-49,999
74 | - $50,000-74,999
75 | - $75,000-99,999
76 | - $100,000 or more
77 |
78 | ```{r income5}
79 |
80 | ```
81 |
82 | 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x`
83 |
84 | - 18-24
85 | - 25-44
86 | - 45-64
87 | - 65-74
88 | - 75 or older
89 |
90 |
91 | ```{r age5}
92 |
93 | ```
94 |
95 | 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once:
96 |
97 | - Reducing or forgoing basic necessities to pay energy costs (SCALEB)
98 | - Leaving home at unhealthy temperature (SCALEG)
99 | - Receiving disconnect or delivery stop notice (SCALEE)
100 | - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK)
101 | - Unable to use cooling equipment (NOACBROKE, NOACEL)
102 |
103 | The relevant variables that should be used are included in parentheses.
104 |
105 | ```{r energyinsec}
106 |
107 | ```
108 |
109 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_cache/html/__packages:
--------------------------------------------------------------------------------
1 | base
2 | methods
3 | datasets
4 | utils
5 | grDevices
6 | graphics
7 | stats
8 | tidyverse
9 | ggplot2
10 | tibble
11 | tidyr
12 | readr
13 | purrr
14 | dplyr
15 | stringr
16 | forcats
17 | here
18 | srvyr
19 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.RData
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdb
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_cache/html/datin_95c553bea5c677086a0878157dbd740f.rdx
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Design objects and derived variables exercise solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Course set-up
9 | #'
10 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
11 | #'
12 | ## ----setup---------------------------------------------------------------
13 | # install.packages("tidyverse")
14 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
15 | # install.packages("srvyr")
16 | # install.packages("here")
17 | # install.packages("palmerpenguins")
18 | # install.packages("remotes")
19 |
20 | library(tidyverse) # for tidyverse
21 | library(here) # for file paths
22 | library(srvyr)
23 |
24 |
25 |
26 | #'
27 | #' # Part 1 - Design Objects
28 | #'
29 | #' In these exercises, you will be given a study and assume you have the data. How would you create the design object?
30 | #'
31 | #' 1. California Health Interview Survey - 2019-2020
32 | #' - Note that you can do this with either the design variables (Taylor series linearization) or the replicate weights.
33 | #' - Useful links:
34 | #' -
35 | #' -
36 | #' -
37 | #' - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`.
38 | #'
39 | ## ----chis, eval=FALSE----------------------------------------------------
40 | ##
41 | ## chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat"))
42 | ##
43 | ## rep_des <- chis19_adult %>%
44 | ## as_survey_rep(weights=RAKEDW0, repweights=stringr::str_c("RAKEDW", 1:80),
45 | ## type="JKn", rscales=1)
46 | ##
47 | ## tsl_des <- chis19_adult %>%
48 | ## as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=1)
49 | ##
50 | ## # or
51 | ## tsl_des <- chis19_adult %>%
52 | ## as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=0)
53 | ##
54 | ##
55 |
56 | #'
57 | #' 2. National Survey on Drug Use and Health - 2019
58 | #' - Useful links:
59 | #' -
60 | #' -
61 | #' - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`.
62 | #'
63 | ## ----nsduh, eval=FALSE---------------------------------------------------
64 | ## nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV"))
65 | ## nsduh_des <- nsduh19 %>%
66 | ## as_survey_design(weights=ANALWT_C, strata=VESTR, ids=VEREP, nest=TRUE)
67 | ##
68 |
69 | #'
70 | #'
71 | #' # Part 2 - Derived variables
72 | #'
73 | #' Before exercises, read the data in
74 | ## ----datin, cache=TRUE---------------------------------------------------
75 | anes <- read_rds(here("Data", "anes_2020.rds"))
76 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv"))
77 |
78 | #'
79 | #' In these exercises, you will be given specifications for a derived variable. Create the variable and check your work. Useful resources:
80 | #'
81 | #' - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf)
82 | #' - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md)
83 | #' - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet
84 | #'
85 | #' 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x`
86 | #'
87 | #' - Under $25,000
88 | #' - $25,000-49,999
89 | #' - $50,000-74,999
90 | #' - $75,000-99,999
91 | #' - $100,000 or more
92 | #'
93 | ## ----income5-------------------------------------------------------------
94 | anes_income <- anes %>%
95 | mutate(
96 | Income5=factor(case_when( # collapse the V201617x codes into five income brackets
97 | V201617x %in% c(1:4)~"Under $25,000",
98 | V201617x %in% c(5:9)~"$25,000-49,999",
99 | V201617x %in% c(10:13)~"$50,000-74,999",
100 | V201617x %in% c(14:16)~"$75,000-99,999",
101 | V201617x %in% c(17:22)~"$100,000 or more",
102 | TRUE ~ NA_character_ # any value outside codes 1-22 becomes NA
103 | ), levels=c("Under $25,000", "$25,000-49,999", "$50,000-74,999", "$75,000-99,999", "$100,000 or more")) # explicit levels fix the factor order, lowest bracket first
104 | )
105 | # check: cross-tabulate the derived variable against the source codes
106 | anes_income %>%
107 | count(Income5, V201617x)
108 |
109 | #'
110 | #' 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x`
111 | #'
112 | #' - 18-24
113 | #' - 25-44
114 | #' - 45-64
115 | #' - 65-74
116 | #' - 75 or older
117 | #'
118 | #'
119 | ## ----age5----------------------------------------------------------------
120 | anes_age <- anes %>%
121 | mutate(
122 | AgeGroup5=factor(case_when( # bin V201507x into five age groups
123 | V201507x %in% c(18:24)~"18-24",
124 | V201507x %in% c(25:44)~"25-44",
125 | V201507x %in% c(45:64)~"45-64",
126 | V201507x %in% c(65:74)~"65-74",
127 | V201507x %in% c(75:90)~"75 or older", # matches values 75 through 90 only
128 | TRUE ~ NA_character_), # any value not matched above becomes NA
129 | levels=c('18-24', '25-44', '45-64', '65-74', '75 or older' ))
130 | )
131 | # check: per derived group, the range of source values, the row count, and how many source values are NA
132 | anes_age %>%
133 | group_by(AgeGroup5) %>%
134 | summarise(
135 | minV=min(V201507x, na.rm = TRUE),
136 | maxV=max(V201507x, na.rm = TRUE),
137 | ncat=n(),
138 | nNA_v=sum(is.na(V201507x))
139 | )
140 |
141 | #'
142 | #' 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once:
143 | #'
144 | #' - Reducing or forgoing basic necessities to pay energy costs (SCALEB)
145 | #' - Leaving home at unhealthy temperature (SCALEG)
146 | #' - Receiving disconnect or delivery stop notice (SCALEE)
147 | #' - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK)
148 | #' - Unable to use cooling equipment (NOACBROKE, NOACEL)
149 | #'
150 | #' The relevant variables that should be used are included in parentheses.
151 | #'
152 | ## ----energyinsec---------------------------------------------------------
153 | recs_insecur <- recs_in %>%
154 | select(starts_with("SCALE"), starts_with("NOHEAT"), starts_with("NOAC"), NWEIGHT) %>% # keep only the SCALE*/NOHEAT*/NOAC* columns and the weight
155 | mutate(
156 | EnergyInsec=SCALEB %in% c(1:3) | SCALEG %in% c(1:3) | SCALEE %in% c(1:3) | # TRUE if any of the three SCALE items is coded 1-3 ...
157 | NOHEATBROKE==1 | NOHEATEL==1|NOHEATNG==1|NOHEATBULK==1| # ... or any heating item equals 1 ...
158 | NOACBROKE==1|NOACEL==1 # ... or any cooling item equals 1. NOTE(review): `==` yields NA on NA input while `%in%` yields FALSE - confirm these columns hold no NA
159 | )
160 | # check: list every observed combination of inputs with the resulting flag
161 | recs_insecur %>%
162 | count(EnergyInsec, SCALEB, SCALEG, SCALEE, NOHEATBROKE, NOHEATEL, NOHEATNG,
163 | NOHEATBULK, NOACBROKE, NOACEL)
164 |
165 |
166 | #'
167 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Design objects and derived variables exercise solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Course set-up
9 |
10 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
11 |
12 | ```{r setup}
13 | # install.packages("tidyverse")
14 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
15 | # install.packages("srvyr")
16 | # install.packages("here")
17 | # install.packages("palmerpenguins")
18 | # install.packages("remotes")
19 |
20 | library(tidyverse) # for tidyverse
21 | library(here) # for file paths
22 | library(srvyr)
23 |
24 |
25 | ```
26 |
27 | # Part 1 - Design Objects
28 |
29 | In these exercises, you will be given a study and assume you have the data. How would you create the design object?
30 |
31 | 1. California Health Interview Survey - 2019-2020
32 | - Note that you can do this with either the design variables (Taylor series linearization) or the replicate weights.
33 | - Useful links:
34 | -
35 | -
36 | -
37 | - Assume you have the Public Use File for **adults** already read into R and the dataframe is called `chis19_adult`.
38 |
39 | ```{r chis, eval=FALSE}
40 |
41 | chis19_adult <- haven::read_sas(here::here("RawData", "adult_2019_sas", "adult.sas7bdat"))
42 |
43 | rep_des <- chis19_adult %>%
44 | as_survey_rep(weights=RAKEDW0, repweights=stringr::str_c("RAKEDW", 1:80),
45 | type="JKn", rscales=1)
46 |
47 | tsl_des <- chis19_adult %>%
48 | as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=1)
49 |
50 | # or
51 | tsl_des <- chis19_adult %>%
52 | as_survey_design(weights=RAKEDW0, strata=TSVARSTR, ids=0)
53 |
54 |
55 | ```
56 |
57 | 2. National Survey on Drug Use and Health - 2019
58 | - Useful links:
59 | -
60 | -
61 | - Assume you have the Public Use File already read into R and the dataframe is called `nsduh19`.
62 |
63 | ```{r nsduh, eval=FALSE}
64 | nsduh19 <- haven::read_sav(here::here("RawData", "NSDUH_2019", "NSDUH_2019.SAV"))
65 | nsduh_des <- nsduh19 %>%
66 | as_survey_design(weights=ANALWT_C, strata=VESTR, ids=VEREP, nest=TRUE)
67 |
68 | ```
69 |
70 |
71 | # Part 2 - Derived variables
72 |
73 | Before exercises, read the data in
74 | ```{r datin, cache=TRUE}
75 | anes <- read_rds(here("Data", "anes_2020.rds"))
76 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv"))
77 | ```
78 |
79 | In these exercises, you will be given specifications for a derived variable. Create the variable and check your work. Useful resources:
80 |
81 | - [ANES Codebook Raw Variables](https://electionstudies.org/wp-content/uploads/2022/02/anes_timeseries_2020_userguidecodebook_20220210.pdf)
82 | - [ANES Codebook Existing Derived Variables](https://github.com/tidy-survey-r/tidy-survey-short-course/blob/main/Codebooks/ANES-2020-Derived-Variable-Codebook.md)
83 | - [RECS Codebook Raw Variables](https://www.eia.gov/consumption/residential/data/2015/xls/codebook_publicv4.xlsx) - Note this will download a spreadsheet
84 |
85 | 1. ANES: Create a 5-level income variable as described below. For cases where the income is refused or the interview was a breakoff, this derived variable should be `NA`. The variable should be created such that "Under $25,000" comes first and so on. Hint: Use the variable `V201617x`
86 |
87 | - Under $25,000
88 | - $25,000-49,999
89 | - $50,000-74,999
90 | - $75,000-99,999
91 | - $100,000 or more
92 |
93 | ```{r income5}
94 | anes_income <- anes %>%
95 | mutate(
96 | Income5=factor(case_when( # collapse the V201617x codes into five income brackets
97 | V201617x %in% c(1:4)~"Under $25,000",
98 | V201617x %in% c(5:9)~"$25,000-49,999",
99 | V201617x %in% c(10:13)~"$50,000-74,999",
100 | V201617x %in% c(14:16)~"$75,000-99,999",
101 | V201617x %in% c(17:22)~"$100,000 or more",
102 | TRUE ~ NA_character_ # any value outside codes 1-22 becomes NA
103 | ), levels=c("Under $25,000", "$25,000-49,999", "$50,000-74,999", "$75,000-99,999", "$100,000 or more")) # explicit levels fix the factor order, lowest bracket first
104 | )
105 | # check: cross-tabulate the derived variable against the source codes
106 | anes_income %>%
107 | count(Income5, V201617x)
108 | ```
109 |
110 | 2. ANES: Create a 5-level age variable as described below. For the cases where age is refused, this derived variable should be `NA`. Hint: use variable `V201507x`
111 |
112 | - 18-24
113 | - 25-44
114 | - 45-64
115 | - 65-74
116 | - 75 or older
117 |
118 |
119 | ```{r age5}
120 | anes_age <- anes %>%
121 | mutate(
122 | AgeGroup5=factor(case_when( # bin V201507x into five age groups
123 | V201507x %in% c(18:24)~"18-24",
124 | V201507x %in% c(25:44)~"25-44",
125 | V201507x %in% c(45:64)~"45-64",
126 | V201507x %in% c(65:74)~"65-74",
127 | V201507x %in% c(75:90)~"75 or older", # matches values 75 through 90 only
128 | TRUE ~ NA_character_), # any value not matched above becomes NA
129 | levels=c('18-24', '25-44', '45-64', '65-74', '75 or older' ))
130 | )
131 | # check: per derived group, the range of source values, the row count, and how many source values are NA
132 | anes_age %>%
133 | group_by(AgeGroup5) %>%
134 | summarise(
135 | minV=min(V201507x, na.rm = TRUE),
136 | maxV=max(V201507x, na.rm = TRUE),
137 | ncat=n(),
138 | nNA_v=sum(is.na(V201507x))
139 | )
140 | ```
141 |
142 | 3. RECS: Create a logical variable indicating whether there was any household energy insecurity. Household energy insecurity is defined as households that have any of the following occur at least once:
143 |
144 | - Reducing or forgoing basic necessities to pay energy costs (SCALEB)
145 | - Leaving home at unhealthy temperature (SCALEG)
146 | - Receiving disconnect or delivery stop notice (SCALEE)
147 | - Unable to use heating equipment (NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK)
148 | - Unable to use cooling equipment (NOACBROKE, NOACEL)
149 |
150 | The relevant variables that should be used are included in parentheses.
151 |
152 | ```{r energyinsec}
153 | # Flag households reporting any energy-insecurity indicator. Every test uses
154 | # %in% (which never yields NA) instead of ==, so a missing indicator cannot turn the flag NA.
155 | recs_insecur <- recs_in %>%
156 |   select(starts_with("SCALE"), starts_with("NOHEAT"), starts_with("NOAC"), NWEIGHT) %>%
157 |   mutate(
158 |     EnergyInsec = SCALEB %in% 1:3 | SCALEG %in% 1:3 | SCALEE %in% 1:3 |
159 |       NOHEATBROKE %in% 1 | NOHEATEL %in% 1 | NOHEATNG %in% 1 | NOHEATBULK %in% 1 |
160 |       NOACBROKE %in% 1 | NOACEL %in% 1
161 |   )
162 | # Check: tabulate the flag against every indicator that feeds it.
163 | recs_insecur %>% count(EnergyInsec, SCALEB, SCALEG, SCALEE, NOHEATBROKE, NOHEATEL, NOHEATNG, NOHEATBULK, NOACBROKE, NOACEL)
164 |
165 | ```
166 |
167 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/__packages:
--------------------------------------------------------------------------------
1 | tidyverse
2 | ggplot2
3 | tibble
4 | tidyr
5 | readr
6 | purrr
7 | dplyr
8 | stringr
9 | forcats
10 | here
11 | srvyr
12 |
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.RData
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdb
--------------------------------------------------------------------------------
/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Exercises/DesignDerivedVariablesExercises_solutions_cache/html/datin_71bc85b99d78d2975dbdaf1205650ccd.rdx
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Warm-up Exercises"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Course set-up
9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 | #'
11 | ## ----setup----------------------------------------------------------------------------------------------------------------------
12 | # install.packages("tidyverse")
13 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
14 | # install.packages("srvyr")
15 | # install.packages("here")
16 | # install.packages("palmerpenguins")
17 | # install.packages("remotes")
18 |
19 | library(tidyverse) # for tidyverse
20 | library(here) # for file paths
21 | library(palmerpenguins)
22 |
23 |
24 |
25 | #'
26 | #' # Warm-up exercises for Day 1
27 | #'
28 | ## ----datapeek-------------------------------------------------------------------------------------------------------------------
29 | glimpse(penguins)
30 |
31 | #'
32 | #' How many penguins of each species are there? Hint: use `count`
33 | ## ----speciestab-----------------------------------------------------------------------------------------------------------------
34 |
35 |
36 | #'
37 | #' How many penguins of each species and sex are there? Hint: use `count`
38 | #'
39 | ## ----speciessextab--------------------------------------------------------------------------------------------------------------
40 |
41 |
42 | #'
43 | #'
44 | #' What is the proportion of each species of penguins? Hint: use `count` then `mutate`
45 | #'
46 | ## ----speciestabp----------------------------------------------------------------------------------------------------------------
47 |
48 |
49 | #'
50 | #' What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate`
51 | #'
52 | ## ----speciessextabp-------------------------------------------------------------------------------------------------------------
53 |
54 |
55 | #'
56 | #'
57 | #' # Warm-up exercises for Day 2
58 | #'
59 | #' What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data
60 | #'
61 | ## ----peng_meanmass--------------------------------------------------------------------------------------------------------------
62 |
63 |
64 | #'
65 | #' What is the mean length of flipper by species? Hint: use `group_by`
66 | #'
67 | ## ----peng_meanflip_species------------------------------------------------------------------------------------------------------
68 |
69 |
70 | #'
71 | #' What is the mean flipper length by species and sex?
72 | #'
73 | ## ----peng_meanflip_speciessex---------------------------------------------------------------------------------------------------
74 |
75 |
76 | #'
77 | #' Fit a simple linear regression between body mass and flipper length.
78 | #'
79 | ## ----pengLM---------------------------------------------------------------------------------------------------------------------
80 |
81 |
82 | #'
83 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Warm-up Exercises"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Course set-up
9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 |
11 | ```{r setup}
12 | # install.packages("tidyverse")
13 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
14 | # install.packages("srvyr")
15 | # install.packages("here")
16 | # install.packages("palmerpenguins")
17 | # install.packages("remotes")
18 |
19 | library(tidyverse) # for tidyverse
20 | library(here) # for file paths
21 | library(palmerpenguins)
22 |
23 |
24 | ```
25 |
26 | # Warm-up exercises for Day 1
27 |
28 | ```{r datapeek}
29 | glimpse(penguins)
30 | ```
31 |
32 | How many penguins of each species are there? Hint: use `count`
33 | ```{r speciestab}
34 |
35 | ```
36 |
37 | How many penguins of each species and sex are there? Hint: use `count`
38 |
39 | ```{r speciessextab}
40 |
41 | ```
42 |
43 |
44 | What is the proportion of each species of penguins? Hint: use `count` then `mutate`
45 |
46 | ```{r speciestabp}
47 |
48 | ```
49 |
50 | What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate`
51 |
52 | ```{r speciessextabp}
53 |
54 | ```
55 |
56 |
57 | # Warm-up exercises for Day 2
58 |
59 | What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data
60 |
61 | ```{r peng_meanmass}
62 |
63 | ```
64 |
65 | What is the mean length of flipper by species? Hint: use `group_by`
66 |
67 | ```{r peng_meanflip_species}
68 |
69 | ```
70 |
71 | What is the mean flipper length by species and sex?
72 |
73 | ```{r peng_meanflip_speciessex}
74 |
75 | ```
76 |
77 | Fit a simple linear regression between body mass and flipper length.
78 |
79 | ```{r pengLM}
80 |
81 | ```
82 |
83 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Warm-up Exercise Solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Course set-up
9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 | #'
11 | ## ----setup----------------------------------------------------------------------------------------------------------------------
12 | # install.packages("tidyverse")
13 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
14 | # install.packages("srvyr")
15 | # install.packages("here")
16 | # install.packages("palmerpenguins")
17 | # install.packages("remotes")
18 |
19 | library(tidyverse) # for tidyverse
20 | library(here) # for file paths
21 | library(palmerpenguins)
22 |
23 |
24 |
25 | #'
26 | #' # Warm-up exercises for Day 1
27 | #'
28 | ## ----datapeek-------------------------------------------------------------------------------------------------------------------
29 | glimpse(penguins)
30 |
31 | #'
32 | #' How many penguins of each species are there? Hint: use `count`
33 | #'
34 | ## ----speciestab-----------------------------------------------------------------------------------------------------------------
35 | # One row per species, with the number of penguins in column n
36 | penguins %>% count(species)
37 |
38 | #'
39 | #' How many penguins of each species and sex are there? Hint: use `count`
40 | #'
41 | ## ----speciessextab--------------------------------------------------------------------------------------------------------------
42 | # One row per species-by-sex combination, with its count in column n
43 | penguins %>% count(species, sex)
44 |
45 | #'
46 | #' What is the proportion of each species of penguins? Hint: use `count` then `mutate`
47 | #'
48 | ## ----speciestabp----------------------------------------------------------------------------------------------------------------
49 | # Share of all penguins belonging to each species:
50 | # count per species, then divide by the grand total.
51 | penguins %>%
52 |   count(species) %>%
53 |   mutate(p = n / sum(n))
54 |
55 | #'
56 | #' What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate`
57 | #'
58 | ## ----speciessextabp-------------------------------------------------------------------------------------------------------------
59 | # Share of each sex within its species: grouping by species makes
60 | # sum(n) the per-species total instead of the grand total.
61 | penguins %>%
62 |   count(species, sex) %>%
63 |   group_by(species) %>%
64 |   mutate(p = n / sum(n))
65 |
66 |
67 | #'
68 | #' # Warm-up exercises for Day 2
69 | #'
70 | #' What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data
71 | #'
72 | ## ----peng_meanmass--------------------------------------------------------------------------------------------------------------
73 | # Overall mean of body_mass_g; na.rm = TRUE skips penguins with no recorded mass
74 | penguins %>%
75 |   summarize(
76 |     MeanBodyMass = mean(body_mass_g, na.rm = TRUE)
77 |   )
78 |
79 | #'
80 | #' What is the mean length of flipper by species? Hint: use `group_by`
81 | #'
82 | ## ----peng_meanflip_species------------------------------------------------------------------------------------------------------
83 | # Mean of flipper_length_mm for each species, ignoring missing measurements
84 | penguins %>%
85 |   group_by(species) %>%
86 |   summarize(
87 |     MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
88 |   )
89 |
90 | #'
91 | #' What is the mean flipper length by species and sex?
92 | #'
93 | ## ----peng_meanflip_speciessex---------------------------------------------------------------------------------------------------
94 | # Mean of flipper_length_mm per species and sex. `.groups = "drop"` returns an
95 | # ungrouped result and avoids summarize()'s message about leftover grouping.
96 | penguins %>%
97 |   group_by(species, sex) %>%
98 |   summarize(MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE), .groups = "drop")
99 |
100 | #'
101 | #' Fit a simple linear regression between body mass and flipper length.
102 | #'
103 | ## ----pengLM---------------------------------------------------------------------------------------------------------------------
104 | # Option 1: fit the regression of body mass on flipper length with lm()
105 | mod1 <- lm(body_mass_g ~ flipper_length_mm, data = penguins)
106 | summary(mod1)
107 |
108 | # Option 2: fit the same formula with glm(), leaving the family at its default
109 | mod2 <- glm(body_mass_g ~ flipper_length_mm, data = penguins)
110 | summary(mod2)
111 |
112 | #'
113 | #'
114 | #' # Session information
115 | #'
116 | ## ----si-------------------------------------------------------------------------------------------------------------------------
117 | devtools::session_info(pkgs="attached")
118 |
119 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Warm-up Exercise Solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Course set-up
9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 |
11 | ```{r setup}
12 | # install.packages("tidyverse")
13 | # remotes::install_github("bschneidr/r-forge-survey-mirror")
14 | # install.packages("srvyr")
15 | # install.packages("here")
16 | # install.packages("palmerpenguins")
17 | # install.packages("remotes")
18 |
19 | library(tidyverse) # for tidyverse
20 | library(here) # for file paths
21 | library(palmerpenguins)
22 |
23 |
24 | ```
25 |
26 | # Warm-up exercises for Day 1
27 |
28 | ```{r datapeek}
29 | glimpse(penguins)
30 | ```
31 |
32 | How many penguins of each species are there? Hint: use `count`
33 |
34 | ```{r speciestab}
35 | # One row per species, with the number of penguins in column n
36 | penguins %>% count(species)
37 | ```
38 |
39 | How many penguins of each species and sex are there? Hint: use `count`
40 |
41 | ```{r speciessextab}
42 | # One row per species-by-sex combination, with its count in column n
43 | penguins %>% count(species, sex)
44 | ```
45 |
46 | What is the proportion of each species of penguins? Hint: use `count` then `mutate`
47 |
48 | ```{r speciestabp}
49 | # Share of all penguins belonging to each species:
50 | # count per species, then divide by the grand total.
51 | penguins %>%
52 |   count(species) %>%
53 |   mutate(p = n / sum(n))
54 | ```
55 |
56 | What is the proportion of each sex of penguins within species? Hint: use `count` then `group_by` and `mutate`
57 |
58 | ```{r speciessextabp}
59 | # Share of each sex within its species: grouping by species makes
60 | # sum(n) the per-species total instead of the grand total.
61 | penguins %>%
62 |   count(species, sex) %>%
63 |   group_by(species) %>%
64 |   mutate(p = n / sum(n))
65 |
66 | ```
67 |
68 | # Warm-up exercises for Day 2
69 |
70 | What is the mean body mass in grams of all penguins? Hint: use `summarize` and remove missing data
71 |
72 | ```{r peng_meanmass}
73 | # Overall mean of body_mass_g; na.rm = TRUE skips penguins with no recorded mass
74 | penguins %>%
75 |   summarize(
76 |     MeanBodyMass = mean(body_mass_g, na.rm = TRUE)
77 |   )
78 | ```
79 |
80 | What is the mean length of flipper by species? Hint: use `group_by`
81 |
82 | ```{r peng_meanflip_species}
83 | # Mean of flipper_length_mm for each species, ignoring missing measurements
84 | penguins %>%
85 |   group_by(species) %>%
86 |   summarize(
87 |     MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
88 |   )
89 | ```
90 |
91 | What is the mean flipper length by species and sex?
92 |
93 | ```{r peng_meanflip_speciessex}
94 | # Mean of flipper_length_mm per species and sex. `.groups = "drop"` returns an
95 | # ungrouped result and avoids summarize()'s message about leftover grouping.
96 | penguins %>%
97 |   group_by(species, sex) %>%
98 |   summarize(MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE), .groups = "drop")
99 | ```
100 |
101 | Fit a simple linear regression between body mass and flipper length.
102 |
103 | ```{r pengLM}
104 | # Option 1: fit the regression of body mass on flipper length with lm()
105 | mod1 <- lm(body_mass_g ~ flipper_length_mm, data = penguins)
106 | summary(mod1)
107 |
108 | # Option 2: fit the same formula with glm(), leaving the family at its default
109 | mod2 <- glm(body_mass_g ~ flipper_length_mm, data = penguins)
110 | summary(mod2)
111 | ```
112 |
113 |
114 | # Session information
115 |
116 | ```{r si}
117 | devtools::session_info(pkgs="attached")
118 | ```
--------------------------------------------------------------------------------
/FinalizeMaterials.R:
--------------------------------------------------------------------------------
1 | ### This program creates HTML, PDF, and PPTX slides, and R files, from the Rmd files
2 |
3 | # remotes::install_github("jhelvy/xaringanBuilder")
4 | # remotes::install_github('rstudio/chromote')
5 | # install.packages('pdftools')
6 | # install.packages('officer')
7 |
8 | library(knitr)
9 | library(here)
10 |
11 | # Extract the R code of <folder>/<fn>.Rmd into <folder>/<fn>.R (documentation = 2 is passed to purl()).
12 | mypurl <- function(folder, fn) {
13 |   rmd_path <- here(folder, stringr::str_c(fn, ".Rmd"))
14 |   r_path <- here(folder, stringr::str_c(fn, ".R"))
15 |   purl(rmd_path, output = r_path, documentation = 2)
16 | }
17 |
18 | # Render one slide deck in every distributed format. Each step consumes the
19 | # previous step's output (Rmd -> HTML -> PDF -> PPTX), so the order is fixed.
20 | # `deck` is the file name without its extension, e.g. "Slides-day-1".
21 | build_deck <- function(deck) {
22 |   deck_path <- function(ext) here("Presentation", stringr::str_c(deck, ext))
23 |
24 |   mypurl("Presentation", deck)
25 |   xaringanBuilder::build_html(
26 |     input = deck_path(".Rmd"),
27 |     output_file = deck_path(".html"))
28 |   xaringanBuilder::build_pdf(
29 |     input = deck_path(".html"),
30 |     output_file = deck_path(".pdf"),
31 |     partial_slides = TRUE)
32 |   xaringanBuilder::build_pptx(
33 |     input = deck_path(".pdf"),
34 |     output_file = deck_path(".pptx"),
35 |     partial_slides = TRUE)
36 | }
37 |
38 | # Day 1 processing
39 | build_deck("Slides-day-1")
40 | mypurl("Exercises", "CategorialExercises")
41 | mypurl("Exercises", "CategorialExercises_solutions")
42 |
43 | # Day 1/2 processing
44 | mypurl("Exercises", "WarmUpExercises")
45 | mypurl("Exercises", "WarmUpExercises_solutions")
46 |
47 | # Day 2 processing
48 | build_deck("Slides-day-2")
49 | mypurl("Exercises", "ContinuousExercises")
50 | mypurl("Exercises", "ContinuousExercises_solutions")
51 |
52 | # Day 3 processing
53 | build_deck("Slides-day-3")
54 | mypurl("Exercises", "DesignDerivedVariablesExercises")
55 | mypurl("Exercises", "DesignDerivedVariablesExercises_solutions")
--------------------------------------------------------------------------------
/Presentation/Images/IsabellaVelasquez_Headshot.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/IsabellaVelasquez_Headshot.jpeg
--------------------------------------------------------------------------------
/Presentation/Images/MAPOR-Logo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/MAPOR-Logo1.png
--------------------------------------------------------------------------------
/Presentation/Images/Project-Contents.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/Project-Contents.png
--------------------------------------------------------------------------------
/Presentation/Images/RebeccaPowell_Headshot.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/RebeccaPowell_Headshot.jpeg
--------------------------------------------------------------------------------
/Presentation/Images/StephanieZimmer_Headshot.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Images/StephanieZimmer_Headshot.jpeg
--------------------------------------------------------------------------------
/Presentation/Slides-day-1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-1.pdf
--------------------------------------------------------------------------------
/Presentation/Slides-day-1.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-1.pptx
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/crosstalk-1.2.0/css/crosstalk.min.css:
--------------------------------------------------------------------------------
1 | .container-fluid.crosstalk-bscols{margin-left:-30px;margin-right:-30px;white-space:normal}body>.container-fluid.crosstalk-bscols{margin-left:auto;margin-right:auto}.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column{display:inline-block;padding-right:12px;vertical-align:top}@media only screen and (max-width: 480px){.crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column{display:block;padding-right:inherit}}.crosstalk-input{margin-bottom:15px}.crosstalk-input .control-label{margin-bottom:0;vertical-align:middle}.crosstalk-input input[type="checkbox"]{margin:4px 0 0;margin-top:1px;line-height:normal}.crosstalk-input .checkbox{position:relative;display:block;margin-top:10px;margin-bottom:10px}.crosstalk-input .checkbox>label{padding-left:20px;margin-bottom:0;font-weight:400;cursor:pointer}.crosstalk-input .checkbox input[type="checkbox"],.crosstalk-input .checkbox-inline input[type="checkbox"]{position:absolute;margin-top:2px;margin-left:-20px}.crosstalk-input .checkbox+.checkbox{margin-top:-5px}.crosstalk-input .checkbox-inline{position:relative;display:inline-block;padding-left:20px;margin-bottom:0;font-weight:400;vertical-align:middle;cursor:pointer}.crosstalk-input .checkbox-inline+.checkbox-inline{margin-top:0;margin-left:10px}
2 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/crosstalk-1.2.0/js/crosstalk.min.js:
--------------------------------------------------------------------------------
1 | !function o(u,a,l){function s(n,e){if(!a[n]){if(!u[n]){var t="function"==typeof require&&require;if(!e&&t)return t(n,!0);if(f)return f(n,!0);var r=new Error("Cannot find module '"+n+"'");throw r.code="MODULE_NOT_FOUND",r}var i=a[n]={exports:{}};u[n][0].call(i.exports,function(e){var t=u[n][1][e];return s(t||e)},i,i.exports,o,u,a,l)}return a[n].exports}for(var f="function"==typeof require&&require,e=0;e?@[\\\]^`{|}~])/g,"\\$1")+"']"),r=JSON.parse(n[0].innerText),i=e.factory(t,r);o(t).data("crosstalk-instance",i),o(t).addClass("crosstalk-input-bound")}if(t.Shiny){var e=new t.Shiny.InputBinding,u=t.jQuery;u.extend(e,{find:function(e){return u(e).find(".crosstalk-input")},initialize:function(e){var t,n;u(e).hasClass("crosstalk-input-bound")||(n=o(t=e),Object.keys(r).forEach(function(e){n.hasClass(e)&&!n.hasClass("crosstalk-input-bound")&&i(r[e],t)}))},getId:function(e){return e.id},getValue:function(e){},setValue:function(e,t){},receiveMessage:function(e,t){},subscribe:function(e,t){u(e).data("crosstalk-instance").resume()},unsubscribe:function(e){u(e).data("crosstalk-instance").suspend()}}),t.Shiny.inputBindings.register(e,"crosstalk.inputBinding")}}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{}],7:[function(r,e,t){(function(e){"use strict";var t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}}(r("./input")),n=r("./filter");var a=e.jQuery;t.register({className:"crosstalk-input-checkboxgroup",factory:function(e,r){var i=new n.FilterHandle(r.group),o=void 0,u=a(e);return u.on("change","input[type='checkbox']",function(){var e=u.find("input[type='checkbox']:checked");if(0===e.length)o=null,i.clear();else{var t={};e.each(function(){r.map[this.value].forEach(function(e){t[e]=!0})});var 
n=Object.keys(t);n.sort(),o=n,i.set(n)}}),{suspend:function(){i.clear()},resume:function(){o&&i.set(o)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6}],8:[function(r,e,t){(function(e){"use strict";var t=n(r("./input")),l=n(r("./util")),s=r("./filter");function n(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}var f=e.jQuery;t.register({className:"crosstalk-input-select",factory:function(e,n){var t=l.dataframeToD3(n.items),r={options:[{value:"",label:"(All)"}].concat(t),valueField:"value",labelField:"label",searchField:"label"},i=f(e).find("select")[0],o=f(i).selectize(r)[0].selectize,u=new s.FilterHandle(n.group),a=void 0;return o.on("change",function(){if(0===o.items.length)a=null,u.clear();else{var t={};o.items.forEach(function(e){n.map[e].forEach(function(e){t[e]=!0})});var e=Object.keys(t);e.sort(),a=e,u.set(e)}}),{suspend:function(){u.clear()},resume:function(){a&&u.set(a)}}}})}).call(this,"undefined"!=typeof global?global:"undefined"!=typeof self?self:"undefined"!=typeof window?window:{})},{"./filter":2,"./input":6,"./util":11}],9:[function(n,e,t){(function(e){"use strict";var d=function(e,t){if(Array.isArray(e))return e;if(Symbol.iterator in Object(e))return function(e,t){var n=[],r=!0,i=!1,o=void 0;try{for(var u,a=e[Symbol.iterator]();!(r=(u=a.next()).done)&&(n.push(u.value),!t||n.length!==t);r=!0);}catch(e){i=!0,o=e}finally{try{!r&&a.return&&a.return()}finally{if(i)throw o}}return n}(e,t);throw new TypeError("Invalid attempt to destructure non-iterable instance")},t=function(e){{if(e&&e.__esModule)return e;var t={};if(null!=e)for(var n in e)Object.prototype.hasOwnProperty.call(e,n)&&(t[n]=e[n]);return t.default=e,t}}(n("./input")),a=n("./filter");var v=e.jQuery,p=e.strftime;function y(e,t){for(var n=e.toString();n.length .container-fluid.crosstalk-bscols {
12 | margin-left: auto;
13 | margin-right: auto;
14 | }
15 |
16 | .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column {
17 | display: inline-block;
18 | padding-right: 12px;
19 | vertical-align: top;
20 | }
21 |
22 | @media only screen and (max-width:480px) {
23 | .crosstalk-input-checkboxgroup .crosstalk-options-group .crosstalk-options-column {
24 | display: block;
25 | padding-right: inherit;
26 | }
27 | }
28 |
29 | /* Relevant BS3 styles to make filter_checkbox() look reasonable without Bootstrap */
30 | .crosstalk-input {
31 | margin-bottom: 15px; /* a la .form-group */
32 | .control-label {
33 | margin-bottom: 0;
34 | vertical-align: middle;
35 | }
36 | input[type="checkbox"] {
37 | margin: 4px 0 0;
38 | margin-top: 1px;
39 | line-height: normal;
40 | }
41 | .checkbox {
42 | position: relative;
43 | display: block;
44 | margin-top: 10px;
45 | margin-bottom: 10px;
46 | }
47 | .checkbox > label{
48 | padding-left: 20px;
49 | margin-bottom: 0;
50 | font-weight: 400;
51 | cursor: pointer;
52 | }
53 | .checkbox input[type="checkbox"],
54 | .checkbox-inline input[type="checkbox"] {
55 | position: absolute;
56 | margin-top: 2px;
57 | margin-left: -20px;
58 | }
59 | .checkbox + .checkbox {
60 | margin-top: -5px;
61 | }
62 | .checkbox-inline {
63 | position: relative;
64 | display: inline-block;
65 | padding-left: 20px;
66 | margin-bottom: 0;
67 | font-weight: 400;
68 | vertical-align: middle;
69 | cursor: pointer;
70 | }
71 | .checkbox-inline + .checkbox-inline {
72 | margin-top: 0;
73 | margin-left: 10px;
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/datatables-css-0.0.0/datatables-crosstalk.css:
--------------------------------------------------------------------------------
1 | .dt-crosstalk-fade {
2 | opacity: 0.2;
3 | }
4 |
5 | html body div.DTS div.dataTables_scrollBody {
6 | background: none;
7 | }
8 |
9 |
10 | /*
11 | Fix https://github.com/rstudio/DT/issues/563
12 | If the `table.display` is set to "block" (e.g., pkgdown), the browser will display
13 | datatable objects strangely. The search panel and the page buttons will still be
14 | in full-width but the table body will be "compact" and shorter.
15 | In theory, having this attribute will affect `dom="t"`
16 | with `display: block` users. But in reality, there should be no one.
17 | We may remove the below lines in the future if the upstream agree to have this there.
18 | See https://github.com/DataTables/DataTablesSrc/issues/160
19 | */
20 |
21 | table.dataTable {
22 | display: table;
23 | }
24 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/dt-core-1.11.3/css/jquery.dataTables.extra.css:
--------------------------------------------------------------------------------
1 | /* Selected rows/cells */
2 | table.dataTable tr.selected td, table.dataTable td.selected {
3 | background-color: #b0bed9 !important;
4 | }
5 | /* In case of scrollX/Y or FixedHeader */
6 | .dataTables_scrollBody .dataTables_sizing {
7 | visibility: hidden;
8 | }
9 |
10 | /* The datatables' theme CSS file doesn't define
11 | the color but with white background. It leads to an issue that
12 | when the HTML's body color is set to 'white', the user can't
13 | see the text since the background is white. One case happens in the
14 | RStudio's IDE when inline viewing the DT table inside an Rmd file,
15 | if the IDE theme is set to "Cobalt".
16 |
17 | See https://github.com/rstudio/DT/issues/447 for more info
18 |
19 | This fix should have few side effects because all the other elements
20 | of the default theme use the #333 font color.
21 |
22 | TODO: The upstream may use relative colors for both the table background
23 | and the color. It means the table can display well without this patch
24 | then. At that time, we need to remove the below CSS attributes.
25 | */
26 | div.datatables {
27 | color: #333;
28 | }
29 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/dt-core-1.11.3/css/jquery.dataTables.min.css:
--------------------------------------------------------------------------------
1 | td.dt-control{background:url() no-repeat center center;cursor:pointer}tr.dt-hasChild td.dt-control{background:url() no-repeat center center}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable td.dt-nowrap{white-space:nowrap}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable{width:100%;margin:0 
auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable thead th,table.dataTable thead td{padding:10px 18px;border-bottom:1px solid #111}table.dataTable thead th:active,table.dataTable thead td:active{outline:none}table.dataTable tfoot th,table.dataTable tfoot td{padding:10px 18px 6px 18px;border-top:1px solid #111}table.dataTable thead .sorting,table.dataTable thead .sorting_asc,table.dataTable thead .sorting_desc,table.dataTable thead .sorting_asc_disabled,table.dataTable thead .sorting_desc_disabled{cursor:pointer;*cursor:hand;background-repeat:no-repeat;background-position:center right}table.dataTable thead .sorting{background-image:url()}table.dataTable thead .sorting_asc{background-image:url() !important}table.dataTable thead .sorting_desc{background-image:url() !important}table.dataTable thead .sorting_asc_disabled{background-image:url()}table.dataTable thead .sorting_desc_disabled{background-image:url()}table.dataTable tbody tr{background-color:#fff}table.dataTable tbody tr.selected{background-color:#b0bed9}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border tbody th,table.dataTable.row-border tbody td,table.dataTable.display tbody th,table.dataTable.display tbody td{border-top:1px solid #ddd}table.dataTable.row-border tbody tr:first-child th,table.dataTable.row-border tbody tr:first-child td,table.dataTable.display tbody tr:first-child th,table.dataTable.display tbody tr:first-child td{border-top:none}table.dataTable.cell-border tbody th,table.dataTable.cell-border tbody td{border-top:1px solid #ddd;border-right:1px solid #ddd}table.dataTable.cell-border tbody tr th:first-child,table.dataTable.cell-border tbody tr td:first-child{border-left:1px solid #ddd}table.dataTable.cell-border tbody tr:first-child th,table.dataTable.cell-border tbody tr:first-child td{border-top:none}table.dataTable.stripe tbody tr.odd,table.dataTable.display 
tbody tr.odd{background-color:#f9f9f9}table.dataTable.stripe tbody tr.odd.selected,table.dataTable.display tbody tr.odd.selected{background-color:#acbad4}table.dataTable.hover tbody tr:hover,table.dataTable.display tbody tr:hover{background-color:#f6f6f6}table.dataTable.hover tbody tr:hover.selected,table.dataTable.display tbody tr:hover.selected{background-color:#aab7d1}table.dataTable.order-column tbody tr>.sorting_1,table.dataTable.order-column tbody tr>.sorting_2,table.dataTable.order-column tbody tr>.sorting_3,table.dataTable.display tbody tr>.sorting_1,table.dataTable.display tbody tr>.sorting_2,table.dataTable.display tbody tr>.sorting_3{background-color:#fafafa}table.dataTable.order-column tbody tr.selected>.sorting_1,table.dataTable.order-column tbody tr.selected>.sorting_2,table.dataTable.order-column tbody tr.selected>.sorting_3,table.dataTable.display tbody tr.selected>.sorting_1,table.dataTable.display tbody tr.selected>.sorting_2,table.dataTable.display tbody tr.selected>.sorting_3{background-color:#acbad5}table.dataTable.display tbody tr.odd>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd>.sorting_1{background-color:#f1f1f1}table.dataTable.display tbody tr.odd>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd>.sorting_2{background-color:#f3f3f3}table.dataTable.display tbody tr.odd>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd>.sorting_3{background-color:whitesmoke}table.dataTable.display tbody tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_1{background-color:#a6b4cd}table.dataTable.display tbody tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_2{background-color:#a8b5cf}table.dataTable.display tbody tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_3{background-color:#a9b7d1}table.dataTable.display tbody tr.even>.sorting_1,table.dataTable.order-column.stripe tbody 
tr.even>.sorting_1{background-color:#fafafa}table.dataTable.display tbody tr.even>.sorting_2,table.dataTable.order-column.stripe tbody tr.even>.sorting_2{background-color:#fcfcfc}table.dataTable.display tbody tr.even>.sorting_3,table.dataTable.order-column.stripe tbody tr.even>.sorting_3{background-color:#fefefe}table.dataTable.display tbody tr.even.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_1{background-color:#acbad5}table.dataTable.display tbody tr.even.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_2{background-color:#aebcd6}table.dataTable.display tbody tr.even.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_3{background-color:#afbdd8}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{background-color:#eaeaea}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{background-color:#ececec}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{background-color:#efefef}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{background-color:#a2aec7}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{background-color:#a3b0c9}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{background-color:#a5b2cb}table.dataTable.no-footer{border-bottom:1px solid #111}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable.compact thead th,table.dataTable.compact thead td{padding:4px 17px}table.dataTable.compact tfoot th,table.dataTable.compact tfoot td{padding:4px}table.dataTable.compact tbody th,table.dataTable.compact tbody 
td{padding:4px}table.dataTable th,table.dataTable td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_length select{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;padding:4px}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;margin-left:3px}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:.25em}.dataTables_wrapper .dataTables_paginate .paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;*cursor:hand;color:#333 !important;border:1px solid transparent;border-radius:2px}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:#333 !important;border:1px solid #979797;background-color:white;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, white), color-stop(100%, #dcdcdc));background:-webkit-linear-gradient(top, white 0%, #dcdcdc 100%);background:-moz-linear-gradient(top, white 0%, #dcdcdc 100%);background:-ms-linear-gradient(top, white 0%, #dcdcdc 100%);background:-o-linear-gradient(top, white 0%, #dcdcdc 100%);background:linear-gradient(to bottom, white 0%, #dcdcdc 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate 
.paginate_button:hover{color:white !important;border:1px solid #111;background-color:#585858;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, #111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#2b2b2b;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_processing{position:absolute;top:50%;left:50%;width:100%;height:40px;margin-left:-50%;margin-top:-25px;padding-top:20px;text-align:center;font-size:1.2em;background-color:white;background:-webkit-gradient(linear, left top, right top, color-stop(0%, rgba(255, 255, 255, 0)), color-stop(25%, rgba(255, 255, 255, 0.9)), color-stop(75%, rgba(255, 255, 255, 0.9)), color-stop(100%, rgba(255, 255, 255, 0)));background:-webkit-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-moz-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-ms-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, 
rgba(255, 255, 255, 0) 100%);background:-o-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:linear-gradient(to right, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%)}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:#333}.dataTables_wrapper .dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{*margin-top:-1px;-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid #111}.dataTables_wrapper.no-footer div.dataTables_scrollHead table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper 
.dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:.5em}}
2 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/header-attrs-2.11.22/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/header-attrs-2.11/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-1_files/header-attrs-2.13/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-2.pdf
--------------------------------------------------------------------------------
/Presentation/Slides-day-2.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-2.pptx
--------------------------------------------------------------------------------
/Presentation/Slides-day-2_files/figure-html/plot_sf_elbill_disp-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-2_files/figure-html/plot_sf_elbill_disp-1.png
--------------------------------------------------------------------------------
/Presentation/Slides-day-2_files/header-attrs-2.11.22/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-2_files/header-attrs-2.13/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3.pdf
--------------------------------------------------------------------------------
/Presentation/Slides-day-3.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3.pptx
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_cache/html/__packages:
--------------------------------------------------------------------------------
1 | tidyverse
2 | ggplot2
3 | tibble
4 | tidyr
5 | readr
6 | purrr
7 | dplyr
8 | stringr
9 | forcats
10 | here
11 | srvyr
12 | Matrix
13 | survival
14 | survey
15 | xaringan
16 | knitr
17 | tidycensus
18 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.RData
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdb
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_cache/html/acsin_2028e4f3797786227f2217d2d50f2b92.rdx
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_files/figure-html/der3c-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/Presentation/Slides-day-3_files/figure-html/der3c-1.png
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_files/header-attrs-2.11.22/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/Slides-day-3_files/header-attrs-2.13/header-attrs.js:
--------------------------------------------------------------------------------
1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
2 | // be compatible with the behavior of Pandoc < 2.8).
3 | document.addEventListener('DOMContentLoaded', function(e) {
4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
5 | var i, h, a;
6 | for (i = 0; i < hs.length; i++) {
7 | h = hs[i];
8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
9 | a = h.attributes;
10 | while (a.length > 0) h.removeAttribute(a[0].name);
11 | }
12 | });
13 |
--------------------------------------------------------------------------------
/Presentation/xaringan-themer-mod.css:
--------------------------------------------------------------------------------
1 | /* -------------------------------------------------------
2 | *
3 | * !! This file was generated by xaringanthemer !!
4 | * !! and then customized !!
5 | *
6 | *
7 | * Issues or likes?
8 | * - https://github.com/gadenbuie/xaringanthemer
9 | * - https://www.garrickadenbuie.com
10 | *
11 | * Need help? Try:
12 | * - vignette(package = "xaringanthemer")
13 | * - ?xaringanthemer::style_xaringan
14 | * - xaringan wiki: https://github.com/yihui/xaringan/wiki
15 | * - remarkjs wiki: https://github.com/gnab/remark/wiki
16 | *
17 | * Version: 0.4.1
18 | *
19 | * ------------------------------------------------------- */
20 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap);
21 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap);
22 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap);
23 | @import url('https://fonts.googleapis.com/css2?family=Telex&family=Ubuntu:wght@300&display=swap');
24 |
25 |
26 | :root { /* Theme-wide custom properties; the rules in this stylesheet read them through var(). */
27 | /* Fonts: the text/header family, fallback, base and *-is-google properties plus --text-font-size are declared here but not referenced by any rule in this stylesheet (body and h1-h3 set their families directly). */
28 | --text-font-family: 'Noto Sans';
29 | --text-font-is-google: 1;
30 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial;
31 | --text-font-base: sans-serif;
32 | --header-font-family: Cabin;
33 | --header-font-is-google: 1;
34 | --header-font-family-fallback: Georgia, serif;
35 | --code-font-family: 'Source Code Pro'; /* read by .remark-code and .remark-inline-code */
36 | --code-font-is-google: 1;
37 | --base-font-size: 20px; /* applied to html, so it is the basis of every rem value */
38 | --text-font-size: 1rem;
39 | --code-font-size: 0.9rem;
40 | --code-inline-font-size: 1em;
41 | --header-h1-font-size: 2.75rem;
42 | --header-h2-font-size: 2.25rem;
43 | --header-h3-font-size: 1.75rem;
44 |
45 | /* Colors: #1E4F96 is the primary color and #00A3E0 the secondary; most other entries reuse one of the two, white or black. */
46 | --text-color: #000000;
47 | --header-color: #1E4F96;
48 | --background-color: #FFFFFF;
49 | --link-color: #1E4F96;
50 | --text-bold-color: #1E4F96;
51 | --code-highlight-color: rgba(255,255,0,0.5); /* half-transparent yellow */
52 | --inverse-text-color: #000000;
53 | --inverse-background-color: #00A3E0;
54 | --inverse-header-color: #FFFFFF;
55 | --inverse-link-color: #1E4F96;
56 | --title-slide-background-color: #1E4F96;
57 | --title-slide-text-color: #FFFFFF;
58 | --header-background-color: #1E4F96;
59 | --header-background-text-color: #FFFFFF;
60 | --primary: #1E4F96; /* the four entries below back the .primary/.secondary/.white/.black utility classes and their .bg-* variants */
61 | --secondary: #00A3E0;
62 | --white: #FFFFFF;
63 | --black: #000000;
64 | }
65 |
66 | html { /* Root font size; rem lengths in this file are relative to it. */
67 | font-size: var(--base-font-size);
68 | }
69 |
70 | body {
71 | font-family: 'Telex', sans-serif; /* set literally rather than via --text-font-family; Telex is requested by the css2 @import at the top of this file */
72 | font-weight: normal;
73 | color: var(--text-color);
74 | }
75 | h1, h2, h3 {
76 | font-family: 'Ubuntu', sans-serif; /* set literally rather than via --header-font-family */
77 | font-weight: 600; /* NOTE(review): the css2 @import requests Ubuntu with wght@300 only, so this weight is presumably synthesized by the browser - confirm the intended face */
78 | color: var(--header-color);
79 | }
80 | .remark-slide-content { /* Slide canvas: background, base text size and inner padding. */
81 | background-color: var(--background-color);
82 | font-size: 1rem;
83 | padding: 16px 64px 16px 64px; /* top right bottom left */
84 | width: 100%;
85 | height: 100%;
86 | }
87 | .remark-slide-content h1 {
88 | font-size: var(--header-h1-font-size);
89 | }
90 | .remark-slide-content h2 {
91 | font-size: var(--header-h2-font-size);
92 | }
93 | .remark-slide-content h3 {
94 | font-size: var(--header-h3-font-size);
95 | }
96 | .remark-code, .remark-inline-code { /* Code blocks and inline code share one font stack. */
97 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace;
98 | }
99 | .remark-code {
100 | font-size: var(--code-font-size);
101 | }
102 | .remark-inline-code {
103 | font-size: var(--code-inline-font-size);
104 | color: #1E4F96; /* same value as --primary, written literally */
105 | }
106 | .remark-slide-number {
107 | color: #1E4F96; /* same value as --primary, written literally */
108 | opacity: 1;
109 | font-size: 0.9rem;
110 | }
111 | strong {
112 | font-weight: bold;
113 | color: var(--text-bold-color);
114 | }
115 | a, a > code {
116 | color: var(--link-color);
117 | text-decoration: none;
118 | }
119 | .footnote { /* Positioned 60px above the bottom edge of its containing block. */
120 | position: absolute;
121 | bottom: 60px;
122 | padding-right: 4em;
123 | font-size: 0.9em;
124 | }
125 | .remark-code-line-highlighted {
126 | background-color: var(--code-highlight-color);
127 | }
128 | .inverse { /* Inverse color scheme: secondary-colored background with its own text, header and link colors. */
129 | background-color: var(--inverse-background-color);
130 | color: var(--inverse-text-color);
131 |
132 | }
133 | .inverse h1, .inverse h2, .inverse h3 {
134 | color: var(--inverse-header-color);
135 | }
136 | .inverse a, .inverse a > code {
137 | color: var(--inverse-link-color);
138 | }
139 | .title-slide, .title-slide h1, .title-slide h2, .title-slide h3 { /* Title slide: one text color for body and headers. */
140 | color: var(--title-slide-text-color);
141 | }
142 | .title-slide {
143 | background-color: var(--title-slide-background-color);
144 | }
145 | .title-slide .remark-slide-number { /* no slide number on the title slide */
146 | display: none;
147 | }
148 | /* Two-column layout: a 20% left column floated beside a 75% right column */
149 | .left-column {
150 | width: 20%;
151 | height: 92%;
152 | float: left;
153 | }
154 | .left-column h2, .left-column h3 {
155 | color: #1E4F9699; /* 8-digit hex: #1E4F96 with alpha 0x99, i.e. a dimmed primary */
156 | }
157 | .left-column h2:last-of-type, .left-column h3:last-child {
158 | color: #1E4F96; /* the last h2 of its type, or an h3 that is the last child, gets the full-opacity color */
159 | }
160 | .right-column {
161 | width: 75%;
162 | float: right;
163 | padding-top: 1em;
164 | }
165 | .pull-left { /* .pull-left / .pull-right: two floated halves, 47% wide each */
166 | float: left;
167 | width: 47%;
168 | }
169 | .pull-right {
170 | float: right;
171 | width: 47%;
172 | }
173 | .pull-right + * { /* the element immediately after .pull-right clears both floats */
174 | clear: both;
175 | }
176 | img, video, iframe { /* embedded media never exceed their container's width */
177 | max-width: 100%;
178 | }
179 | blockquote {
180 | border-left: solid 5px #00A3E080; /* #00A3E0 (the --secondary value) with alpha 0x80 */
181 | padding-left: 1em;
182 | }
183 | .remark-slide table { /* Plain slide tables: centered, ruled above and below */
184 | margin: auto;
185 | border-top: 1px solid #666;
186 | border-bottom: 1px solid #666;
187 | }
188 | .remark-slide table thead th {
189 | border-bottom: 1px solid #ddd;
190 | }
191 | th, td {
192 | padding: 5px;
193 | }
194 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { /* header, footer and even rows share one tint */
195 | background: #CCECF8;
196 | }
197 | table.dataTable tbody { /* Overrides for tables carrying the dataTable class: recolor them with the theme variables */
198 | background-color: var(--background-color);
199 | color: var(--text-color);
200 | }
201 | table.dataTable.display tbody tr.odd {
202 | background-color: var(--background-color);
203 | }
204 | table.dataTable.display tbody tr.even {
205 | background-color: #CCECF8; /* same tint as the even rows of plain slide tables */
206 | }
207 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover {
208 | background-color: rgba(255, 255, 255, 0.5);
209 | }
210 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate {
211 | color: var(--text-color);
212 | }
213 | .dataTables_wrapper .dataTables_paginate .paginate_button {
214 | color: var(--text-color) !important; /* presumably !important because the bundled DataTables stylesheet sets this button color with !important as well */
215 | }
216 |
217 | /* Horizontal alignment of code blocks: pre elements follow the slide's left/center/right class while their text stays start-aligned */
218 | .remark-slide-content.left pre,
219 | .remark-slide-content.center pre,
220 | .remark-slide-content.right pre {
221 | text-align: start;
222 | width: max-content; /* shrink to the content width, capped by max-width below */
223 | max-width: 100%;
224 | }
225 | .remark-slide-content.left pre,
226 | .remark-slide-content.right pre {
227 | min-width: 50%; /* fallback value; the next declaration replaces it where min() is supported */
228 | min-width: min(40ch, 100%);
229 | }
230 | .remark-slide-content.center pre {
231 | min-width: 66%; /* fallback value; the next declaration replaces it where min() is supported */
232 | min-width: min(50ch, 100%);
233 | }
234 | .remark-slide-content.left pre { /* auto margin on the right only: block sits at the left */
235 | margin-left: unset;
236 | margin-right: auto;
237 | }
238 | .remark-slide-content.center pre { /* auto margins on both sides: block is centered */
239 | margin-left: auto;
240 | margin-right: auto;
241 | }
242 | .remark-slide-content.right pre { /* auto margin on the left only: block sits at the right */
243 | margin-left: auto;
244 | margin-right: unset;
245 | }
246 |
247 | /* Slide Header Background for h1 elements: on slides with the header_background class, a direct-child h1 becomes a full-width colored bar pinned to the top */
248 | .remark-slide-content.header_background > h1 {
249 | display: block;
250 | position: absolute;
251 | top: 0;
252 | left: 0;
253 | width: 100%;
254 | background: var(--header-background-color);
255 | color: var(--header-background-text-color);
256 | padding: 2rem 64px 1.5rem 64px; /* 64px side padding matches .remark-slide-content */
257 | margin-top: 0;
258 | box-sizing: border-box; /* keeps the padding inside the 100% width */
259 | }
260 | .remark-slide-content.header_background {
261 | padding-top: 7rem; /* presumably reserves space for the absolutely positioned h1 bar */
262 | }
263 |
264 | @page { margin: 0; } /* no page margins for paged output */
265 | @media print { /* When printing, force the slide scaler to full size at the origin with a scale of 1 */
266 | .remark-slide-scaler {
267 | width: 100% !important;
268 | height: 100% !important;
269 | transform: scale(1) !important;
270 | top: 0 !important;
271 | left: 0 !important;
272 | }
273 | }
274 |
275 | .primary { /* Color utilities: .NAME sets the text color and .bg-NAME the background, each from the matching :root variable */
276 | color: var(--primary);
277 | }
278 | .bg-primary {
279 | background-color: var(--primary);
280 | }
281 | .secondary {
282 | color: var(--secondary);
283 | }
284 | .bg-secondary {
285 | background-color: var(--secondary);
286 | }
287 | .white {
288 | color: var(--white);
289 | }
290 | .bg-white {
291 | background-color: var(--white);
292 | }
293 | .black {
294 | color: var(--black);
295 | }
296 | .bg-black {
297 | background-color: var(--black);
298 | }
299 |
300 |
301 | .column { /* Floated column one third of the container wide, so three fit side by side */
302 | float: left;
303 | width: 33.33%;
304 | }
305 |
306 | /* Clear floats after the columns: an empty generated box at the end of .row clears both sides */
307 | .row:after {
308 | content: "";
309 | display: table;
310 | clear: both;
311 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Tidy Survey Analysis in R using the srvyr Package
2 | Materials for a forthcoming short course presented in 3 sessions.
3 |
4 | - **RawData** folder contains public use file data along with any documentation
5 | - American National Election Studies, 2016
6 | - Residential Energy Consumption Survey, 2015
7 | - **DataCleaningScripts** folder contains scripts for making public use files analysis ready
8 | - Creates derived variables
9 | - Renames some variables
10 | - Selects fewer variables just for examples
11 | - **Data** folder contains data files ready for analysis in the presentation and examples
12 | - **Presentation** folder contains the slides for the course
13 | - Includes Rmd to create slides
14 | - Slides are available in HTML, PPTX, R, and PDF formats
15 | - **Exercises** folder contains Rmd and R files with exercises and solutions to practice concepts
16 | - **Codebooks** folder contains HTML and MD codebooks for the variables used from ANES 2020 and RECS 2015.
17 |
18 | ## Sources
19 |
20 | - The American National Election Studies (https://electionstudies.org/). These materials are based on work supported by the National Science Foundation under grant numbers SES 1444721, 2014-2017, the University of Michigan, and Stanford University.
21 |
22 | - *Residential Energy Consumption Survey: Using the 2015 Microdata File to Compute Estimates and Standard Errors.* U.S. Department of Energy (2017) https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf
23 |
24 | - Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0. https://allisonhorst.github.io/palmerpenguins/
25 |
26 | - T. Lumley (2020) "survey: analysis of complex survey samples". R package version 4.0. https://r-survey.r-forge.r-project.org/survey/
27 |
28 | - Greg Freedman Ellis and Ben Schneider (2020). srvyr: 'dplyr'-Like Syntax for Summary Statistics of Survey Data. R package version 1.0.0. https://CRAN.R-project.org/package=srvyr
29 |
30 | - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.5. https://CRAN.R-project.org/package=dplyr
31 |
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016.sav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016.sav
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2020/anes_timeseries_2020_methodology_userguidecodebook_20210719.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_methodology_userguidecodebook_20210719.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2020/anes_timeseries_2020_questionnaire_20210719.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_questionnaire_20210719.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2020/anes_timeseries_2020_questionnaire_screener_20210719.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_questionnaire_screener_20210719.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2020/anes_timeseries_2020_spss_20220210.sav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_spss_20220210.sav
--------------------------------------------------------------------------------
/RawData/ANES_2020/anes_timeseries_2020_userguidecodebook_20220210.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/ANES_2020/anes_timeseries_2020_userguidecodebook_20220210.pdf
--------------------------------------------------------------------------------
/RawData/RECS_2015/2020_RECS-457A.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/RECS_2015/2020_RECS-457A.pdf
--------------------------------------------------------------------------------
/RawData/RECS_2015/README.md:
--------------------------------------------------------------------------------
1 | # Residential Energy Consumption Survey (RECS) 2015
2 |
3 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021.
--------------------------------------------------------------------------------
/RawData/RECS_2015/codebook_publicv4.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/RECS_2015/codebook_publicv4.xlsx
--------------------------------------------------------------------------------
/RawData/RECS_2015/microdata_v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidy-survey-r/tidy-survey-short-course/eb54a0c42e36e5bb12249164f0f139f86338f24d/RawData/RECS_2015/microdata_v3.pdf
--------------------------------------------------------------------------------
/tidy-survey-short-course.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 3
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------
/xaringan-themer.css:
--------------------------------------------------------------------------------
1 | /* -------------------------------------------------------
2 | *
3 | * !! This file was generated by xaringanthemer !!
4 | *
5 | * Changes made to this file directly will be overwritten
6 | * if you used xaringanthemer in your xaringan slides Rmd
7 | *
8 | * Issues or likes?
9 | * - https://github.com/gadenbuie/xaringanthemer
10 | * - https://www.garrickadenbuie.com
11 | *
12 | * Need help? Try:
13 | * - vignette(package = "xaringanthemer")
14 | * - ?xaringanthemer::style_xaringan
15 | * - xaringan wiki: https://github.com/yihui/xaringan/wiki
16 | * - remarkjs wiki: https://github.com/gnab/remark/wiki
17 | *
18 | * Version: 0.4.1
19 | *
20 | * ------------------------------------------------------- */
21 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap);
22 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap);
23 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap);
24 |
25 |
26 | :root {
27 | /* Fonts */
28 | --text-font-family: 'Noto Sans';
29 | --text-font-is-google: 1;
30 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial;
31 | --text-font-base: sans-serif;
32 | --header-font-family: Cabin;
33 | --header-font-is-google: 1;
34 | --header-font-family-fallback: Georgia, serif;
35 | --code-font-family: 'Source Code Pro';
36 | --code-font-is-google: 1;
37 | --base-font-size: 20px;
38 | --text-font-size: 1rem;
39 | --code-font-size: 0.9rem;
40 | --code-inline-font-size: 1em;
41 | --header-h1-font-size: 2.75rem;
42 | --header-h2-font-size: 2.25rem;
43 | --header-h3-font-size: 1.75rem;
44 |
45 | /* Colors */
46 | --text-color: #000000;
47 | --header-color: #1E4F96;
48 | --background-color: #FFFFFF;
49 | --link-color: #1E4F96;
50 | --text-bold-color: #1E4F96;
51 | --code-highlight-color: rgba(255,255,0,0.5);
52 | --inverse-text-color: #000000;
53 | --inverse-background-color: #00A3E0;
54 | --inverse-header-color: #FFFFFF;
55 | --inverse-link-color: #1E4F96;
56 | --title-slide-background-color: #1E4F96;
57 | --title-slide-text-color: #FFFFFF;
58 | --header-background-color: #1E4F96;
59 | --header-background-text-color: #FFFFFF;
60 | --primary: #1E4F96;
61 | --secondary: #00A3E0;
62 | --white: #FFFFFF;
63 | --black: #000000;
64 | }
65 |
66 | html {
67 | font-size: var(--base-font-size);
68 | }
69 |
70 | body {
71 | font-family: var(--text-font-family), var(--text-font-family-fallback), var(--text-font-base);
72 | font-weight: normal;
73 | color: var(--text-color);
74 | }
75 | h1, h2, h3 {
76 | font-family: var(--header-font-family), var(--header-font-family-fallback);
77 | font-weight: 600;
78 | color: var(--header-color);
79 | }
80 | .remark-slide-content {
81 | background-color: var(--background-color);
82 | font-size: 1rem;
83 | padding: 16px 64px 16px 64px;
84 | width: 100%;
85 | height: 100%;
86 | }
87 | .remark-slide-content h1 {
88 | font-size: var(--header-h1-font-size);
89 | }
90 | .remark-slide-content h2 {
91 | font-size: var(--header-h2-font-size);
92 | }
93 | .remark-slide-content h3 {
94 | font-size: var(--header-h3-font-size);
95 | }
96 | .remark-code, .remark-inline-code {
97 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace;
98 | }
99 | .remark-code {
100 | font-size: var(--code-font-size);
101 | }
102 | .remark-inline-code {
103 | font-size: var(--code-inline-font-size);
104 | color: #1E4F96;
105 | }
106 | .remark-slide-number {
107 | color: #1E4F96;
108 | opacity: 1;
109 | font-size: 0.9rem;
110 | }
111 | strong {
112 | font-weight: bold;
113 | color: var(--text-bold-color);
114 | }
115 | a, a > code {
116 | color: var(--link-color);
117 | text-decoration: none;
118 | }
119 | .footnote {
120 | position: absolute;
121 | bottom: 60px;
122 | padding-right: 4em;
123 | font-size: 0.9em;
124 | }
125 | .remark-code-line-highlighted {
126 | background-color: var(--code-highlight-color);
127 | }
128 | .inverse {
129 | background-color: var(--inverse-background-color);
130 | color: var(--inverse-text-color);
131 |
132 | }
133 | .inverse h1, .inverse h2, .inverse h3 {
134 | color: var(--inverse-header-color);
135 | }
136 | .inverse a, .inverse a > code {
137 | color: var(--inverse-link-color);
138 | }
139 | .title-slide, .title-slide h1, .title-slide h2, .title-slide h3 {
140 | color: var(--title-slide-text-color);
141 | }
142 | .title-slide {
143 | background-color: var(--title-slide-background-color);
144 | }
145 | .title-slide .remark-slide-number {
146 | display: none;
147 | }
148 | /* Two-column layout */
149 | .left-column {
150 | width: 20%;
151 | height: 92%;
152 | float: left;
153 | }
154 | .left-column h2, .left-column h3 {
155 | color: #1E4F9699;
156 | }
157 | .left-column h2:last-of-type, .left-column h3:last-child {
158 | color: #1E4F96;
159 | }
160 | .right-column {
161 | width: 75%;
162 | float: right;
163 | padding-top: 1em;
164 | }
165 | .pull-left {
166 | float: left;
167 | width: 47%;
168 | }
169 | .pull-right {
170 | float: right;
171 | width: 47%;
172 | }
173 | .pull-right + * {
174 | clear: both;
175 | }
176 | img, video, iframe {
177 | max-width: 100%;
178 | }
179 | blockquote {
180 | border-left: solid 5px #00A3E080;
181 | padding-left: 1em;
182 | }
183 | .remark-slide table {
184 | margin: auto;
185 | border-top: 1px solid #666;
186 | border-bottom: 1px solid #666;
187 | }
188 | .remark-slide table thead th {
189 | border-bottom: 1px solid #ddd;
190 | }
191 | th, td {
192 | padding: 5px;
193 | }
194 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) {
195 | background: #CCECF8;
196 | }
197 | table.dataTable tbody {
198 | background-color: var(--background-color);
199 | color: var(--text-color);
200 | }
201 | table.dataTable.display tbody tr.odd {
202 | background-color: var(--background-color);
203 | }
204 | table.dataTable.display tbody tr.even {
205 | background-color: #CCECF8;
206 | }
207 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover {
208 | background-color: rgba(255, 255, 255, 0.5);
209 | }
210 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate {
211 | color: var(--text-color);
212 | }
213 | .dataTables_wrapper .dataTables_paginate .paginate_button {
214 | color: var(--text-color) !important;
215 | }
216 |
217 | /* Horizontal alignment of code blocks */
218 | .remark-slide-content.left pre,
219 | .remark-slide-content.center pre,
220 | .remark-slide-content.right pre {
221 | text-align: start;
222 | width: max-content;
223 | max-width: 100%;
224 | }
225 | .remark-slide-content.left pre,
226 | .remark-slide-content.right pre {
227 | min-width: 50%; /* declared first; presumably a fallback where min() is unsupported */
228 | min-width: min(40ch, 100%); /* later declaration wins when valid: 40ch, capped at 100% */
229 | }
230 | .remark-slide-content.center pre {
231 | min-width: 66%; /* declared first; presumably a fallback where min() is unsupported */
232 | min-width: min(50ch, 100%); /* later declaration wins when valid: 50ch, capped at 100% */
233 | }
234 | .remark-slide-content.left pre {
235 | margin-left: unset;
236 | margin-right: auto;
237 | }
238 | .remark-slide-content.center pre {
239 | margin-left: auto;
240 | margin-right: auto;
241 | }
242 | .remark-slide-content.right pre {
243 | margin-left: auto;
244 | margin-right: unset;
245 | }
246 |
247 | /* Slide Header Background for h1 elements */
248 | .remark-slide-content.header_background > h1 {
249 | display: block;
250 | position: absolute;
251 | top: 0;
252 | left: 0;
253 | width: 100%;
254 | background: var(--header-background-color);
255 | color: var(--header-background-text-color);
256 | padding: 2rem 64px 1.5rem 64px;
257 | margin-top: 0;
258 | box-sizing: border-box;
259 | }
260 | .remark-slide-content.header_background {
261 | padding-top: 7rem;
262 | }
263 |
264 | @page { margin: 0; } /* zero page margins for paged (printed) output */
265 | @media print {
266 | .remark-slide-scaler { /* print only: full size, unscaled, pinned to the top-left */
267 | width: 100% !important; /* NOTE(review): !important presumably overrides styles set by remark.js; confirm */
268 | height: 100% !important;
269 | transform: scale(1) !important; /* scale factor 1, i.e. no scaling */
270 | top: 0 !important;
271 | left: 0 !important;
272 | }
273 | }
274 |
275 | .primary {
276 | color: var(--primary);
277 | }
278 | .bg-primary {
279 | background-color: var(--primary);
280 | }
281 | .secondary {
282 | color: var(--secondary);
283 | }
284 | .bg-secondary {
285 | background-color: var(--secondary);
286 | }
287 | .white {
288 | color: var(--white);
289 | }
290 | .bg-white {
291 | background-color: var(--white);
292 | }
293 | .black {
294 | color: var(--black);
295 | }
296 | .bg-black {
297 | background-color: var(--black);
298 | }
299 |
300 |
--------------------------------------------------------------------------------