├── .gitignore
├── Data
├── anes.rds
└── recs.rds
├── DataCleaningScripts
├── ANES_DataPrep.Rmd
├── ANES_DataPrep.md
├── RECS_DataPrep.Rmd
└── RECS_DataPrep.md
├── Exercises
├── CategorialExercises.R
├── CategorialExercises.Rmd
├── CategorialExercises_solutions.R
├── CategorialExercises_solutions.Rmd
├── CategorialExercises_solutions.html
├── ContinuousExercises.R
├── ContinuousExercises.Rmd
├── ContinuousExercises_solutions.R
├── ContinuousExercises_solutions.Rmd
├── ContinuousExercises_solutions.html
├── WarmUpExercises.R
├── WarmUpExercises.Rmd
├── WarmUpExercises_solutions.R
├── WarmUpExercises_solutions.Rmd
└── WarmUpExercises_solutions.html
├── FinalizeMaterials.R
├── LICENSE
├── Presentation
├── Slides.R
├── Slides.Rmd
├── Slides.html
├── Slides.pdf
├── Slides.pptx
├── Slides_files
│ └── figure-html
│ │ └── plot_sf_elbill_disp-1.png
└── xaringan-themer.css
├── README.md
├── RawData
├── ANES_2016
│ ├── anes_timeseries_2016.sav
│ ├── anes_timeseries_2016_qnaire_post.pdf
│ ├── anes_timeseries_2016_qnaire_pre.pdf
│ └── anes_timeseries_2016_userguidecodebook.pdf
└── RECS_2015
│ ├── 2020_RECS-457A.pdf
│ ├── README.md
│ ├── codebook_publicv4.xlsx
│ ├── microdata_v3.pdf
│ └── recs2015_public_v4.csv
└── tidy-survey-short-course.Rproj
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | debug.log
--------------------------------------------------------------------------------
/Data/anes.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Data/anes.rds
--------------------------------------------------------------------------------
/Data/recs.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Data/recs.rds
--------------------------------------------------------------------------------
/DataCleaningScripts/ANES_DataPrep.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "American National Election Studies (ANES) 2016 Time Series Study Data Prep"
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE)
8 | ```
9 |
10 | ## Data information
11 |
12 | All data and resources were downloaded from https://electionstudies.org/data-center/2016-time-series-study/ on April 3, 2021.
13 |
14 | American National Election Studies. 2019. ANES 2016 Time Series Study [dataset and documentation]. September 4, 2019 version. www.electionstudies.org
15 | ```{r loadpackageh, message=FALSE}
16 | library(here) #easy relative paths
17 | ```
18 |
19 |
20 |
21 | ```{r loadpackages}
22 | library(tidyverse) #data manipulation
23 | library(haven) #data import
24 | library(tidylog) #informative logging messages
25 | ```
26 | ## Import data and create derived variables
27 |
28 | ```{r derivedata}
29 | anes_in <- read_sav(here("RawData", "ANES_2016", "anes_timeseries_2016.sav")) # raw ANES 2016 file (SPSS .sav)
30 | # Keep the raw items used below, then derive labelled analysis variables from them.
31 | # Raw codes not matched by a recode rule (e.g. the negative refused/don't-know codes) become NA.
32 | anes <- anes_in %>%
33 |   select("V160102", "V160201", "V160202", "V160501", "V161004", "V161005", "V161006", "V161024x", "V161158x", "V161215", "V161219", "V161267", "V161270", "V161310x", "V161342", "V161361x", "V162031", "V162031x", "V162034", "V162034a", "V162062x" # each raw column listed once
34 |   ) %>%
35 |   mutate(
36 |     InterviewMode = fct_recode(as.character(V160501), FTF = "1", Web = "2"),
37 |     Weight = V160102,
38 |     Stratum = as.factor(V160201),
39 |     VarUnit = as.factor(V160202),
40 |     Age = if_else(V161267 > 0, as.numeric(V161267), NA_real_), # non-positive codes are treated as missing
41 |     AgeGroup = cut(Age, c(17, 29, 39, 49, 59, 69, 200),
42 |       labels = c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")),
43 |     Gender = factor(
44 |       case_when(
45 |         V161342 == 1 ~ "Male",
46 |         V161342 == 2 ~ "Female",
47 |         V161342 == 3 ~ "Other",
48 |         TRUE ~ NA_character_
49 |       ),
50 |       levels = c("Male", "Female", "Other")
51 |     ),
52 |     RaceEth = factor(
53 |       case_when(
54 |         V161310x == 1 ~ "White",
55 |         V161310x == 2 ~ "Black",
56 |         V161310x == 5 ~ "Hispanic",
57 |         V161310x == 3 ~ "Asian, NH/PI",
58 |         near(V161310x, 4) ~ "AI/AN",
59 |         near(V161310x, 6) ~ "Other/multiple race",
60 |         TRUE ~ NA_character_
61 |       ),
62 |       levels = c("White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN", "Other/multiple race") # NA is not a level; unmatched values stay NA
63 |     ),
64 |     PartyID = factor(
65 |       case_when(
66 |         V161158x == 1 ~ "Strong democrat",
67 |         V161158x == 2 ~ "Not very strong democrat",
68 |         V161158x == 3 ~ "Independent-democrat",
69 |         V161158x == 4 ~ "Independent",
70 |         V161158x == 5 ~ "Independent-republican",
71 |         V161158x == 6 ~ "Not very strong republican",
72 |         V161158x == 7 ~ "Strong republican",
73 |         TRUE ~ NA_character_
74 |       ),
75 |       levels = c("Strong democrat", "Not very strong democrat", "Independent-democrat", "Independent", "Independent-republican", "Not very strong republican", "Strong republican")
76 |     ),
77 |     Education = factor(
78 |       case_when( # rules are tried in order, so the <= tests below rely on the earlier ones
79 |         V161270 <= 0 ~ NA_character_,
80 |         V161270 <= 8 ~ "Less than HS",
81 |         V161270 == 9 | V161270 == 90 ~ "High school", # 90 must be matched here, before the <= tests that follow
82 |         V161270 <= 12 ~ "Post HS",
83 |         V161270 == 13 ~ "Bachelor's",
84 |         V161270 <= 16 ~ "Graduate",
85 |         TRUE ~ NA_character_
86 |       ),
87 |       levels = c("Less than HS", "High school", "Post HS", "Bachelor's", "Graduate")
88 |     ),
89 |     Income = cut(V161361x, c(-5, 1:28), # codes 1..28 each get their own bin; codes at or below -5 fall outside the breaks
90 |       labels = c("Under $5k",
91 |         "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k", "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k", "$40-45k", "$45-50k", "$50-55k", "$55-60k", "$60-65k","$65-70k", "$70-75k", "$75-80k", "$80-90k", "$90-100k","$100-110k", "$110-125k", "$125-150k", "$150-175k", "$175-250k", "$250k or more" ) # NOTE(review): "$12.5-15" lacks the "k" its neighbours have; renaming it would change a saved factor level
92 |     ),
93 |     Income7 = fct_collapse(
94 |       Income,
95 |       "Under $20k" = c("Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k"),
96 |       "$20-40k" = c("$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k"),
97 |       "$40-60k" = c("$40-45k", "$45-50k", "$50-55k", "$55-60k"),
98 |       "$60-80k" = c("$60-65k", "$65-70k", "$70-75k", "$75-80k"),
99 |       "$80-100k" = c("$80-90k", "$90-100k"),
100 |       "$100-125k" = c("$100-110k", "$110-125k"),
101 |       "$125k or more" = c("$125-150k", "$150-175k", "$175-250k", "$250k or more")
102 |     ),
103 |     CampaignInterest = factor(
104 |       case_when(
105 |         V161004 == 1 ~ "Very much interested",
106 |         V161004 == 2 ~ "Somewhat interested",
107 |         V161004 == 3 ~ "Not much interested",
108 |         TRUE ~ NA_character_
109 |       ),
110 |       levels = c("Very much interested", "Somewhat interested", "Not much interested")
111 |     ),
112 |     TrustGovernment = factor(
113 |       case_when(
114 |         V161215 == 1 ~ "Always",
115 |         V161215 == 2 ~ "Most of the time",
116 |         V161215 == 3 ~ "About half the time",
117 |         V161215 == 4 ~ "Some of the time",
118 |         V161215 == 5 ~ "Never",
119 |         TRUE ~ NA_character_
120 |       ),
121 |       levels = c("Always", "Most of the time", "About half the time", "Some of the time", "Never")
122 |     ),
123 |     TrustPeople = factor(
124 |       case_when(
125 |         V161219 == 1 ~ "Always",
126 |         V161219 == 2 ~ "Most of the time",
127 |         V161219 == 3 ~ "About half the time",
128 |         V161219 == 4 ~ "Some of the time",
129 |         V161219 == 5 ~ "Never",
130 |         TRUE ~ NA_character_
131 |       ),
132 |       levels = c("Always", "Most of the time", "About half the time", "Some of the time", "Never")
133 |     ),
134 |     VotedPres2012 = factor(
135 |       case_when(
136 |         V161005 == 1 ~ "Yes",
137 |         V161005 == 2 ~ "No",
138 |         TRUE ~ NA_character_
139 |       ), levels = c("Yes", "No")
140 |     ),
141 |     VotedPres2012_selection = factor(
142 |       case_when(
143 |         V161006 == 1 ~ "Obama",
144 |         V161006 == 2 ~ "Romney",
145 |         V161006 == 5 ~ "Other",
146 |         TRUE ~ NA_character_
147 |       ), levels = c("Obama", "Romney", "Other")
148 |     ),
149 |     VotedPres2016 = factor(
150 |       case_when(
151 |         V162031x == 1 ~ "Yes",
152 |         V162031x == 0 ~ "No",
153 |         TRUE ~ NA_character_
154 |       ), levels = c("Yes", "No")
155 |     ),
156 |     VotedPres2016_selection = factor(
157 |       case_when(
158 |         V162062x == 1 ~ "Clinton",
159 |         V162062x == 2 ~ "Trump",
160 |         V162062x >= 3 ~ "Other",
161 |         TRUE ~ NA_character_
162 |       ), levels = c("Clinton", "Trump", "Other")
163 |     ),
164 |     EarlyVote2016 = factor(
165 |       case_when(
166 |         V161024x == 4 ~ "Yes",
167 |         VotedPres2016 == "Yes" ~ "No", # uses VotedPres2016 derived earlier in this mutate()
168 |         TRUE ~ NA_character_
169 |       ), levels = c("Yes", "No")
170 |     )
171 |   )
172 |
173 |
174 | # Print a per-column summary of the raw and derived variables.
175 | summary(anes)
176 | ```
177 |
178 |
179 | ## Check derived variables for correct coding
180 |
181 | ```{r checkvars}
182 | # Cross-tabulate each derived variable against the raw item it was built from.
183 | count(anes, InterviewMode, V160501)
184 | summarise(group_by(anes, AgeGroup), minAge=min(Age), maxAge=max(Age), minV=min(V161267), maxV=max(V161267))
185 | count(anes, Gender, V161342)
186 | count(anes, RaceEth, V161310x)
187 | count(anes, PartyID, V161158x)
188 | count(anes, Education, V161270)
189 | print(count(anes, Income, Income7, V161361x), n=30)
190 | count(anes, CampaignInterest, V161004)
191 | count(anes, TrustGovernment, V161215)
192 | count(anes, TrustPeople, V161219)
193 | count(anes, VotedPres2012, V161005)
194 | count(anes, VotedPres2012_selection, V161006)
195 | count(anes, VotedPres2016, V162031x)
196 | count(anes, VotedPres2016_selection, V162062x)
197 | count(anes, EarlyVote2016, V161024x, VotedPres2016)
198 | # Total of the Weight column over all rows.
199 | pull(
200 |   summarise(anes, WtSum=sum(Weight)),
201 |   WtSum)
202 |
203 | ```
204 | ## Save data
205 |
206 | ```{r savedat}
207 | write_rds(anes, here("Data", "anes.rds"), compress="gz") # write the prepared anes data to Data/anes.rds (path built with here()), gzip-compressed
208 | ```
209 |
210 |
211 |
--------------------------------------------------------------------------------
/DataCleaningScripts/ANES_DataPrep.md:
--------------------------------------------------------------------------------
1 | American National Election Studies (ANES) 2016 Time Series Study Data
2 | Prep
3 | ================
4 |
5 | ## Data information
6 |
7 | All data and resources were downloaded from
8 | <https://electionstudies.org/data-center/2016-time-series-study/> on
9 | April 3, 2021.
10 |
11 | American National Election Studies. 2019. ANES 2016 Time Series Study
12 | \[dataset and documentation\]. September 4, 2019 version.
13 | www.electionstudies.org
14 |
15 | ``` r
16 | library(here) #easy relative paths
17 | ```
18 |
19 | ``` r
20 | library(tidyverse) #data manipulation
21 | ```
22 |
23 | ## -- Attaching packages ----------------------------- tidyverse 1.3.0 --
24 |
25 | ## v ggplot2 3.3.3 v purrr 0.3.4
26 | ## v tibble 3.1.0 v dplyr 1.0.5
27 | ## v tidyr 1.1.3 v stringr 1.4.0
28 | ## v readr 1.4.0 v forcats 0.5.1
29 |
30 | ## -- Conflicts -------------------------------- tidyverse_conflicts() --
31 | ## x dplyr::filter() masks stats::filter()
32 | ## x dplyr::lag() masks stats::lag()
33 |
34 | ``` r
35 | library(haven) #data import
36 | library(tidylog) #informative logging messages
37 | ```
38 |
39 | ##
40 | ## Attaching package: 'tidylog'
41 |
42 | ## The following objects are masked from 'package:dplyr':
43 | ##
44 | ## add_count, add_tally, anti_join, count, distinct, distinct_all,
45 | ## distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
46 | ## full_join, group_by, group_by_all, group_by_at, group_by_if,
47 | ## inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
48 | ## relocate, rename, rename_all, rename_at, rename_if, rename_with,
49 | ## right_join, sample_frac, sample_n, select, select_all, select_at,
50 | ## select_if, semi_join, slice, slice_head, slice_max, slice_min,
51 | ## slice_sample, slice_tail, summarise, summarise_all, summarise_at,
52 | ## summarise_if, summarize, summarize_all, summarize_at, summarize_if,
53 | ## tally, top_frac, top_n, transmute, transmute_all, transmute_at,
54 | ## transmute_if, ungroup
55 |
56 | ## The following objects are masked from 'package:tidyr':
57 | ##
58 | ## drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
59 | ## spread, uncount
60 |
61 | ## The following object is masked from 'package:stats':
62 | ##
63 | ## filter
64 |
65 | ## Import data and create derived variables
66 |
67 | ``` r
68 | anes_in <- read_sav(here("RawData", "ANES_2016", "anes_timeseries_2016.sav"))
69 |
70 |
71 | anes <- anes_in %>%
72 | select('V160102', 'V160201', 'V160202', 'V160501', 'V161004', 'V161005', 'V161006', 'V161024x', 'V161158x', 'V161215', 'V161219', 'V161267', 'V161267', 'V161270', 'V161310x', 'V161342', 'V161361x', 'V162031', 'V162031x', 'V162034', 'V162034a', 'V162062x', 'V162062x'
73 | ) %>%
74 | mutate(
75 | InterviewMode=fct_recode(as.character(V160501), FTF="1", Web="2"),
76 | Weight=V160102,
77 | Stratum=as.factor(V160201),
78 | VarUnit=as.factor(V160202),
79 | Age=if_else(V161267>0, as.numeric(V161267), NA_real_),
80 | AgeGroup=cut(Age, c(17, 29, 39, 49, 59, 69, 200),
81 | labels=c("18-29", "30-39", "40-49", "50-59", "60-69", "70 or older")),
82 | Gender=factor(
83 | case_when(
84 | V161342==1~"Male",
85 | V161342==2~"Female",
86 | V161342==3~"Other",
87 | TRUE~NA_character_
88 | ),
89 | levels=c("Male", "Female", "Other")
90 | ),
91 | RaceEth=factor(
92 | case_when(
93 | V161310x==1~"White",
94 | V161310x==2~"Black",
95 | V161310x==5~"Hispanic",
96 | V161310x==3~"Asian, NH/PI",
97 | near(V161310x, 4)~"AI/AN",
98 | near(V161310x, 6)~"Other/multiple race",
99 | TRUE ~ NA_character_
100 | ),
101 | levels=c("White", "Black", "Hispanic", "Asian, NH/PI", "AI/AN", "Other/multiple race", NA_character_)
102 | ),
103 | PartyID=factor(
104 | case_when(
105 | V161158x==1~"Strong democrat",
106 | V161158x==2~"Not very strong democrat",
107 | V161158x==3~"Independent-democrat",
108 | V161158x==4~"Independent",
109 | V161158x==5~"Independent-republican",
110 | V161158x==6~"Not very strong republican",
111 | V161158x==7~"Strong republican",
112 | TRUE ~ NA_character_
113 | ),
114 | levels=c("Strong democrat", "Not very strong democrat", "Independent-democrat", "Independent", "Independent-republican", "Not very strong republican", "Strong republican")
115 | ),
116 | Education=factor(
117 | case_when(
118 | V161270 <=0~NA_character_,
119 | V161270 <= 8~"Less than HS",
120 | V161270==9|V161270==90~"High school",
121 | V161270<=12~"Post HS",
122 | V161270==13~"Bachelor's",
123 | V161270<=16~"Graduate",
124 | TRUE~NA_character_
125 | ),
126 | levels=c("Less than HS", "High school", "Post HS", "Bachelor's", "Graduate")
127 | ),
128 | Income=cut(V161361x, c(-5, 1:28),
129 | labels=c("Under $5k",
130 | "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k", "$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k", "$40-45k", "$45-50k", "$50-55k", "$55-60k", "$60-65k","$65-70k", "$70-75k", "$75-80k", "$80-90k", "$90-100k","$100-110k", "$110-125k", "$125-150k", "$150-175k", "$175-250k", "$250k or more" )
131 | ),
132 | Income7=fct_collapse(
133 | Income,
134 | "Under $20k"=c("Under $5k", "$5-10k", "$10-12.5k", "$12.5-15", "$15-17.5k", "$17.5-20k"),
135 | "$20-40k"=c("$20-22.5k", "$22.5-25k", "$25-27.5k", "$27.5-30k", "$30-35k", "$35-40k"),
136 | "$40-60k"=c( "$40-45k", "$45-50k", "$50-55k", "$55-60k"),
137 | "$60-80k"=c( "$60-65k", "$65-70k", "$70-75k", "$75-80k"),
138 | "$80-100k"=c("$80-90k", "$90-100k"),
139 | "$100-125k"=c("$100-110k", "$110-125k"),
140 | "$125k or more"=c("$125-150k", "$150-175k", "$175-250k", "$250k or more")
141 | ),
142 | CampaignInterest=factor(
143 | case_when(
144 | V161004==1~"Very much interested",
145 | V161004==2~"Somewhat interested",
146 | V161004==3~"Not much interested",
147 | TRUE~NA_character_
148 | ),
149 | levels=c("Very much interested", "Somewhat interested", "Not much interested")
150 | ),
151 | TrustGovernment=factor(
152 | case_when(
153 | V161215==1~"Always",
154 | V161215==2~"Most of the time",
155 | V161215==3~"About half the time",
156 | V161215==4~"Some of the time",
157 | V161215==5~"Never",
158 | TRUE~NA_character_
159 | ),
160 | levels=c("Always", "Most of the time", "About half the time", "Some of the time", "Never")
161 | ),
162 | TrustPeople=factor(
163 | case_when(
164 | V161219==1~"Always",
165 | V161219==2~"Most of the time",
166 | V161219==3~"About half the time",
167 | V161219==4~"Some of the time",
168 | V161219==5~"Never",
169 | TRUE ~ NA_character_
170 | ),
171 | levels=c("Always", "Most of the time", "About half the time", "Some of the time", "Never")
172 | ),
173 | VotedPres2012=factor(
174 | case_when(
175 | V161005==1~"Yes",
176 | V161005==2~"No",
177 | TRUE~NA_character_
178 | ), levels=c("Yes", "No")
179 | ),
180 | VotedPres2012_selection=factor(
181 | case_when(
182 | V161006==1~"Obama",
183 | V161006==2~"Romney",
184 | V161006==5~"Other",
185 | TRUE~NA_character_
186 | ), levels=c("Obama", "Romney", "Other")
187 | ),
188 | VotedPres2016=factor(
189 | case_when(
190 | V162031x==1~"Yes",
191 | V162031x==0~"No",
192 | TRUE~NA_character_
193 | ), levels=c("Yes", "No")
194 | ),
195 | VotedPres2016_selection=factor(
196 | case_when(
197 | V162062x==1~"Clinton",
198 | V162062x==2~"Trump",
199 | V162062x >=3 ~"Other",
200 | TRUE~NA_character_
201 | ), levels=c("Clinton", "Trump", "Other")
202 | ),
203 | EarlyVote2016=factor(
204 | case_when(
205 | V161024x==4~"Yes",
206 | VotedPres2016=="Yes"~"No",
207 | TRUE~NA_character_
208 | ), levels=c("Yes", "No")
209 | )
210 | )
211 | ```
212 |
213 | ## select: dropped 1,821 variables (version, V160001, V160001_orig, V160101, V160101f, …)
214 |
215 | ## mutate: new variable 'InterviewMode' (factor) with 2 unique values and 0% NA
216 |
217 | ## new variable 'Weight' (double) with 2,609 unique values and 0% NA
218 |
219 | ## new variable 'Stratum' (factor) with 132 unique values and 0% NA
220 |
221 | ## new variable 'VarUnit' (factor) with 3 unique values and 0% NA
222 |
223 | ## new variable 'Age' (double) with 74 unique values and 3% NA
224 |
225 | ## new variable 'AgeGroup' (factor) with 7 unique values and 3% NA
226 |
227 | ## new variable 'Gender' (factor) with 4 unique values and 1% NA
228 |
229 | ## new variable 'RaceEth' (factor) with 7 unique values and 1% NA
230 |
231 | ## new variable 'PartyID' (factor) with 8 unique values and 1% NA
232 |
233 | ## new variable 'Education' (factor) with 6 unique values and 1% NA
234 |
235 | ## new variable 'Income' (factor) with 29 unique values and 5% NA
236 |
237 | ## new variable 'Income7' (factor) with 8 unique values and 5% NA
238 |
239 | ## new variable 'CampaignInterest' (factor) with 3 unique values and 0% NA
240 |
241 | ## new variable 'TrustGovernment' (factor) with 6 unique values and 1% NA
242 |
243 | ## new variable 'TrustPeople' (factor) with 6 unique values and <1% NA
244 |
245 | ## new variable 'VotedPres2012' (factor) with 3 unique values and <1% NA
246 |
247 | ## new variable 'VotedPres2012_selection' (factor) with 4 unique values and 28% NA
248 |
249 | ## new variable 'VotedPres2016' (factor) with 3 unique values and 22% NA
250 |
251 | ## new variable 'VotedPres2016_selection' (factor) with 4 unique values and 34% NA
252 |
253 | ## new variable 'EarlyVote2016' (factor) with 3 unique values and 32% NA
254 |
255 | ``` r
256 | summary(anes)
257 | ```
258 |
259 | ## V160102 V160201 V160202 V160501
260 | ## Min. :0.0000 Min. : 1.00 Min. :1.000 Min. :1.000
261 | ## 1st Qu.:0.3934 1st Qu.: 36.00 1st Qu.:1.000 1st Qu.:1.000
262 | ## Median :0.7481 Median : 71.00 Median :1.500 Median :2.000
263 | ## Mean :0.8541 Mean : 69.58 Mean :1.505 Mean :1.724
264 | ## 3rd Qu.:1.1294 3rd Qu.:105.00 3rd Qu.:2.000 3rd Qu.:2.000
265 | ## Max. :6.4445 Max. :133.00 Max. :3.000 Max. :2.000
266 | ##
267 | ## V161004 V161005 V161006 V161024x
268 | ## Min. :1.0 Min. :-9.000 Min. :-9.0000 Min. :1.000
269 | ## 1st Qu.:1.0 1st Qu.: 1.000 1st Qu.:-1.0000 1st Qu.:3.000
270 | ## Median :1.0 Median : 1.000 Median : 1.0000 Median :3.000
271 | ## Mean :1.6 Mean : 1.232 Mean : 0.6773 Mean :2.804
272 | ## 3rd Qu.:2.0 3rd Qu.: 2.000 3rd Qu.: 2.0000 3rd Qu.:3.000
273 | ## Max. :3.0 Max. : 2.000 Max. : 6.0000 Max. :4.000
274 | ##
275 | ## V161158x V161215 V161219 V161267
276 | ## Min. :-9.000 Min. :-9.00 Min. :-9.000 Min. :-9.00
277 | ## 1st Qu.: 2.000 1st Qu.: 3.00 1st Qu.: 2.000 1st Qu.:33.00
278 | ## Median : 4.000 Median : 4.00 Median : 3.000 Median :49.00
279 | ## Mean : 3.792 Mean : 3.49 Mean : 2.831 Mean :47.92
280 | ## 3rd Qu.: 6.000 3rd Qu.: 4.00 3rd Qu.: 4.000 3rd Qu.:63.00
281 | ## Max. : 7.000 Max. : 5.00 Max. : 5.000 Max. :90.00
282 | ##
283 | ## V161270 V161310x V161342 V161361x
284 | ## Min. :-9.00 Min. :-2.000 Min. :-9.000 Min. :-9.00
285 | ## 1st Qu.: 9.00 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 8.00
286 | ## Median :11.00 Median : 1.000 Median : 2.000 Median :15.00
287 | ## Mean :11.66 Mean : 1.787 Mean : 1.432 Mean :14.25
288 | ## 3rd Qu.:13.00 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:22.00
289 | ## Max. :95.00 Max. : 6.000 Max. : 3.000 Max. :28.00
290 | ##
291 | ## V162031 V162031x V162034 V162034a
292 | ## Min. :-8.000 Min. :-8.0000 Min. :-9.0000 Min. :-9.0000
293 | ## 1st Qu.:-1.000 1st Qu.: 0.0000 1st Qu.:-1.0000 1st Qu.:-1.0000
294 | ## Median : 4.000 Median : 1.0000 Median : 1.0000 Median : 1.0000
295 | ## Mean : 1.759 Mean : 0.2349 Mean :-0.4625 Mean :-0.1468
296 | ## 3rd Qu.: 4.000 3rd Qu.: 1.0000 3rd Qu.: 1.0000 3rd Qu.: 2.0000
297 | ## Max. : 4.000 Max. : 1.0000 Max. : 2.0000 Max. : 9.0000
298 | ##
299 | ## V162062x InterviewMode Weight Stratum VarUnit
300 | ## Min. :-9.0000 FTF:1180 Min. :0.0000 123 : 57 1:2135
301 | ## 1st Qu.:-2.0000 Web:3090 1st Qu.:0.3934 121 : 55 2:2115
302 | ## Median : 1.0000 Median :0.7481 126 : 55 3: 20
303 | ## Mean : 0.3393 Mean :0.8541 118 : 52
304 | ## 3rd Qu.: 2.0000 3rd Qu.:1.1294 108 : 50
305 | ## Max. : 5.0000 Max. :6.4445 107 : 46
306 | ## (Other):3955
307 | ## Age AgeGroup Gender RaceEth
308 | ## Min. :18.00 18-29 :651 Male :1987 White :3038
309 | ## 1st Qu.:34.00 30-39 :761 Female:2231 Black : 397
310 | ## Median :50.00 40-49 :620 Other : 11 Hispanic : 450
311 | ## Mean :49.58 50-59 :781 NA's : 41 Asian, NH/PI : 148
312 | ## 3rd Qu.:63.00 60-69 :769 AI/AN : 27
313 | ## Max. :90.00 70 or older:567 Other/multiple race: 177
314 | ## NA's :121 NA's :121 NA's : 33
315 | ## PartyID Education Income
316 | ## Strong democrat :890 Less than HS: 282 Under $5k: 275
317 | ## Strong republican :721 High school : 815 $80-90k : 231
318 | ## Independent :579 Post HS :1499 $30-35k : 213
319 | ## Not very strong democrat :559 Bachelor's : 955 $60-65k : 205
320 | ## Not very strong republican:508 Graduate : 680 $50-55k : 204
321 | ## (Other) :990 NA's : 39 (Other) :2940
322 | ## NA's : 23 NA's : 202
323 | ## Income7 CampaignInterest TrustGovernment
324 | ## $20-40k :773 Very much interested:2230 Always : 66
325 | ## Under $20k :703 Somewhat interested :1519 Most of the time : 429
326 | ## $40-60k :621 Not much interested : 521 About half the time:1382
327 | ## $125k or more:615 Some of the time :1826
328 | ## $60-80k :576 Never : 545
329 | ## (Other) :780 NA's : 22
330 | ## NA's :202
331 | ## TrustPeople VotedPres2012 VotedPres2012_selection VotedPres2016
332 | ## Always : 50 Yes :3117 Obama :1728 Yes :2887
333 | ## Most of the time :1765 No :1137 Romney:1268 No : 444
334 | ## About half the time:1305 NA's: 16 Other : 58 NA's: 939
335 | ## Some of the time : 947 NA's :1216
336 | ## Never : 188
337 | ## NA's : 15
338 | ##
339 | ## VotedPres2016_selection EarlyVote2016
340 | ## Clinton:1364 Yes : 156
341 | ## Trump :1245 No :2731
342 | ## Other : 202 NA's:1383
343 | ## NA's :1459
344 | ##
345 | ##
346 | ##
347 |
348 | ## Check derived variables for correct coding
349 |
350 | ``` r
351 | anes %>% count(InterviewMode, V160501)
352 | ```
353 |
354 | ## count: now 2 rows and 3 columns, ungrouped
355 |
356 | ## # A tibble: 2 x 3
357 | ## InterviewMode V160501 n
358 | ##
359 | ## 1 FTF 1 [1. FTF /CASI] 1180
360 | ## 2 Web 2 [2. Web] 3090
361 |
362 | ``` r
363 | anes %>% group_by(AgeGroup) %>% summarise(minAge=min(Age), maxAge=max(Age), minV=min(V161267), maxV=max(V161267))
364 | ```
365 |
366 | ## group_by: one grouping variable (AgeGroup)
367 |
368 | ## summarise: now 7 rows and 5 columns, ungrouped
369 |
370 | ## # A tibble: 7 x 5
371 | ## AgeGroup minAge maxAge minV maxV
372 | ##
373 | ## 1 18-29 18 29 18 29
374 | ## 2 30-39 30 39 30 39
375 | ## 3 40-49 40 49 40 49
376 | ## 4 50-59 50 59 50 59
377 | ## 5 60-69 60 69 60 69
378 | ## 6 70 or older 70 90 70 90 [90. Age 90 or older]
379 | ## 7 NA NA NA -9 [-9. RF (year of b~ -8 [-8. DK (year of birth, F~
380 |
381 | ``` r
382 | anes %>% count(Gender, V161342)
383 | ```
384 |
385 | ## count: now 4 rows and 3 columns, ungrouped
386 |
387 | ## # A tibble: 4 x 3
388 | ## Gender V161342 n
389 | ##
390 | ## 1 Male 1 [1. Male] 1987
391 | ## 2 Female 2 [2. Female] 2231
392 | ## 3 Other 3 [3. Other] 11
393 | ## 4 NA -9 [-9. Refused] 41
394 |
395 | ``` r
396 | anes %>% count(RaceEth, V161310x)
397 | ```
398 |
399 | ## count: now 7 rows and 3 columns, ungrouped
400 |
401 | ## # A tibble: 7 x 3
402 | ## RaceEth V161310x n
403 | ##
404 | ## 1 White 1 [1. White, non-Hispanic] 3038
405 | ## 2 Black 2 [2. Black, non-Hispanic] 397
406 | ## 3 Hispanic 5 [5. Hispanic] 450
407 | ## 4 Asian, NH/PI 3 [3. Asian, native Hawaiian or other Pacif Islr,non-~ 148
408 | ## 5 AI/AN 4 [4. Native American or Alaska Native, non-Hispanic] 27
409 | ## 6 Other/multiple ~ 6 [6. Other non-Hispanic incl multiple races [WEB: bl~ 177
410 | ## 7 NA -2 [-2. Missing] 33
411 |
412 | ``` r
413 | anes %>% count(PartyID, V161158x)
414 | ```
415 |
416 | ## count: now 9 rows and 3 columns, ungrouped
417 |
418 | ## # A tibble: 9 x 3
419 | ## PartyID V161158x n
420 | ##
421 | ## 1 Strong democrat 1 [1. Strong Democrat] 890
422 | ## 2 Not very strong democ~ 2 [2. Not very strong Democract] 559
423 | ## 3 Independent-democrat 3 [3. Independent-Democrat] 490
424 | ## 4 Independent 4 [4. Independent] 579
425 | ## 5 Independent-republican 5 [5. Independent-Republican] 500
426 | ## 6 Not very strong repub~ 6 [6. Not very strong Republican] 508
427 | ## 7 Strong republican 7 [7. Strong Republican] 721
428 | ## 8 NA -9 [-9. RF (-9) in V161155 (FTF only) /-9 in V16~ 12
429 | ## 9 NA -8 [-8. DK (-8) in V161156 or V161157 (FTF only)] 11
430 |
431 | ``` r
432 | anes %>% count(Education, V161270)
433 | ```
434 |
435 | ## count: now 19 rows and 3 columns, ungrouped
436 |
437 | ## # A tibble: 19 x 3
438 | ## Education V161270 n
439 | ##
440 | ## 1 Less than HS 1 [1. Less than 1st grade] 1
441 | ## 2 Less than HS 2 [2. 1st, 2nd, 3rd or 4th grade] 3
442 | ## 3 Less than HS 3 [3. 5th or 6th grade] 15
443 | ## 4 Less than HS 4 [4. 7th or 8th grade] 22
444 | ## 5 Less than HS 5 [5. 9th grade] 32
445 | ## 6 Less than HS 6 [6. 10th grade] 40
446 | ## 7 Less than HS 7 [7. 11th grade] 62
447 | ## 8 Less than HS 8 [8. 12th grade no diploma] 107
448 | ## 9 High school 9 [9. High school graduate- high school diploma or equiv~ 810
449 | ## 10 High school 90 [90. Other specify given as: high school graduate] 5
450 | ## 11 Post HS 10 [10. Some college but no degree] 898
451 | ## 12 Post HS 11 [11. Associate degree in college - occupational /vocat~ 313
452 | ## 13 Post HS 12 [12. Associate degree in college -- academic program] 288
453 | ## 14 Bachelor's 13 [13. Bachelor's degree (for example: BA, AB, BS)] 955
454 | ## 15 Graduate 14 [14. Master's degree (for example: MA, MS, MENG, MED, ~ 499
455 | ## 16 Graduate 15 [15. Professional school degree (for example: MD, DDS,~ 88
456 | ## 17 Graduate 16 [16. Doctorate degree (for example: PHD, EDD)] 93
457 | ## 18 NA -9 [-9. Refused] 15
458 | ## 19 NA 95 [95. Other SPECIFY] 24
459 |
460 | ``` r
461 | anes %>% count(Income, Income7, V161361x) %>% print(n=30)
462 | ```
463 |
464 | ## count: now 30 rows and 4 columns, ungrouped
465 |
466 | ## # A tibble: 30 x 4
467 | ## Income Income7 V161361x n
468 | ##
469 | ## 1 Under $5k Under $20k 1 [01. Under $5,000] 275
470 | ## 2 $5-10k Under $20k 2 [02. $5,000-$9,999] 96
471 | ## 3 $10-12.5k Under $20k 3 [03. $10,000-$12,499] 133
472 | ## 4 $12.5-15 Under $20k 4 [04. $12,500-$14,999] 37
473 | ## 5 $15-17.5k Under $20k 5 [05. $15,000-$17,499] 110
474 | ## 6 $17.5-20k Under $20k 6 [06. $17,500-$19,999] 52
475 | ## 7 $20-22.5k $20-40k 7 [07. $20,000-$22,499] 153
476 | ## 8 $22.5-25k $20-40k 8 [08. $22,500-$24,999] 64
477 | ## 9 $25-27.5k $20-40k 9 [09. $25,000-$27,499] 143
478 | ## 10 $27.5-30k $20-40k 10 [10. $27,500-$29,999] 34
479 | ## 11 $30-35k $20-40k 11 [11. $30,000-$34,999] 213
480 | ## 12 $35-40k $20-40k 12 [12. $35,000-$39,999] 166
481 | ## 13 $40-45k $40-60k 13 [13. $40,000-$44,999] 178
482 | ## 14 $45-50k $40-60k 14 [14. $45,000-$49,999] 154
483 | ## 15 $50-55k $40-60k 15 [15. $50,000-$54,999] 204
484 | ## 16 $55-60k $40-60k 16 [16. $55,000-$59,999] 85
485 | ## 17 $60-65k $60-80k 17 [17. $60,000-$64,999] 205
486 | ## 18 $65-70k $60-80k 18 [18. $65,000-$69,999] 107
487 | ## 19 $70-75k $60-80k 19 [19. $70,000-$74,999] 138
488 | ## 20 $75-80k $60-80k 20 [20. $75,000-$79,999] 126
489 | ## 21 $80-90k $80-100k 21 [21. $80,000-$89,999] 231
490 | ## 22 $90-100k $80-100k 22 [22. $90,000-$99,999] 176
491 | ## 23 $100-110k $100-125k 23 [23. $100,000-$109,999] 191
492 | ## 24 $110-125k $100-125k 24 [24. $110,000-$124,999] 182
493 | ## 25 $125-150k $125k or more 25 [25. $125,000-$149,999] 166
494 | ## 26 $150-175k $125k or more 26 [26. $150,000-$174,999] 154
495 | ## 27 $175-250k $125k or more 27 [27. $175,000-$249,999] 154
496 | ## 28 $250k or mo~ $125k or more 28 [28. $250,000 or more] 141
497 | ## 29 NA NA -9 [-9. Refused] 190
498 | ## 30 NA NA -5 [-5. Interview breakoff (sufficient part~ 12
499 |
500 | ``` r
501 | anes %>% count(CampaignInterest, V161004)
502 | ```
503 |
504 | ## count: now 3 rows and 3 columns, ungrouped
505 |
506 | ## # A tibble: 3 x 3
507 | ## CampaignInterest V161004 n
508 | ##
509 | ## 1 Very much interested 1 [1. Very much interested] 2230
510 | ## 2 Somewhat interested 2 [2. Somewhat interested] 1519
511 | ## 3 Not much interested 3 [3. Not much interested] 521
512 |
513 | ``` r
514 | anes %>% count(TrustGovernment, V161215)
515 | ```
516 |
517 | ## count: now 7 rows and 3 columns, ungrouped
518 |
519 | ## # A tibble: 7 x 3
520 | ## TrustGovernment V161215 n
521 | ##
522 | ## 1 Always 1 [1. Always] 66
523 | ## 2 Most of the time 2 [2. Most of the time] 429
524 | ## 3 About half the time 3 [3. About half the time] 1382
525 | ## 4 Some of the time 4 [4. Some of the time] 1826
526 | ## 5 Never 5 [5. Never] 545
527 | ## 6 NA -9 [-9. Refused] 19
528 | ## 7 NA -8 [-8. Don't know (FTF only)] 3
529 |
530 | ``` r
531 | anes %>% count(TrustPeople, V161219)
532 | ```
533 |
534 | ## count: now 7 rows and 3 columns, ungrouped
535 |
536 | ## # A tibble: 7 x 3
537 | ## TrustPeople V161219 n
538 | ##
539 | ## 1 Always 1 [1. Always] 50
540 | ## 2 Most of the time 2 [2. Most of the time] 1765
541 | ## 3 About half the time 3 [3. About half the time] 1305
542 | ## 4 Some of the time 4 [4. Some of the time] 947
543 | ## 5 Never 5 [5. Never] 188
544 | ## 6 NA -9 [-9. Refused] 14
545 | ## 7 NA -8 [-8. Don't know (FTF only)] 1
546 |
547 | ``` r
548 | anes %>% count(VotedPres2012, V161005)
549 | ```
550 |
551 | ## count: now 4 rows and 3 columns, ungrouped
552 |
553 | ## # A tibble: 4 x 3
554 | ## VotedPres2012 V161005 n
555 | ##
556 | ## 1 Yes 1 [1. Yes, voted] 3117
557 | ## 2 No 2 [2. No, didn't vote] 1137
558 | ## 3 NA -9 [-9. Refused] 2
559 | ## 4 NA -8 [-8. Don't know (FTF only)] 14
560 |
561 | ``` r
562 | anes %>% count(VotedPres2012_selection, V161006)
563 | ```
564 |
565 | ## count: now 7 rows and 3 columns, ungrouped
566 |
567 | ## # A tibble: 7 x 3
568 | ## VotedPres2012_select~ V161006 n
569 | ##
570 | ## 1 Obama 1 [1. Barack Obama] 1728
571 | ## 2 Romney 2 [2. Mitt Romney] 1268
572 | ## 3 Other 5 [5. Other SPECIFY] 58
573 | ## 4 NA -9 [-9. Refused] 47
574 | ## 5 NA -8 [-8. Don't know (FTF only)] 13
575 | ## 6 NA -1 [-1. Inap, 2,-8,-9 in V161005] 1153
576 | ## 7 NA 6 [6. Other specify - specified as: Did not vot~ 3
577 |
578 | ``` r
579 | anes %>% count(VotedPres2016, V162031x)
580 | ```
581 |
582 | ## count: now 4 rows and 3 columns, ungrouped
583 |
584 | ## # A tibble: 4 x 3
585 | ## VotedPres2016 V162031x n
586 | ##
587 | ## 1 Yes 1 [1. Voted in 2016] 2887
588 | ## 2 No 0 [0. Did not vote in 2016] 444
589 | ## 3 NA -8 [-8. Don't know (in V162031)] 1
590 | ## 4 NA -2 [-2. Missing, 3 in V162022 /FTF: -8,-9 in V162022 /WEB~ 938
591 |
592 | ``` r
593 | anes %>% count(VotedPres2016_selection, V162062x)
594 | ```
595 |
596 | ## count: now 8 rows and 3 columns, ungrouped
597 |
598 | ## # A tibble: 8 x 3
599 | ## VotedPres2016_select~ V162062x n
600 | ##
601 | ## 1 Clinton 1 [1. Hillary Clinton] 1364
602 | ## 2 Trump 2 [2. Donald Trump] 1245
603 | ## 3 Other 3 [3. Gary Johnson] 118
604 | ## 4 Other 4 [4. Jill Stein] 32
605 | ## 5 Other 5 [5. Other candidate SPECIFY] 52
606 | ## 6 NA -9 [-9. Refused] 31
607 | ## 7 NA -8 [-8. Don't know (FTF only)] 2
608 | ## 8 NA -2 [-2. Missing, no vote for Pres in Post /no Pos~ 1426
609 |
610 | ``` r
611 | anes %>% count(EarlyVote2016, V161024x, VotedPres2016)
612 | ```
613 |
614 | ## count: now 10 rows and 4 columns, ungrouped
615 |
616 | ## # A tibble: 10 x 4
617 | ## EarlyVote2016 V161024x VotedPres2016 n
618 | ##
619 | ## 1 Yes 4 [4. Registered and voted early] Yes 156
620 | ## 2 No 1 [1. Not (or DK /RF if) registered, does ~ Yes 28
621 | ## 3 No 2 [2. Not (or DK /RF if) registered, inten~ Yes 65
622 | ## 4 No 3 [3. Registered but did not vote early (o~ Yes 2638
623 | ## 5 NA 1 [1. Not (or DK /RF if) registered, does ~ No 31
624 | ## 6 NA 1 [1. Not (or DK /RF if) registered, does ~ NA 322
625 | ## 7 NA 2 [2. Not (or DK /RF if) registered, inten~ No 46
626 | ## 8 NA 2 [2. Not (or DK /RF if) registered, inten~ NA 120
627 | ## 9 NA 3 [3. Registered but did not vote early (o~ No 367
628 | ## 10 NA 3 [3. Registered but did not vote early (o~ NA 497
629 |
630 | ``` r
631 | anes %>%
632 | summarise(WtSum=sum(Weight)) %>%
633 | pull(WtSum)
634 | ```
635 |
636 | ## summarise: now one row and one column, ungrouped
637 |
638 | ## [1] 3646.921
639 |
640 | ## Save data
641 |
642 | ``` r
643 | write_rds(anes, here("Data", "anes.rds"), compress="gz")
644 | ```
645 |
--------------------------------------------------------------------------------
/DataCleaningScripts/RECS_DataPrep.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Residential Energy Consumption Survey (RECS) 2015 Data Prep"
3 | output: github_document
4 | ---
5 |
6 | ```{r setup, include=FALSE}
7 | knitr::opts_chunk$set(echo = TRUE)
8 | ```
9 |
10 | ## Data information
11 |
12 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021.
13 |
14 | ```{r loadpackageh, message=FALSE}
15 | library(here) #easy relative paths
16 | ```
17 |
18 | ```{r loadpackages}
19 | library(tidyverse) #data manipulation
20 | library(haven) #data import
21 | library(tidylog) #informative logging messages
22 | ```
23 | ## Import data and create derived variables
24 |
25 | ```{r derivedata}
26 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv")) # raw RECS 2015 public-use file
27 | # Keep only the analysis columns, then derive labelled factors, logical flags and cleaned temperature variables.
28 | recs <- recs_in %>%
29 | select(DOEID, REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, YEARMADERANGE, HEATHOME, EQUIPMUSE, TEMPHOME, TEMPGONE, TEMPNITE, AIRCOND, USECENAC, TEMPHOMEAC, TEMPGONEAC, TEMPNITEAC, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, CLIMATE_REGION_PUB, IECC_CLIMATE_PUB, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD=WOODBTU, BTUPELLET=PELLETBTU ) %>%
30 | mutate(
31 | Region=parse_factor(
32 | case_when(
33 | REGIONC==1~"Northeast",
34 | REGIONC==2~"Midwest",
35 | REGIONC==3~"South",
36 | REGIONC==4~"West",
37 | ), levels=c("Northeast", "Midwest", "South", "West")),
38 | Division=parse_factor(
39 | case_when(
40 | DIVISION==1~"New England",
41 | DIVISION==2~"Middle Atlantic",
42 | DIVISION==3~"East North Central",
43 | DIVISION==4~"West North Central",
44 | DIVISION==5~"South Atlantic",
45 | DIVISION==6~"East South Central",
46 | DIVISION==7~"West South Central",
47 | DIVISION==8~"Mountain North",
48 | DIVISION==9~"Mountain South",
49 | DIVISION==10~"Pacific",
50 | ), levels=c("New England", "Middle Atlantic", "East North Central", "West North Central", "South Atlantic", "East South Central", "West South Central", "Mountain North", "Mountain South", "Pacific")),
51 | MSAStatus=fct_recode(METROMICRO, "Metropolitan Statistical Area"="METRO", "Micropolitan Statistical Area"="MICRO", "None"="NONE"),
52 | Urbanicity=parse_factor(
53 | case_when(
54 | UATYP10=="U"~"Urban Area",
55 | UATYP10=="C"~"Urban Cluster",
56 | UATYP10=="R"~"Rural"
57 | ),
58 | levels=c("Urban Area", "Urban Cluster", "Rural")
59 | ),
60 | HousingUnitType=parse_factor(
61 | case_when(
62 | TYPEHUQ==1~"Mobile home",
63 | TYPEHUQ==2~"Single-family detached",
64 | TYPEHUQ==3~"Single-family attached",
65 | TYPEHUQ==4~"Apartment: 2-4 Units",
66 | TYPEHUQ==5~"Apartment: 5 or more units",
67 | ), levels=c("Mobile home", "Single-family detached", "Single-family attached", "Apartment: 2-4 Units", "Apartment: 5 or more units")),
68 | YearMade=parse_factor(
69 | case_when(
70 | YEARMADERANGE==1~"Before 1950",
71 | YEARMADERANGE==2~"1950-1959",
72 | YEARMADERANGE==3~"1960-1969",
73 | YEARMADERANGE==4~"1970-1979",
74 | YEARMADERANGE==5~"1980-1989",
75 | YEARMADERANGE==6~"1990-1999",
76 | YEARMADERANGE==7~"2000-2009",
77 | YEARMADERANGE==8~"2010-2015",
78 | ),
79 | levels=c("Before 1950", "1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2015"),
80 | ordered = TRUE
81 | ),
82 | SpaceHeatingUsed=as.logical(HEATHOME), # 0/1 indicator becomes FALSE/TRUE
83 | HeatingBehavior=parse_factor(
84 | case_when(
85 | EQUIPMUSE==1~"Set one temp and leave it",
86 | EQUIPMUSE==2~"Manually adjust at night/no one home",
87 | EQUIPMUSE==3~"Program thermostat to change at certain times",
88 | EQUIPMUSE==4~"Turn on or off as needed",
89 | EQUIPMUSE==5~"No control",
90 | EQUIPMUSE==9~"Other",
91 | EQUIPMUSE==-2~NA_character_), # -2 is the negative code that occurs in the data (258 rows, same count as HEATHOME==0)
92 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control", "Other")
93 | ),
94 | WinterTempDay=if_else(TEMPHOME>0, TEMPHOME, NA_real_), # non-positive temperature codes are set to missing
95 | WinterTempAway=if_else(TEMPGONE>0, TEMPGONE, NA_real_),
96 | WinterTempNight=if_else(TEMPNITE>0, TEMPNITE, NA_real_),
97 | ACUsed=as.logical(AIRCOND), # 0/1 indicator becomes FALSE/TRUE
98 | ACBehavior=parse_factor(
99 | case_when(
100 | USECENAC==1~"Set one temp and leave it",
101 | USECENAC==2~"Manually adjust at night/no one home",
102 | USECENAC==3~"Program thermostat to change at certain times",
103 | USECENAC==4~"Turn on or off as needed",
104 | USECENAC==5~"No control",
105 | USECENAC==-2~NA_character_), # -2 is the negative code that occurs in the data (1874 rows)
106 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control")
107 | ),
108 | SummerTempDay=if_else(TEMPHOMEAC>0, TEMPHOMEAC, NA_real_), # non-positive temperature codes are set to missing
109 | SummerTempAway=if_else(TEMPGONEAC>0, TEMPGONEAC, NA_real_),
110 | SummerTempNight=if_else(TEMPNITEAC>0, TEMPNITEAC, NA_real_),
111 | ClimateRegion_BA=parse_factor(CLIMATE_REGION_PUB),
112 | ClimateRegion_IECC=factor(IECC_CLIMATE_PUB)
113 | # NOTE(review): parse_factor() defaults to include_na = TRUE, so NA stays a factor level in HeatingBehavior/ACBehavior (is.na() is FALSE for those rows) - confirm this is intended
114 | )
115 |
116 | ```
117 |
118 |
119 | ## Check derived variables for correct coding
120 |
121 | ```{r checkvars}
122 | count(recs, Region, REGIONC) # each derived variable is cross-tabulated against its raw source column
123 | count(recs, Division, DIVISION)
124 | count(recs, MSAStatus, METROMICRO)
125 | count(recs, Urbanicity, UATYP10)
126 | count(recs, HousingUnitType, TYPEHUQ)
127 | count(recs, YearMade, YEARMADERANGE)
128 | count(recs, SpaceHeatingUsed, HEATHOME)
129 | count(recs, HeatingBehavior, EQUIPMUSE)
130 | count(recs, ACUsed, AIRCOND)
131 | count(recs, ACBehavior, USECENAC)
132 | count(recs, ClimateRegion_BA, CLIMATE_REGION_PUB)
133 | count(recs, ClimateRegion_IECC, IECC_CLIMATE_PUB)
134 |
135 | ```
136 | ## Save data
137 |
138 | ```{r savedat}
139 | recs_out <- select(
140 | recs, DOEID, Region, Division, MSAStatus, Urbanicity, HousingUnitType, YearMade, SpaceHeatingUsed, HeatingBehavior, WinterTempDay, WinterTempAway, WinterTempNight, ACUsed, ACBehavior, SummerTempDay, SummerTempAway, SummerTempNight, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, ClimateRegion_BA, ClimateRegion_IECC, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD, BTUPELLET)
141 | # Print a summary of the analysis file, then write it to Data/recs.rds with gzip compression.
142 | recs_out %>% summary()
143 | write_rds(recs_out, here("Data", "recs.rds"), compress="gz")
144 | ```
145 |
146 |
147 |
--------------------------------------------------------------------------------
/DataCleaningScripts/RECS_DataPrep.md:
--------------------------------------------------------------------------------
1 | Residential Energy Consumption Survey (RECS) 2015 Data Prep
2 | ================
3 |
4 | ## Data information
5 |
6 | All data and resources were downloaded from
7 | <https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata>
8 | on March 3, 2021.
9 |
10 | ``` r
11 | library(here) #easy relative paths
12 | ```
13 |
14 | ## Warning: package 'here' was built under R version 4.0.4
15 |
16 | ``` r
17 | library(tidyverse) #data manipulation
18 | ```
19 |
20 | ## -- Attaching packages ------------------------------------------------------------------------------ tidyverse 1.3.0 --
21 |
22 | ## v ggplot2 3.3.2 v purrr 0.3.4
23 | ## v tibble 3.0.3 v dplyr 1.0.2
24 | ## v tidyr 1.1.2 v stringr 1.4.0
25 | ## v readr 1.3.1 v forcats 0.5.0
26 |
27 | ## -- Conflicts --------------------------------------------------------------------------------- tidyverse_conflicts() --
28 | ## x dplyr::filter() masks stats::filter()
29 | ## x dplyr::lag() masks stats::lag()
30 |
31 | ``` r
32 | library(haven) #data import
33 | library(tidylog) #informative logging messages
34 | ```
35 |
36 | ## Warning: package 'tidylog' was built under R version 4.0.4
37 |
38 | ##
39 | ## Attaching package: 'tidylog'
40 |
41 | ## The following objects are masked from 'package:dplyr':
42 | ##
43 | ## add_count, add_tally, anti_join, count, distinct, distinct_all,
44 | ## distinct_at, distinct_if, filter, filter_all, filter_at, filter_if,
45 | ## full_join, group_by, group_by_all, group_by_at, group_by_if,
46 | ## inner_join, left_join, mutate, mutate_all, mutate_at, mutate_if,
47 | ## relocate, rename, rename_all, rename_at, rename_if, rename_with,
48 | ## right_join, sample_frac, sample_n, select, select_all, select_at,
49 | ## select_if, semi_join, slice, slice_head, slice_max, slice_min,
50 | ## slice_sample, slice_tail, summarise, summarise_all, summarise_at,
51 | ## summarise_if, summarize, summarize_all, summarize_at, summarize_if,
52 | ## tally, top_frac, top_n, transmute, transmute_all, transmute_at,
53 | ## transmute_if, ungroup
54 |
55 | ## The following objects are masked from 'package:tidyr':
56 | ##
57 | ## drop_na, fill, gather, pivot_longer, pivot_wider, replace_na,
58 | ## spread, uncount
59 |
60 | ## The following object is masked from 'package:stats':
61 | ##
62 | ## filter
63 |
64 | ## Import data and create derived variables
65 |
66 | ``` r
67 | recs_in <- read_csv(here("RawData", "RECS_2015", "recs2015_public_v4.csv"))
68 | ```
69 |
70 | ## Parsed with column specification:
71 | ## cols(
72 | ## .default = col_double(),
73 | ## METROMICRO = col_character(),
74 | ## UATYP10 = col_character(),
75 | ## CLIMATE_REGION_PUB = col_character(),
76 | ## IECC_CLIMATE_PUB = col_character()
77 | ## )
78 |
79 | ## See spec(...) for full column specifications.
80 |
81 | ``` r
82 | recs <- recs_in %>%
83 | select(DOEID, REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, YEARMADERANGE, HEATHOME, EQUIPMUSE, TEMPHOME, TEMPGONE, TEMPNITE, AIRCOND, USECENAC, TEMPHOMEAC, TEMPGONEAC, TEMPNITEAC, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, CLIMATE_REGION_PUB, IECC_CLIMATE_PUB, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD=WOODBTU, BTUPELLET=PELLETBTU ) %>%
84 | mutate(
85 | Region=parse_factor(
86 | case_when(
87 | REGIONC==1~"Northeast",
88 | REGIONC==2~"Midwest",
89 | REGIONC==3~"South",
90 | REGIONC==4~"West",
91 | ), levels=c("Northeast", "Midwest", "South", "West")),
92 | Division=parse_factor(
93 | case_when(
94 | DIVISION==1~"New England",
95 | DIVISION==2~"Middle Atlantic",
96 | DIVISION==3~"East North Central",
97 | DIVISION==4~"West North Central",
98 | DIVISION==5~"South Atlantic",
99 | DIVISION==6~"East South Central",
100 | DIVISION==7~"West South Central",
101 | DIVISION==8~"Mountain North",
102 | DIVISION==9~"Mountain South",
103 | DIVISION==10~"Pacific",
104 | ), levels=c("New England", "Middle Atlantic", "East North Central", "West North Central", "South Atlantic", "East South Central", "West South Central", "Mountain North", "Mountain South", "Pacific")),
105 | MSAStatus=fct_recode(METROMICRO, "Metropolitan Statistical Area"="METRO", "Micropolitan Statistical Area"="MICRO", "None"="NONE"),
106 | Urbanicity=parse_factor(
107 | case_when(
108 | UATYP10=="U"~"Urban Area",
109 | UATYP10=="C"~"Urban Cluster",
110 | UATYP10=="R"~"Rural"
111 | ),
112 | levels=c("Urban Area", "Urban Cluster", "Rural")
113 | ),
114 | HousingUnitType=parse_factor(
115 | case_when(
116 | TYPEHUQ==1~"Mobile home",
117 | TYPEHUQ==2~"Single-family detached",
118 | TYPEHUQ==3~"Single-family attached",
119 | TYPEHUQ==4~"Apartment: 2-4 Units",
120 | TYPEHUQ==5~"Apartment: 5 or more units",
121 | ), levels=c("Mobile home", "Single-family detached", "Single-family attached", "Apartment: 2-4 Units", "Apartment: 5 or more units")),
122 | YearMade=parse_factor(
123 | case_when(
124 | YEARMADERANGE==1~"Before 1950",
125 | YEARMADERANGE==2~"1950-1959",
126 | YEARMADERANGE==3~"1960-1969",
127 | YEARMADERANGE==4~"1970-1979",
128 | YEARMADERANGE==5~"1980-1989",
129 | YEARMADERANGE==6~"1990-1999",
130 | YEARMADERANGE==7~"2000-2009",
131 | YEARMADERANGE==8~"2010-2015",
132 | ),
133 | levels=c("Before 1950", "1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2015"),
134 | ordered = TRUE
135 | ),
136 | SpaceHeatingUsed=as.logical(HEATHOME),
137 | HeatingBehavior=parse_factor(
138 | case_when(
139 | EQUIPMUSE==1~"Set one temp and leave it",
140 | EQUIPMUSE==2~"Manually adjust at night/no one home",
141 | EQUIPMUSE==3~"Program thermostat to change at certain times",
142 | EQUIPMUSE==4~"Turn on or off as needed",
143 | EQUIPMUSE==5~"No control",
144 | EQUIPMUSE==9~"Other",
145 | EQUIPMUSE==-9~NA_character_),
146 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control", "Other")
147 | ),
148 | WinterTempDay=if_else(TEMPHOME>0, TEMPHOME, NA_real_),
149 | WinterTempAway=if_else(TEMPGONE>0, TEMPGONE, NA_real_),
150 | WinterTempNight=if_else(TEMPNITE>0, TEMPNITE, NA_real_),
151 | ACUsed=as.logical(AIRCOND),
152 | ACBehavior=parse_factor(
153 | case_when(
154 | USECENAC==1~"Set one temp and leave it",
155 | USECENAC==2~"Manually adjust at night/no one home",
156 | USECENAC==3~"Program thermostat to change at certain times",
157 | USECENAC==4~"Turn on or off as needed",
158 | USECENAC==5~"No control",
159 | USECENAC==-9~NA_character_),
160 | levels=c("Set one temp and leave it", "Manually adjust at night/no one home", "Program thermostat to change at certain times", "Turn on or off as needed", "No control")
161 | ),
162 | SummerTempDay=if_else(TEMPHOMEAC>0, TEMPHOMEAC, NA_real_),
163 | SummerTempAway=if_else(TEMPGONEAC>0, TEMPGONEAC, NA_real_),
164 | SummerTempNight=if_else(TEMPNITEAC>0, TEMPNITEAC, NA_real_),
165 | ClimateRegion_BA=parse_factor(CLIMATE_REGION_PUB),
166 | ClimateRegion_IECC=factor(IECC_CLIMATE_PUB)
167 |
168 | )
169 | ```
170 |
171 | ## select: renamed 2 variables (BTUWOOD, BTUPELLET) and dropped 619 variables
172 |
173 | ## mutate: new variable 'Region' (factor) with 4 unique values and 0% NA
174 |
175 | ## new variable 'Division' (factor) with 10 unique values and 0% NA
176 |
177 | ## new variable 'MSAStatus' (factor) with 3 unique values and 0% NA
178 |
179 | ## new variable 'Urbanicity' (factor) with 3 unique values and 0% NA
180 |
181 | ## new variable 'HousingUnitType' (factor) with 5 unique values and 0% NA
182 |
183 | ## new variable 'YearMade' (ordered factor) with 8 unique values and 0% NA
184 |
185 | ## new variable 'SpaceHeatingUsed' (logical) with 2 unique values and 0% NA
186 |
187 | ## new variable 'HeatingBehavior' (factor) with 7 unique values and 0% NA
188 |
189 | ## new variable 'WinterTempDay' (double) with 35 unique values and 5% NA
190 |
191 | ## new variable 'WinterTempAway' (double) with 37 unique values and 5% NA
192 |
193 | ## new variable 'WinterTempNight' (double) with 38 unique values and 5% NA
194 |
195 | ## new variable 'ACUsed' (logical) with 2 unique values and 0% NA
196 |
197 | ## new variable 'ACBehavior' (factor) with 6 unique values and 0% NA
198 |
199 | ## new variable 'SummerTempDay' (double) with 38 unique values and 13% NA
200 |
201 | ## new variable 'SummerTempAway' (double) with 35 unique values and 13% NA
202 |
203 | ## new variable 'SummerTempNight' (double) with 36 unique values and 13% NA
204 |
205 | ## new variable 'ClimateRegion_BA' (factor) with 5 unique values and 0% NA
206 |
207 | ## new variable 'ClimateRegion_IECC' (factor) with 11 unique values and 0% NA
208 |
209 | ## Check derived variables for correct coding
210 |
211 | ``` r
212 | recs %>% count(Region, REGIONC)
213 | ```
214 |
215 | ## count: now 4 rows and 3 columns, ungrouped
216 |
217 | ## # A tibble: 4 x 3
218 | ## Region REGIONC n
219 | ##
220 | ## 1 Northeast 1 794
221 | ## 2 Midwest 2 1327
222 | ## 3 South 3 2010
223 | ## 4 West 4 1555
224 |
225 | ``` r
226 | recs %>% count(Division, DIVISION)
227 | ```
228 |
229 | ## count: now 10 rows and 3 columns, ungrouped
230 |
231 | ## # A tibble: 10 x 3
232 | ## Division DIVISION n
233 | ##
234 | ## 1 New England 1 253
235 | ## 2 Middle Atlantic 2 541
236 | ## 3 East North Central 3 836
237 | ## 4 West North Central 4 491
238 | ## 5 South Atlantic 5 1058
239 | ## 6 East South Central 6 372
240 | ## 7 West South Central 7 580
241 | ## 8 Mountain North 8 228
242 | ## 9 Mountain South 9 242
243 | ## 10 Pacific 10 1085
244 |
245 | ``` r
246 | recs %>% count(MSAStatus, METROMICRO)
247 | ```
248 |
249 | ## count: now 3 rows and 3 columns, ungrouped
250 |
251 | ## # A tibble: 3 x 3
252 | ## MSAStatus METROMICRO n
253 | ##
254 | ## 1 Metropolitan Statistical Area METRO 4745
255 | ## 2 Micropolitan Statistical Area MICRO 584
256 | ## 3 None NONE 357
257 |
258 | ``` r
259 | recs %>% count(Urbanicity, UATYP10)
260 | ```
261 |
262 | ## count: now 3 rows and 3 columns, ungrouped
263 |
264 | ## # A tibble: 3 x 3
265 | ## Urbanicity UATYP10 n
266 | ##
267 | ## 1 Urban Area U 3928
268 | ## 2 Urban Cluster C 598
269 | ## 3 Rural R 1160
270 |
271 | ``` r
272 | recs %>% count(HousingUnitType, TYPEHUQ)
273 | ```
274 |
275 | ## count: now 5 rows and 3 columns, ungrouped
276 |
277 | ## # A tibble: 5 x 3
278 | ## HousingUnitType TYPEHUQ n
279 | ##
280 | ## 1 Mobile home 1 286
281 | ## 2 Single-family detached 2 3752
282 | ## 3 Single-family attached 3 479
283 | ## 4 Apartment: 2-4 Units 4 311
284 | ## 5 Apartment: 5 or more units 5 858
285 |
286 | ``` r
287 | recs %>% count(YearMade, YEARMADERANGE)
288 | ```
289 |
290 | ## count: now 8 rows and 3 columns, ungrouped
291 |
292 | ## # A tibble: 8 x 3
293 | ## YearMade YEARMADERANGE n
294 | ##
295 | ## 1 Before 1950 1 858
296 | ## 2 1950-1959 2 544
297 | ## 3 1960-1969 3 565
298 | ## 4 1970-1979 4 928
299 | ## 5 1980-1989 5 874
300 | ## 6 1990-1999 6 786
301 | ## 7 2000-2009 7 901
302 | ## 8 2010-2015 8 230
303 |
304 | ``` r
305 | recs %>% count(SpaceHeatingUsed, HEATHOME)
306 | ```
307 |
308 | ## count: now 2 rows and 3 columns, ungrouped
309 |
310 | ## # A tibble: 2 x 3
311 | ## SpaceHeatingUsed HEATHOME n
312 | ##
313 | ## 1 FALSE 0 258
314 | ## 2 TRUE 1 5428
315 |
316 | ``` r
317 | recs %>% count(HeatingBehavior, EQUIPMUSE)
318 | ```
319 |
320 | ## count: now 7 rows and 3 columns, ungrouped
321 |
322 | ## # A tibble: 7 x 3
323 | ## HeatingBehavior EQUIPMUSE n
324 | ##
325 | ## 1 Set one temp and leave it 1 2156
326 | ## 2 Manually adjust at night/no one home 2 1414
327 | ## 3 Program thermostat to change at certain times 3 972
328 | ## 4 Turn on or off as needed 4 761
329 | ## 5 No control 5 114
330 | ## 6 Other 9 11
331 | ## 7 <NA> -2 258
332 |
333 | ``` r
334 | recs %>% count(ACUsed, AIRCOND)
335 | ```
336 |
337 | ## count: now 2 rows and 3 columns, ungrouped
338 |
339 | ## # A tibble: 2 x 3
340 | ## ACUsed AIRCOND n
341 | ##
342 | ## 1 FALSE 0 737
343 | ## 2 TRUE 1 4949
344 |
345 | ``` r
346 | recs %>% count(ACBehavior, USECENAC)
347 | ```
348 |
349 | ## count: now 6 rows and 3 columns, ungrouped
350 |
351 | ## # A tibble: 6 x 3
352 | ## ACBehavior USECENAC n
353 | ##
354 | ## 1 Set one temp and leave it 1 1661
355 | ## 2 Manually adjust at night/no one home 2 984
356 | ## 3 Program thermostat to change at certain times 3 727
357 | ## 4 Turn on or off as needed 4 438
358 | ## 5 No control 5 2
359 | ## 6 <NA> -2 1874
360 |
361 | ``` r
362 | recs %>% count(ClimateRegion_BA, CLIMATE_REGION_PUB)
363 | ```
364 |
365 | ## count: now 5 rows and 3 columns, ungrouped
366 |
367 | ## # A tibble: 5 x 3
368 | ## ClimateRegion_BA CLIMATE_REGION_PUB n
369 | ##
370 | ## 1 Hot-Dry/Mixed-Dry Hot-Dry/Mixed-Dry 750
371 | ## 2 Hot-Humid Hot-Humid 1036
372 | ## 3 Mixed-Humid Mixed-Humid 1468
373 | ## 4 Cold/Very Cold Cold/Very Cold 2008
374 | ## 5 Marine Marine 424
375 |
376 | ``` r
377 | recs %>% count(ClimateRegion_IECC, IECC_CLIMATE_PUB)
378 | ```
379 |
380 | ## count: now 11 rows and 3 columns, ungrouped
381 |
382 | ## # A tibble: 11 x 3
383 | ## ClimateRegion_IECC IECC_CLIMATE_PUB n
384 | ##
385 | ## 1 1A-2A 1A-2A 846
386 | ## 2 2B 2B 106
387 | ## 3 3A 3A 637
388 | ## 4 3B-4B 3B-4B 644
389 | ## 5 3C 3C 209
390 | ## 6 4A 4A 1021
391 | ## 7 4C 4C 215
392 | ## 8 5A 5A 1240
393 | ## 9 5B-5C 5B-5C 332
394 | ## 10 6A-6B 6A-6B 376
395 | ## 11 7A-7B-7AK-8AK 7A-7B-7AK-8AK 60
396 |
397 | ## Save data
398 |
399 | ``` r
400 | recs_out <- recs %>%
401 | select(DOEID, Region, Division, MSAStatus, Urbanicity, HousingUnitType, YearMade, SpaceHeatingUsed, HeatingBehavior, WinterTempDay, WinterTempAway, WinterTempNight, ACUsed, ACBehavior, SummerTempDay, SummerTempAway, SummerTempNight, TOTCSQFT, TOTHSQFT, TOTSQFT_EN, TOTUCSQFT, TOTUSQFT, NWEIGHT, starts_with("BRRWT"), CDD30YR, CDD65, CDD80, ClimateRegion_BA, ClimateRegion_IECC, HDD30YR, HDD65, HDD50, GNDHDD65, BTUEL, DOLLAREL, BTUNG, DOLLARNG, BTULP, DOLLARLP, BTUFO, DOLLARFO, TOTALBTU, TOTALDOL, BTUWOOD, BTUPELLET)
402 | ```
403 |
404 | ## select: dropped 18 variables (REGIONC, DIVISION, METROMICRO, UATYP10, TYPEHUQ, …)
405 |
406 | ``` r
407 | summary(recs_out)
408 | ```
409 |
410 | ## DOEID Region Division
411 | ## Min. :10001 Northeast: 794 Pacific :1085
412 | ## 1st Qu.:11422 Midwest :1327 South Atlantic :1058
413 | ## Median :12844 South :2010 East North Central: 836
414 | ## Mean :12844 West :1555 West South Central: 580
415 | ## 3rd Qu.:14265 Middle Atlantic : 541
416 | ## Max. :15686 West North Central: 491
417 | ## (Other) :1095
418 | ## MSAStatus Urbanicity
419 | ## Metropolitan Statistical Area:4745 Urban Area :3928
420 | ## Micropolitan Statistical Area: 584 Urban Cluster: 598
421 | ## None : 357 Rural :1160
422 | ##
423 | ##
424 | ##
425 | ##
426 | ## HousingUnitType YearMade SpaceHeatingUsed
427 | ## Mobile home : 286 1970-1979 :928 Mode :logical
428 | ## Single-family detached :3752 2000-2009 :901 FALSE:258
429 | ## Single-family attached : 479 1980-1989 :874 TRUE :5428
430 | ## Apartment: 2-4 Units : 311 Before 1950:858
431 | ## Apartment: 5 or more units: 858 1990-1999 :786
432 | ## 1960-1969 :565
433 | ## (Other) :774
434 | ## HeatingBehavior WinterTempDay
435 | ## Set one temp and leave it :2156 Min. :50.00
436 | ## Manually adjust at night/no one home :1414 1st Qu.:68.00
437 | ## Program thermostat to change at certain times: 972 Median :70.00
438 | ## Turn on or off as needed : 761 Mean :70.06
439 | ## No control : 114 3rd Qu.:72.00
440 | ## Other : 11 Max. :90.00
441 | ## NA : 258 NA's :258
442 | ## WinterTempAway WinterTempNight ACUsed
443 | ## Min. :50.00 Min. :50.00 Mode :logical
444 | ## 1st Qu.:65.00 1st Qu.:65.00 FALSE:737
445 | ## Median :68.00 Median :68.00 TRUE :4949
446 | ## Mean :67.12 Mean :68.06
447 | ## 3rd Qu.:70.00 3rd Qu.:70.00
448 | ## Max. :90.00 Max. :90.00
449 | ## NA's :258 NA's :258
450 | ## ACBehavior SummerTempDay
451 | ## Set one temp and leave it :1661 Min. :50.00
452 | ## Manually adjust at night/no one home : 984 1st Qu.:70.00
453 | ## Program thermostat to change at certain times: 727 Median :72.00
454 | ## Turn on or off as needed : 438 Mean :72.66
455 | ## No control : 2 3rd Qu.:76.00
456 | ## NA :1874 Max. :90.00
457 | ## NA's :737
458 | ## SummerTempAway SummerTempNight TOTCSQFT TOTHSQFT TOTSQFT_EN
459 | ## Min. :50.00 Min. :50.00 Min. : 0.0 Min. : 0 Min. : 221
460 | ## 1st Qu.:71.00 1st Qu.:70.00 1st Qu.: 466.2 1st Qu.:1008 1st Qu.:1100
461 | ## Median :75.00 Median :72.00 Median :1218.5 Median :1559 Median :1774
462 | ## Mean :74.63 Mean :71.82 Mean :1454.5 Mean :1816 Mean :2081
463 | ## 3rd Qu.:78.00 3rd Qu.:75.00 3rd Qu.:2094.0 3rd Qu.:2400 3rd Qu.:2766
464 | ## Max. :90.00 Max. :90.00 Max. :8066.0 Max. :8066 Max. :8501
465 | ## NA's :737 NA's :737
466 | ## TOTUCSQFT TOTUSQFT NWEIGHT BRRWT1
467 | ## Min. : 0.0 Min. : 0.0 Min. : 1236 Min. : 1836
468 | ## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 13874 1st Qu.: 9859
469 | ## Median : 400.0 Median : 250.0 Median : 18510 Median : 16942
470 | ## Mean : 793.9 Mean : 432.6 Mean : 20789 Mean : 20789
471 | ## 3rd Qu.:1150.0 3rd Qu.: 569.8 3rd Qu.: 24840 3rd Qu.: 27219
472 | ## Max. :7986.0 Max. :6660.0 Max. :139307 Max. :203902
473 | ##
474 | ## BRRWT2 BRRWT3 BRRWT4 BRRWT5
475 | ## Min. : 685.9 Min. : 543.9 Min. : 699.7 Min. : 649.3
476 | ## 1st Qu.: 9733.0 1st Qu.: 9575.3 1st Qu.: 9518.5 1st Qu.: 9598.5
477 | ## Median : 16993.7 Median : 16698.7 Median : 17034.2 Median : 16487.5
478 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
479 | ## 3rd Qu.: 27825.1 3rd Qu.: 27941.8 3rd Qu.: 27931.5 3rd Qu.: 27856.7
480 | ## Max. :189788.1 Max. :180155.3 Max. :159902.6 Max. :141796.4
481 | ##
482 | ## BRRWT6 BRRWT7 BRRWT8 BRRWT9
483 | ## Min. : 638.7 Min. : 564.1 Min. : 591 Min. : 545.2
484 | ## 1st Qu.: 9501.7 1st Qu.: 9534.4 1st Qu.: 9653 1st Qu.: 9595.0
485 | ## Median : 16150.6 Median : 16332.5 Median : 16802 Median : 17352.7
486 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789.3
487 | ## 3rd Qu.: 28092.8 3rd Qu.: 27992.5 3rd Qu.: 27926 3rd Qu.: 27753.7
488 | ## Max. :189031.8 Max. :192311.7 Max. :195071 Max. :117167.3
489 | ##
490 | ## BRRWT10 BRRWT11 BRRWT12 BRRWT13
491 | ## Min. : 732.5 Min. : 586.1 Min. : 549.8 Min. : 668
492 | ## 1st Qu.: 9077.6 1st Qu.: 9448.5 1st Qu.: 9388.2 1st Qu.: 9757
493 | ## Median : 16601.9 Median : 16172.3 Median : 16167.4 Median : 16584
494 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789
495 | ## 3rd Qu.: 28089.9 3rd Qu.: 28022.1 3rd Qu.: 28075.4 3rd Qu.: 27455
496 | ## Max. :183073.4 Max. :195408.4 Max. :197373.3 Max. :182228
497 | ##
498 | ## BRRWT14 BRRWT15 BRRWT16 BRRWT17
499 | ## Min. : 544.5 Min. : 671.4 Min. : 603.4 Min. : 563.3
500 | ## 1st Qu.: 9491.8 1st Qu.: 9341.8 1st Qu.: 9804.6 1st Qu.: 9593.2
501 | ## Median : 17028.9 Median : 15996.8 Median : 16562.6 Median : 16750.8
502 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
503 | ## 3rd Qu.: 27975.3 3rd Qu.: 28117.5 3rd Qu.: 27322.1 3rd Qu.: 27458.0
504 | ## Max. :173341.2 Max. :179152.7 Max. :210507.2 Max. :195346.9
505 | ##
506 | ## BRRWT18 BRRWT19 BRRWT20 BRRWT21
507 | ## Min. : 517.2 Min. : 657 Min. : 682.2 Min. : 689.4
508 | ## 1st Qu.: 9839.6 1st Qu.: 9776 1st Qu.: 9569.2 1st Qu.: 9663.9
509 | ## Median : 16560.5 Median : 16779 Median : 16881.2 Median : 16503.8
510 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789.3
511 | ## 3rd Qu.: 27636.2 3rd Qu.: 27986 3rd Qu.: 27467.7 3rd Qu.: 27863.0
512 | ## Max. :158094.9 Max. :197236 Max. :146347.4 Max. :181583.8
513 | ##
514 | ## BRRWT22 BRRWT23 BRRWT24 BRRWT25
515 | ## Min. : 581.3 Min. : 658.4 Min. : 698.7 Min. : 541.3
516 | ## 1st Qu.: 9805.3 1st Qu.: 9597.1 1st Qu.: 9387.9 1st Qu.: 9502.9
517 | ## Median : 16711.4 Median : 16205.0 Median : 16398.2 Median : 17120.6
518 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
519 | ## 3rd Qu.: 27503.4 3rd Qu.: 27855.2 3rd Qu.: 27791.0 3rd Qu.: 28108.8
520 | ## Max. :173557.2 Max. :182366.0 Max. :170970.0 Max. :128220.6
521 | ##
522 | ## BRRWT26 BRRWT27 BRRWT28 BRRWT29
523 | ## Min. : 832.9 Min. : 1372 Min. : 764.7 Min. : 854
524 | ## 1st Qu.: 9593.2 1st Qu.: 9333 1st Qu.: 9358.0 1st Qu.: 9596
525 | ## Median : 16642.2 Median : 16671 Median : 16663.4 Median : 16336
526 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789
527 | ## 3rd Qu.: 28018.5 3rd Qu.: 27832 3rd Qu.: 28065.9 3rd Qu.: 27506
528 | ## Max. :176770.0 Max. :176453 Max. :210413.6 Max. :194434
529 | ##
530 | ## BRRWT30 BRRWT31 BRRWT32 BRRWT33
531 | ## Min. : 680.6 Min. : 868.4 Min. : 645.1 Min. : 714.2
532 | ## 1st Qu.: 9689.3 1st Qu.: 9493.1 1st Qu.: 9370.6 1st Qu.: 9530.8
533 | ## Median : 16683.8 Median : 16876.0 Median : 16594.5 Median : 16839.7
534 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
535 | ## 3rd Qu.: 27613.1 3rd Qu.: 27807.8 3rd Qu.: 28250.9 3rd Qu.: 27610.2
536 | ## Max. :118557.6 Max. :197960.8 Max. :182658.3 Max. :183414.8
537 | ##
538 | ## BRRWT34 BRRWT35 BRRWT36 BRRWT37
539 | ## Min. : 1880 Min. : 629.3 Min. : 980.2 Min. : 634.6
540 | ## 1st Qu.: 9703 1st Qu.: 9842.0 1st Qu.: 9439.6 1st Qu.: 9276.7
541 | ## Median : 16380 Median : 17204.4 Median : 16440.6 Median : 16620.9
542 | ## Mean : 20789 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
543 | ## 3rd Qu.: 27846 3rd Qu.: 27533.4 3rd Qu.: 28354.2 3rd Qu.: 27754.3
544 | ## Max. :130246 Max. :125674.9 Max. :171375.9 Max. :209103.9
545 | ##
546 | ## BRRWT38 BRRWT39 BRRWT40 BRRWT41
547 | ## Min. : 738.1 Min. : 684.5 Min. : 1531 Min. : 1406
548 | ## 1st Qu.: 9737.9 1st Qu.: 9389.5 1st Qu.: 9624 1st Qu.: 9776
549 | ## Median : 16862.8 Median : 16797.7 Median : 16644 Median : 16910
550 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789
551 | ## 3rd Qu.: 27710.0 3rd Qu.: 27850.3 3rd Qu.: 27858 3rd Qu.: 27616
552 | ## Max. :187208.7 Max. :136106.4 Max. :165612 Max. :145467
553 | ##
554 | ## BRRWT42 BRRWT43 BRRWT44 BRRWT45
555 | ## Min. : 943.8 Min. : 683.3 Min. : 866.4 Min. : 1105
556 | ## 1st Qu.: 9446.7 1st Qu.: 9563.6 1st Qu.: 9595.5 1st Qu.: 9563
557 | ## Median : 16177.2 Median : 16999.1 Median : 17034.6 Median : 16629
558 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789
559 | ## 3rd Qu.: 28089.3 3rd Qu.: 27724.1 3rd Qu.: 27593.8 3rd Qu.: 27773
560 | ## Max. :189726.6 Max. :192302.9 Max. :190671.5 Max. :160108
561 | ##
562 | ## BRRWT46 BRRWT47 BRRWT48 BRRWT49
563 | ## Min. : 750.7 Min. : 1230 Min. : 684.4 Min. : 627.1
564 | ## 1st Qu.: 9616.2 1st Qu.: 9362 1st Qu.: 9383.9 1st Qu.: 9489.0
565 | ## Median : 16821.6 Median : 16243 Median : 16720.3 Median : 17068.6
566 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789.3
567 | ## 3rd Qu.: 27563.3 3rd Qu.: 27547 3rd Qu.: 27965.8 3rd Qu.: 27829.1
568 | ## Max. :183963.8 Max. :196001 Max. :199079.7 Max. :203407.7
569 | ##
570 | ## BRRWT50 BRRWT51 BRRWT52 BRRWT53
571 | ## Min. : 1638 Min. : 922.9 Min. : 749.9 Min. : 871.8
572 | ## 1st Qu.: 9601 1st Qu.: 9704.7 1st Qu.: 9496.9 1st Qu.: 9489.1
573 | ## Median : 16788 Median : 16706.2 Median : 16442.9 Median : 16494.9
574 | ## Mean : 20789 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
575 | ## 3rd Qu.: 27667 3rd Qu.: 27755.8 3rd Qu.: 27621.2 3rd Qu.: 28075.0
576 | ## Max. :223546 Max. :161561.8 Max. :146056.0 Max. :143796.6
577 | ##
578 | ## BRRWT54 BRRWT55 BRRWT56 BRRWT57
579 | ## Min. : 687.9 Min. : 2056 Min. : 623.7 Min. : 713.4
580 | ## 1st Qu.: 9623.3 1st Qu.: 9595 1st Qu.: 9798.4 1st Qu.: 9393.8
581 | ## Median : 16662.9 Median : 16589 Median : 16624.8 Median : 17198.4
582 | ## Mean : 20789.3 Mean : 20789 Mean : 20789.3 Mean : 20789.3
583 | ## 3rd Qu.: 27612.8 3rd Qu.: 27857 3rd Qu.: 27650.0 3rd Qu.: 27964.1
584 | ## Max. :174657.5 Max. :206797 Max. :226169.8 Max. :162193.6
585 | ##
586 | ## BRRWT58 BRRWT59 BRRWT60 BRRWT61
587 | ## Min. : 905.5 Min. : 630.7 Min. : 1275 Min. : 546.4
588 | ## 1st Qu.: 9559.2 1st Qu.: 9623.7 1st Qu.: 9577 1st Qu.: 9387.4
589 | ## Median : 16540.0 Median : 16656.6 Median : 16197 Median : 16376.3
590 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789.3
591 | ## 3rd Qu.: 27780.9 3rd Qu.: 27577.8 3rd Qu.: 27781 3rd Qu.: 28016.5
592 | ## Max. :211170.6 Max. :206702.7 Max. :169387 Max. :122260.9
593 | ##
594 | ## BRRWT62 BRRWT63 BRRWT64 BRRWT65
595 | ## Min. : 739.7 Min. : 671.5 Min. : 926.4 Min. : 1144
596 | ## 1st Qu.: 9643.5 1st Qu.: 9455.3 1st Qu.: 9400.5 1st Qu.: 9597
597 | ## Median : 17067.2 Median : 16632.1 Median : 16508.1 Median : 16442
598 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789
599 | ## 3rd Qu.: 27540.6 3rd Qu.: 28020.8 3rd Qu.: 27693.9 3rd Qu.: 27348
600 | ## Max. :158200.9 Max. :196933.9 Max. :217490.7 Max. :239712
601 | ##
602 | ## BRRWT66 BRRWT67 BRRWT68 BRRWT69
603 | ## Min. : 1264 Min. : 684.8 Min. : 1053 Min. : 1676
604 | ## 1st Qu.: 9758 1st Qu.: 9588.0 1st Qu.: 9245 1st Qu.: 9371
605 | ## Median : 16565 Median : 16560.8 Median : 16464 Median : 16682
606 | ## Mean : 20789 Mean : 20789.3 Mean : 20789 Mean : 20789
607 | ## 3rd Qu.: 27884 3rd Qu.: 27838.7 3rd Qu.: 28108 3rd Qu.: 27957
608 | ## Max. :157193 Max. :179204.9 Max. :183266 Max. :193274
609 | ##
610 | ## BRRWT70 BRRWT71 BRRWT72 BRRWT73
611 | ## Min. : 758.4 Min. : 892.2 Min. : 695.5 Min. : 875
612 | ## 1st Qu.: 9622.5 1st Qu.: 9451.9 1st Qu.: 9516.0 1st Qu.: 9734
613 | ## Median : 16676.4 Median : 16482.8 Median : 16717.8 Median : 16930
614 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789
615 | ## 3rd Qu.: 27897.7 3rd Qu.: 27882.7 3rd Qu.: 27611.7 3rd Qu.: 27756
616 | ## Max. :146583.8 Max. :126528.3 Max. :196704.6 Max. :184412
617 | ##
618 | ## BRRWT74 BRRWT75 BRRWT76 BRRWT77
619 | ## Min. : 541.6 Min. : 669.7 Min. : 617 Min. : 560.5
620 | ## 1st Qu.: 9503.9 1st Qu.: 9835.9 1st Qu.: 9385 1st Qu.: 9673.8
621 | ## Median : 16128.6 Median : 16921.5 Median : 17000 Median : 16713.6
622 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789 Mean : 20789.3
623 | ## 3rd Qu.: 27849.9 3rd Qu.: 27352.3 3rd Qu.: 27558 3rd Qu.: 27712.8
624 | ## Max. :125833.8 Max. :194829.8 Max. :212262 Max. :234971.4
625 | ##
626 | ## BRRWT78 BRRWT79 BRRWT80 BRRWT81
627 | ## Min. : 526.7 Min. : 651.1 Min. : 675.7 Min. : 681.2
628 | ## 1st Qu.: 9744.1 1st Qu.: 9549.7 1st Qu.: 9554.4 1st Qu.: 9489.0
629 | ## Median : 17098.9 Median : 16676.0 Median : 16707.8 Median : 16769.3
630 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
631 | ## 3rd Qu.: 27459.8 3rd Qu.: 27857.9 3rd Qu.: 27688.3 3rd Qu.: 27901.5
632 | ## Max. :152055.4 Max. :180157.0 Max. :165661.6 Max. :191740.1
633 | ##
634 | ## BRRWT82 BRRWT83 BRRWT84 BRRWT85
635 | ## Min. : 563.6 Min. : 656.9 Min. : 652.7 Min. : 675.4
636 | ## 1st Qu.: 9216.4 1st Qu.: 9634.4 1st Qu.: 9432.5 1st Qu.: 9551.2
637 | ## Median : 16121.6 Median : 16516.9 Median : 16454.8 Median : 16902.2
638 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
639 | ## 3rd Qu.: 28253.1 3rd Qu.: 27725.8 3rd Qu.: 28006.4 3rd Qu.: 27325.4
640 | ## Max. :171004.8 Max. :184719.0 Max. :191550.3 Max. :198238.4
641 | ##
642 | ## BRRWT86 BRRWT87 BRRWT88 BRRWT89
643 | ## Min. : 680.3 Min. : 551.7 Min. : 704.2 Min. : 644.9
644 | ## 1st Qu.: 9619.8 1st Qu.: 9436.6 1st Qu.: 9393.1 1st Qu.: 9643.2
645 | ## Median : 16772.0 Median : 16799.0 Median : 16778.6 Median : 16586.1
646 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
647 | ## 3rd Qu.: 27638.1 3rd Qu.: 28046.3 3rd Qu.: 27789.9 3rd Qu.: 28075.4
648 | ## Max. :232065.5 Max. :179835.0 Max. :166866.1 Max. :144299.3
649 | ##
650 | ## BRRWT90 BRRWT91 BRRWT92 BRRWT93
651 | ## Min. : 649.2 Min. : 568.2 Min. : 591.9 Min. : 545.3
652 | ## 1st Qu.: 9467.7 1st Qu.: 9506.3 1st Qu.: 9610.6 1st Qu.: 9688.4
653 | ## Median : 16212.0 Median : 16781.5 Median : 16524.1 Median : 16258.4
654 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean : 20789.3
655 | ## 3rd Qu.: 28020.8 3rd Qu.: 27876.1 3rd Qu.: 27915.1 3rd Qu.: 27728.8
656 | ## Max. :175279.5 Max. :205917.4 Max. :225638.4 Max. :117260.5
657 | ##
658 | ## BRRWT94 BRRWT95 BRRWT96 CDD30YR
659 | ## Min. : 716.2 Min. : 566.4 Min. : 551.1 Min. : 0
660 | ## 1st Qu.: 9561.6 1st Qu.: 9530.2 1st Qu.: 9533.2 1st Qu.: 712
661 | ## Median : 17099.7 Median : 16577.2 Median : 16358.9 Median :1150
662 | ## Mean : 20789.3 Mean : 20789.3 Mean : 20789.3 Mean :1451
663 | ## 3rd Qu.: 27853.9 3rd Qu.: 27441.4 3rd Qu.: 27823.1 3rd Qu.:1880
664 | ## Max. :207264.3 Max. :205015.8 Max. :171550.8 Max. :5792
665 | ##
666 | ## CDD65 CDD80 ClimateRegion_BA ClimateRegion_IECC
667 | ## Min. : 0 Min. : 0.0 Hot-Dry/Mixed-Dry: 750 5A :1240
668 | ## 1st Qu.: 793 1st Qu.: 10.0 Hot-Humid :1036 4A :1021
669 | ## Median :1378 Median : 60.0 Mixed-Humid :1468 1A-2A : 846
670 | ## Mean :1719 Mean : 174.7 Cold/Very Cold :2008 3B-4B : 644
671 | ## 3rd Qu.:2231 3rd Qu.: 208.0 Marine : 424 3A : 637
672 | ## Max. :6607 Max. :2297.0 6A-6B : 376
673 | ## (Other): 922
674 | ## HDD30YR HDD65 HDD50 GNDHDD65
675 | ## Min. : 0 Min. : 0 Min. : 0 Min. : 0
676 | ## 1st Qu.: 2102 1st Qu.:1881 1st Qu.: 260 1st Qu.: 1337
677 | ## Median : 4353 Median :3878 Median :1260 Median : 3704
678 | ## Mean : 4087 Mean :3708 Mean :1486 Mean : 3578
679 | ## 3rd Qu.: 5967 3rd Qu.:5467 3rd Qu.:2499 3rd Qu.: 5630
680 | ## Max. :12184 Max. :9843 Max. :4956 Max. :11851
681 | ##
682 | ## BTUEL DOLLAREL BTUNG DOLLARNG
683 | ## Min. : 201.6 Min. : 18.72 Min. : 0 Min. : 0.0
684 | ## 1st Qu.: 20221.3 1st Qu.: 815.12 1st Qu.: 0 1st Qu.: 0.0
685 | ## Median : 32582.4 Median :1253.02 Median : 17961 Median : 231.8
686 | ## Mean : 37630.7 Mean :1403.78 Mean : 33331 Mean : 346.8
687 | ## 3rd Qu.: 49670.6 3rd Qu.:1830.83 3rd Qu.: 57126 3rd Qu.: 605.1
688 | ## Max. :215695.7 Max. :8121.56 Max. :306594 Max. :2789.8
689 | ##
690 | ## BTULP DOLLARLP BTUFO DOLLARFO
691 | ## Min. : 0 Min. : 0.00 Min. : 0 Min. : 0.00
692 | ## 1st Qu.: 0 1st Qu.: 0.00 1st Qu.: 0 1st Qu.: 0.00
693 | ## Median : 0 Median : 0.00 Median : 0 Median : 0.00
694 | ## Mean : 3192 Mean : 67.72 Mean : 3569 Mean : 64.08
695 | ## 3rd Qu.: 0 3rd Qu.: 0.00 3rd Qu.: 0 3rd Qu.: 0.00
696 | ## Max. :220435 Max. :5121.27 Max. :273608 Max. :4700.03
697 | ##
698 | ## TOTALBTU TOTALDOL BTUWOOD BTUPELLET
699 | ## Min. : 201.6 Min. : 60.46 Min. : 0 Min. : 0.0
700 | ## 1st Qu.: 42655.8 1st Qu.: 1175.49 1st Qu.: 0 1st Qu.: 0.0
701 | ## Median : 68663.3 Median : 1724.60 Median : 0 Median : 0.0
702 | ## Mean : 77722.9 Mean : 1882.34 Mean : 4140 Mean : 197.4
703 | ## 3rd Qu.:103832.9 3rd Qu.: 2385.84 3rd Qu.: 0 3rd Qu.: 0.0
704 | ## Max. :490187.4 Max. :10135.99 Max. :295476 Max. :115500.0
705 | ##
706 |
707 | ``` r
708 | write_rds(recs_out, here("Data", "recs.rds"), compress="gz")
709 | ```
710 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Categorical Data Analysis Exercises"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## -------------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- read_rds(here("Data", "anes.rds")) %>%
16 | mutate(Weight=Weight/sum(Weight)*224059005)
17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation
18 |
19 | anes_des <- anes %>%
20 | as_survey_design(weights = Weight,
21 | strata = Stratum,
22 | ids = VarUnit,
23 | nest = TRUE)
24 |
25 | #'
26 | #' # Part 1
27 | #'
28 | #' 1. How many females have a graduate degree?
29 | #'
30 | ## -------------------------------------------------------------------
31 |
32 |
33 |
34 | #'
35 | #' 2. What percentage of people identify as "Strong democrat"?
36 | #'
37 | ## -------------------------------------------------------------------
38 |
39 |
40 | #'
41 | #' 3. What percentage of people who voted in the 2016 election identify as "Strong republican"?
42 | #'
43 | ## -------------------------------------------------------------------
44 |
45 |
46 | #'
47 | #' 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval.
48 | #'
49 | ## -------------------------------------------------------------------
50 |
51 |
52 | #'
53 | #' 5. What is the design effect for the proportion of people who voted early?
54 | #'
55 | ## -------------------------------------------------------------------
56 |
57 |
58 | #'
59 | #' # Part 2
60 | #'
61 | #' 1. Is there a relationship between PartyID and When people voted in the 2016 election (on election day or early voting)?
62 | #'
63 | ## -------------------------------------------------------------------
64 |
65 |
66 | #'
67 | #' 2. Is there a relationship between PartyID and trust in the government?
68 | #'
69 | ## -------------------------------------------------------------------
70 |
71 |
72 | #'
73 | #'
74 | #' # Bonus
75 | #'
76 | #' 1. What percentage of people lean republican?
77 | #'
78 | ## -------------------------------------------------------------------
79 |
80 |
81 | #'
82 | #' 2. Were people who lean democrat more likely to vote early in the 2016 election?
83 | #'
84 | ## -------------------------------------------------------------------
85 |
86 |
87 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Categorical Data Analysis Exercises"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- read_rds(here("Data", "anes.rds")) %>%
16 | mutate(Weight=Weight/sum(Weight)*224059005)
17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation
18 |
19 | anes_des <- anes %>%
20 | as_survey_design(weights = Weight,
21 | strata = Stratum,
22 | ids = VarUnit,
23 | nest = TRUE)
24 | ```
25 |
26 | # Part 1
27 |
28 | 1. How many females have a graduate degree?
29 |
30 | ```{r}
31 |
32 |
33 | ```
34 |
35 | 2. What percentage of people identify as "Strong democrat"?
36 |
37 | ```{r}
38 |
39 | ```
40 |
41 | 3. What percentage of people who voted in the 2016 election identify as "Strong republican"?
42 |
43 | ```{r}
44 |
45 | ```
46 |
47 | 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval.
48 |
49 | ```{r}
50 |
51 | ```
52 |
53 | 5. What is the design effect for the proportion of people who voted early?
54 |
55 | ```{r}
56 |
57 | ```
58 |
59 | # Part 2
60 |
61 | 1. Is there a relationship between PartyID and When people voted in the 2016 election (on election day or early voting)?
62 |
63 | ```{r}
64 |
65 | ```
66 |
67 | 2. Is there a relationship between PartyID and trust in the government?
68 |
69 | ```{r}
70 |
71 | ```
72 |
73 |
74 | # Bonus
75 |
76 | 1. What percentage of people lean republican?
77 |
78 | ```{r}
79 |
80 | ```
81 |
82 | 2. Were people who lean democrat more likely to vote early in the 2016 election?
83 |
84 | ```{r}
85 |
86 | ```
--------------------------------------------------------------------------------
/Exercises/CategorialExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Categorical Data Analysis Exercise Solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## -------------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- read_rds(here("Data", "anes.rds")) %>%
16 | mutate(Weight=Weight/sum(Weight)*224059005)
17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation
18 |
19 | anes_des <- anes %>%
20 | as_survey_design(weights = Weight,
21 | strata = Stratum,
22 | ids = VarUnit,
23 | nest = TRUE)
24 |
25 | #'
26 | #' # Part 1
27 | #'
28 | #' 1. How many females have a graduate degree?
29 | #'
30 | ## -------------------------------------------------------------------
31 | #Option 1:
32 | anes_des %>%
33 | filter(Gender=="Female", Education=="Graduate") %>%
34 | survey_count(name="n")
35 | #Option 2:
36 | anes_des %>%
37 | filter(Gender=="Female", Education=="Graduate") %>%
38 | summarize(
39 | N=survey_total(), .groups="drop"
40 | )
41 |
42 |
43 | #'
44 | #' 2. What percentage of people identify as "Strong democrat"?
45 | #'
46 | ## -------------------------------------------------------------------
47 | anes_des %>%
48 | group_by(PartyID) %>%
49 | summarize(
50 | p=survey_mean()
51 | ) %>%
52 | filter(PartyID=="Strong democrat")
53 |
54 | #'
55 | #' 3. What percentage of people who voted in the 2016 election identify as "Strong republican"?
56 | #'
57 | ## -------------------------------------------------------------------
58 | anes_des %>%
59 | filter(VotedPres2016=="Yes") %>%
60 | group_by(PartyID) %>%
61 | summarize(
62 | p=survey_mean()
63 | )
64 |
65 | #'
66 | #' 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval.
67 | #'
68 | ## -------------------------------------------------------------------
69 | anes_des %>%
70 |   group_by(groups=interaction(VotedPres2012, VotedPres2016)) %>% # one group per (2012, 2016) vote combination
71 |   filter(!is.na(groups)) %>% # drop respondents missing either vote variable
72 |   summarize(
73 |     VotedPres2012=VotedPres2012[1], # carry the component labels into the output
74 |     VotedPres2016=VotedPres2016[1],
75 |     p=survey_mean(vartype="ci") # proportion with its confidence interval; argument name spelled out in full
76 |   )
77 |
78 | #'
79 | #' 5. What is the design effect for the proportion of people who voted early?
80 | #'
81 | ## -------------------------------------------------------------------
82 | anes_des %>%
83 | filter(!is.na(EarlyVote2016)) %>%
84 | group_by(EarlyVote2016) %>%
85 | summarize(
86 | p=survey_mean(deff=TRUE),
87 | N=survey_total()
88 | )
89 |
90 | #'
91 | #' # Part 2
92 | #'
93 | #' 1. Is there a relationship between PartyID and When people voted in the 2016 election (on election day or early voting)?
94 | #'
95 | ## -------------------------------------------------------------------
96 | anes_des %>%
97 | svychisq(design=.,
98 | formula=~PartyID +EarlyVote2016)
99 |
100 | #'
101 | #' 2. Is there a relationship between PartyID and trust in the government?
102 | #'
103 | ## -------------------------------------------------------------------
104 | anes_des %>%
105 | svychisq(design=.,
106 | formula=~PartyID+TrustGovernment,
107 | statistic="Wald")
108 |
109 | #'
110 | #'
111 | #' # Bonus
112 | #'
113 | #' 1. What percentage of people lean republican?
114 | #'
115 | ## -------------------------------------------------------------------
116 |
117 | #Solution 1: Using forcats package
118 | anes_des %>%
119 | mutate(PartyID3=fct_collapse(PartyID,
120 | LeanDem=c("Strong democrat",
121 | "Not very strong democrat",
122 | "Independent-democrat"),
123 | LeanRep=c("Strong republican",
124 | "Not very strong republican",
125 | "Independent-republican"),
126 | other_level="Other")) %>%
127 | group_by(PartyID3) %>%
128 | summarize(p=survey_mean())
129 |
130 | #Solution 2: Using case_when
131 | anes_des %>%
132 | mutate(PartyID3=case_when(PartyID %in% c("Strong democrat",
133 | "Not very strong democrat",
134 | "Independent-democrat")~"LeanDem",
135 | PartyID %in% c("Strong republican",
136 | "Not very strong republican",
137 | "Independent-republican")~"LeanRep",
138 | is.na(PartyID)~NA_character_,
139 | TRUE~"Other")) %>%
140 | group_by(PartyID3) %>%
141 | summarize(p=survey_mean())
142 |
143 |
144 | #'
145 | #' 2. Were people who lean democrat more likely to vote early in the 2016 election?
146 | #'
147 | ## -------------------------------------------------------------------
148 |
149 | earlyv_glm<-anes_des %>%
150 | mutate(PartyID3=fct_collapse(PartyID,
151 | LeanDem=c("Strong democrat",
152 | "Not very strong democrat",
153 | "Independent-democrat"),
154 | LeanRep=c("Strong republican",
155 | "Not very strong republican",
156 | "Independent-republican"),
157 | other_level="Other")) %>%
158 | svyglm(design=.,
159 | formula=(EarlyVote2016=="Yes")~PartyID3,
160 | family=quasibinomial(),
161 | na.action=na.omit)
162 |
163 | summary(earlyv_glm)
164 |
165 |
--------------------------------------------------------------------------------
/Exercises/CategorialExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Categorical Data Analysis Exercise Solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | anes <- read_rds(here("Data", "anes.rds")) %>%
16 | mutate(Weight=Weight/sum(Weight)*224059005)
17 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation
18 |
19 | anes_des <- anes %>%
20 | as_survey_design(weights = Weight,
21 | strata = Stratum,
22 | ids = VarUnit,
23 | nest = TRUE)
24 | ```
25 |
26 | # Part 1
27 |
28 | 1. How many females have a graduate degree?
29 |
30 | ```{r}
31 | #Option 1:
32 | anes_des %>%
33 | filter(Gender=="Female", Education=="Graduate") %>%
34 | survey_count(name="n")
35 | #Option 2:
36 | anes_des %>%
37 | filter(Gender=="Female", Education=="Graduate") %>%
38 | summarize(
39 | N=survey_total(), .groups="drop"
40 | )
41 |
42 | ```
43 |
44 | 2. What percentage of people identify as "Strong democrat"?
45 |
46 | ```{r}
47 | anes_des %>%
48 | group_by(PartyID) %>%
49 | summarize(
50 | p=survey_mean()
51 | ) %>%
52 | filter(PartyID=="Strong democrat")
53 | ```
54 |
55 | 3. What percentage of people who voted in the 2016 election identify as "Strong republican"?
56 |
57 | ```{r}
58 | anes_des %>%
59 | filter(VotedPres2016=="Yes") %>%
60 | group_by(PartyID) %>%
61 | summarize(
62 | p=survey_mean()
63 | )
64 | ```
65 |
66 | 4. What percentage of people voted in both the 2012 election and in the 2016 election? Include the confidence interval.
67 |
68 | ```{r}
69 | anes_des %>%
70 |   group_by(groups=interaction(VotedPres2012, VotedPres2016)) %>% # one group per (2012, 2016) vote combination
71 |   filter(!is.na(groups)) %>% # drop respondents missing either vote variable
72 |   summarize(
73 |     VotedPres2012=VotedPres2012[1], # carry the component labels into the output
74 |     VotedPres2016=VotedPres2016[1],
75 |     p=survey_mean(vartype="ci") # proportion with its confidence interval; argument name spelled out in full
76 |   )
77 | ```
78 |
79 | 5. What is the design effect for the proportion of people who voted early?
80 |
81 | ```{r}
82 | anes_des %>%
83 | filter(!is.na(EarlyVote2016)) %>%
84 | group_by(EarlyVote2016) %>%
85 | summarize(
86 | p=survey_mean(deff=TRUE),
87 | N=survey_total()
88 | )
89 | ```
90 |
91 | # Part 2
92 |
93 | 1. Is there a relationship between PartyID and When people voted in the 2016 election (on election day or early voting)?
94 |
95 | ```{r}
96 | anes_des %>%
97 | svychisq(design=.,
98 | formula=~PartyID +EarlyVote2016)
99 | ```
100 |
101 | 2. Is there a relationship between PartyID and trust in the government?
102 |
103 | ```{r}
104 | anes_des %>%
105 | svychisq(design=.,
106 | formula=~PartyID+TrustGovernment,
107 | statistic="Wald")
108 | ```
109 |
110 |
111 | # Bonus
112 |
113 | 1. What percentage of people lean republican?
114 |
115 | ```{r}
116 |
117 | #Solution 1: Using forcats package
118 | anes_des %>%
119 | mutate(PartyID3=fct_collapse(PartyID,
120 | LeanDem=c("Strong democrat",
121 | "Not very strong democrat",
122 | "Independent-democrat"),
123 | LeanRep=c("Strong republican",
124 | "Not very strong republican",
125 | "Independent-republican"),
126 | other_level="Other")) %>%
127 | group_by(PartyID3) %>%
128 | summarize(p=survey_mean())
129 |
130 | #Solution 2: Using case_when
131 | anes_des %>%
132 | mutate(PartyID3=case_when(PartyID %in% c("Strong democrat",
133 | "Not very strong democrat",
134 | "Independent-democrat")~"LeanDem",
135 | PartyID %in% c("Strong republican",
136 | "Not very strong republican",
137 | "Independent-republican")~"LeanRep",
138 | is.na(PartyID)~NA_character_,
139 | TRUE~"Other")) %>%
140 | group_by(PartyID3) %>%
141 | summarize(p=survey_mean())
142 |
143 | ```
144 |
145 | 2. Were people who lean democrat more likely to vote early in the 2016 election?
146 |
147 | ```{r}
148 |
149 | earlyv_glm<-anes_des %>%
150 | mutate(PartyID3=fct_collapse(PartyID,
151 | LeanDem=c("Strong democrat",
152 | "Not very strong democrat",
153 | "Independent-democrat"),
154 | LeanRep=c("Strong republican",
155 | "Not very strong republican",
156 | "Independent-republican"),
157 | other_level="Other")) %>%
158 | svyglm(design=.,
159 | formula=(EarlyVote2016=="Yes")~PartyID3,
160 | family=quasibinomial(),
161 | na.action=na.omit)
162 |
163 | summary(earlyv_glm)
164 | ```
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Continuous Data Analysis Exercises"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## -------------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- read_rds(here("Data", "recs.rds"))
16 |
17 | recs_des <- recs %>%
18 | as_survey_rep(weights=NWEIGHT,
19 | repweights=starts_with("BRRWT"),
20 | type="Fay",
21 | rho=0.5,
22 | mse=TRUE)
23 |
24 | #'
25 | #' # Part 1
26 | #'
27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 | #'
29 | ## -------------------------------------------------------------------
30 |
31 |
32 | #'
33 | #' 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
34 | #'
35 | ## -------------------------------------------------------------------
36 |
37 |
38 | #'
39 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
40 | #'
41 | ## -------------------------------------------------------------------
42 |
43 |
44 | #'
45 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
46 | #'
47 | ## -------------------------------------------------------------------
48 |
49 |
50 | #'
51 | #' # Part 2
52 | #'
53 | #' 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
54 | #'
55 | ## -------------------------------------------------------------------
56 |
57 |
58 | #'
59 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
60 | #'
61 | ## -------------------------------------------------------------------
62 |
63 |
64 | #'
65 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
66 | #'
67 | ## -------------------------------------------------------------------
68 |
69 |
70 | #'
71 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
72 | #'
73 | ## -------------------------------------------------------------------
74 |
75 |
76 | #'
77 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
78 | #'
79 | ## -------------------------------------------------------------------
80 |
81 |
82 | #'
83 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Continuous Data Analysis Exercises"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- read_rds(here("Data", "recs.rds"))
16 |
17 | recs_des <- recs %>%
18 | as_survey_rep(weights=NWEIGHT,
19 | repweights=starts_with("BRRWT"),
20 | type="Fay",
21 | rho=0.5,
22 | mse=TRUE)
23 | ```
24 |
25 | # Part 1
26 |
27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 |
29 | ```{r}
30 |
31 | ```
32 |
33 | 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
34 |
35 | ```{r}
36 |
37 | ```
38 |
39 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
40 |
41 | ```{r}
42 |
43 | ```
44 |
45 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
46 |
47 | ```{r}
48 |
49 | ```
50 |
51 | # Part 2
52 |
53 | 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
54 |
55 | ```{r}
56 |
57 | ```
58 |
59 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
60 |
61 | ```{r}
62 |
63 | ```
64 |
65 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
66 |
67 | ```{r}
68 |
69 | ```
70 |
71 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
72 |
73 | ```{r}
74 |
75 | ```
76 |
77 | 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
78 |
79 | ```{r}
80 |
81 | ```
82 |
83 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Continuous Data Analysis Exercise Solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Set-up
9 | ## -------------------------------------------------------------------
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- read_rds(here("Data", "recs.rds"))
16 |
17 | recs_des <- recs %>%
18 | as_survey_rep(weights=NWEIGHT,
19 | repweights=starts_with("BRRWT"),
20 | type="Fay",
21 | rho=0.5,
22 | mse=TRUE)
23 |
24 | #'
25 | #' # Part 1
26 | #'
27 | #' 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 | #'
29 | ## -------------------------------------------------------------------
30 | recs_des %>%
31 | summarize(
32 | SF_HU=survey_mean(TOTSQFT_EN,
33 | vartype = "ci",
34 | level = 0.9)
35 | )
36 |
37 | #'
38 | #' 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
39 | #'
40 | ## -------------------------------------------------------------------
41 | recs_des %>%
42 | summarize(
43 | PropCooled=survey_ratio(
44 | numerator = TOTCSQFT,
45 | denominator = TOTSQFT_EN,
46 | vartype = "se")
47 | )
48 |
49 | #'
50 | #' 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
51 | #'
52 | ## -------------------------------------------------------------------
53 | recs_des %>%
54 | summarize(
55 | WinterNightTemp=survey_median(WinterTempNight,
56 | vartype = "se",
57 | na.rm = TRUE)
58 | )
59 |
60 | #'
61 | #' 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
62 | #'
63 | ## -------------------------------------------------------------------
64 | recs_des %>%
65 |   summarize(
66 |     WinterNightTemp=survey_quantile(WinterTempNight, # the exercise asks for survey_quantile, not survey_median
67 |                                     quantiles = 0.5, # numeric probability; 0.5 is the median
68 |                                     vartype = "se",
69 |                                     na.rm = TRUE) # skip households with no recorded winter night temperature
70 |   )
71 |
72 | #'
73 | #' # Part 2
74 | #'
75 | #' 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
76 | #'
77 | ## -------------------------------------------------------------------
78 | # Option 1: cascade() also adds summary rows for the coarser grouping levels (see the srvyr documentation)
79 | recs_des %>%
80 |   group_by(Region, Division, Urbanicity) %>%
81 |   cascade(
82 |     EnergyCost=survey_mean(TOTALDOL)
83 |   )
84 | # Option 2: summarize() gives one row per
85 | # Region/Division/Urbanicity combination
86 | recs_des %>%
87 |   group_by(Region, Division, Urbanicity) %>%
88 |   summarize(
89 |     EnergyCost=survey_mean(TOTALDOL)
90 |   )
91 |
92 | #'
93 | #' 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
94 | #'
95 | ## -------------------------------------------------------------------
96 | recs_des %>%
97 | filter(Region=="South") %>%
98 | summarize(
99 | MedElBill=survey_median(DOLLAREL,
100 | vartype="ci")
101 | )
102 |
103 | #'
104 | #' 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
105 | #'
106 | ## -------------------------------------------------------------------
107 | recs_des %>%
108 | svyttest(design=.,
109 | formula = I(WinterTempDay-SummerTempDay)~0,
110 | na.rm = TRUE)
111 |
112 | #'
113 | #' 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
114 | #'
115 | ## -------------------------------------------------------------------
116 | m1 <- recs_des %>%
117 | svyglm(design=.,
118 | formula=DOLLAREL~Region,
119 | na.action=na.omit)
120 | summary(m1)
121 |
122 | #'
123 | #' 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
124 | #'
125 | ## -------------------------------------------------------------------
126 | m2 <- recs_des %>%
127 | svyglm(design=.,
128 | formula=TOTALDOL~TOTCSQFT,
129 | na.action=na.omit)
130 | summary(m2)
131 |
132 | #'
133 |
--------------------------------------------------------------------------------
/Exercises/ContinuousExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Continuous Data Analysis Exercise Solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Set-up
9 | ```{r}
10 | library(tidyverse) # for tidyverse
11 | library(here) # for file paths
12 | library(survey) # for survey analysis
13 | library(srvyr) # for tidy survey analysis
14 |
15 | recs <- read_rds(here("Data", "recs.rds"))
16 |
17 | recs_des <- recs %>%
18 | as_survey_rep(weights=NWEIGHT,
19 | repweights=starts_with("BRRWT"),
20 | type="Fay",
21 | rho=0.5,
22 | mse=TRUE)
23 | ```
24 |
25 | # Part 1
26 |
27 | 1. Find the average square footage of housing units (TOTSQFT_EN) with a 90% confidence interval.
28 |
29 | ```{r}
30 | recs_des %>%
31 | summarize(
32 | SF_HU=survey_mean(TOTSQFT_EN,
33 | vartype = "ci",
34 | level = 0.9)
35 | )
36 | ```
37 |
38 | 2. Estimate the ratio of cooled square footage (TOTCSQFT) to the total square footage of housing units (TOTSQFT_EN) with its standard error.
39 |
40 | ```{r}
41 | # Ratio of cooled to total square footage, reported with its standard error
42 | summarize(
43 |   recs_des,
44 |   PropCooled = survey_ratio(numerator = TOTCSQFT,
45 |                             denominator = TOTSQFT_EN,
46 |                             vartype = "se")
47 | )
48 | ```
49 |
50 | 3. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_median` function.
51 |
52 | ```{r}
53 | # Weighted median night-time winter setting; missing values are removed first
54 | summarize(
55 |   recs_des,
56 |   WinterNightTemp = survey_median(WinterTempNight,
57 |                                   vartype = "se", na.rm = TRUE)
58 | )
59 | ```
60 |
61 | 4. Estimate the median temperature housing units are set to during the night in the winter (WinterTempNight) using the `survey_quantile` function.
62 |
63 | ```{r}
64 | # Median requested explicitly as the 0.5 quantile; `quantiles` must be numeric, not a string
65 | recs_des %>%
66 |   summarize(
67 |     WinterNightTemp = survey_quantile(WinterTempNight,
68 |                                       quantiles = 0.5,
69 |                                       vartype = "se", na.rm = TRUE)
70 |   )
71 | ```
72 |
73 | # Part 2
74 |
75 | 1. Estimate the total average energy cost (TOTALDOL) by region, division, and urbanicity.
76 |
77 | ```{r}
78 | # Option 1: cascade() reports the grouped means together with the
79 | # summaries for the successively collapsed grouping levels.
80 | recs_des %>%
81 |   group_by(Region, Division, Urbanicity) %>%
82 |   cascade(EnergyCost = survey_mean(TOTALDOL))
83 | 
84 | # Option 2: summarize() reports only the full
85 | # Region x Division x Urbanicity combinations.
86 | recs_des %>%
87 |   group_by(Region, Division, Urbanicity) %>%
88 |   summarize(
89 |     EnergyCost = survey_mean(TOTALDOL)
90 |   )
91 | ```
92 |
93 | 2. What is the median electric cost (DOLLAREL) for housing units in the South Region? What is the 95% confidence interval?
94 |
95 | ```{r}
96 | # Weighted median electric bill in the South; "ci" uses the default 95% level
97 | summarize(
98 |   filter(recs_des, Region == "South"),
99 |   MedElBill = survey_median(DOLLAREL,
100 |                             vartype = "ci")
101 | )
102 | ```
103 |
104 | 3. Test whether daytime winter and daytime summer temperatures of homes are set the same.
105 |
106 | ```{r}
107 | # Design-based t-test that the mean winter-minus-summer daytime setting is zero
108 | recs_des %>%
109 |   svyttest(design = ., formula = I(WinterTempDay - SummerTempDay) ~ 0,
110 |            na.rm = TRUE)
111 | ```
112 |
113 | 4. Test whether average electric bill (DOLLAREL) varies by region (Region).
114 |
115 | ```{r}
116 | # Survey-weighted linear model of the electric bill on region
117 | m1 <- recs_des %>%
118 |   svyglm(design = ., formula = DOLLAREL ~ Region,
119 |          na.action = na.omit)
120 | summary(m1)
121 | ```
122 |
123 | 5. Fit a regression between the cooled square footage of a housing unit (TOTCSQFT) and the total amount spent on energy (TOTALDOL).
124 |
125 | ```{r}
126 | # Survey-weighted linear regression of total energy spending on cooled square footage
127 | m2 <- recs_des %>%
128 |   svyglm(design = ., formula = TOTALDOL ~ TOTCSQFT,
129 |          na.action = na.omit)
130 | summary(m2)
131 | ```
132 |
133 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Warm-up Exercises"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Course set-up
9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 | #'
11 | ## -------------------------------------------------------------------
12 | # install.packages("tidyverse")
13 | # install.packages("srvyr")
14 | # install.packages("here")
15 | # install.packages("palmerpenguins")
16 | # install.packages("remotes")
17 |
18 | library(tidyverse) # for tidyverse
19 | library(here) # for file paths
20 |
21 | remotes::install_github("bschneidr/survey", ref = "c217689")
22 | library(srvyr)
23 | library(palmerpenguins)
24 |
25 |
26 |
27 | #'
28 | #' # Warm-up exercises: Play with penguin data!!!
29 | #'
30 | ## -------------------------------------------------------------------
31 | penguins
32 |
33 | #'
34 | #' How many penguins of each species are there? Hint: use `count`
35 | ## -------------------------------------------------------------------
36 |
37 |
38 | #'
39 | #' How many penguins of each species and sex are there? Hint: use `count`
40 | #'
41 | ## -------------------------------------------------------------------
42 |
43 |
44 | #'
45 | #' What is the mean length of flipper by species? Hint: use `group_by` and `summarise`
46 | ## -------------------------------------------------------------------
47 |
48 |
49 | #'
50 | #'
51 | #' What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise`
52 | ## -------------------------------------------------------------------
53 |
54 |
55 | #'
56 | #' # Advanced warm-up exercises
57 | #'
58 | #' Fit a simple linear regression between body mass and flipper length.
59 | #'
60 | ## -------------------------------------------------------------------
61 |
62 |
63 | #'
64 | #'
65 | #' Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm
66 | #'
67 | ## -------------------------------------------------------------------
68 |
69 |
70 | #'
71 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Warm-up Exercises"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Course set-up
9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 |
11 | ```{r}
12 | # install.packages("tidyverse")
13 | # install.packages("srvyr")
14 | # install.packages("here")
15 | # install.packages("palmerpenguins")
16 | # install.packages("remotes")
17 |
18 | library(tidyverse) # for tidyverse
19 | library(here) # for file paths
20 |
21 | remotes::install_github("bschneidr/survey", ref = "c217689")
22 | library(srvyr)
23 | library(palmerpenguins)
24 |
25 |
26 | ```
27 |
28 | # Warm-up exercises: Play with penguin data!!!
29 |
30 | ```{r}
31 | penguins
32 | ```
33 |
34 | How many penguins of each species are there? Hint: use `count`
35 | ```{r}
36 |
37 | ```
38 |
39 | How many penguins of each species and sex are there? Hint: use `count`
40 |
41 | ```{r}
42 |
43 | ```
44 |
45 | What is the mean length of flipper by species? Hint: use `group_by` and `summarise`
46 | ```{r}
47 |
48 | ```
49 |
50 |
51 | What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise`
52 | ```{r}
53 |
54 | ```
55 |
56 | # Advanced warm-up exercises
57 |
58 | Fit a simple linear regression between body mass and flipper length.
59 |
60 | ```{r}
61 |
62 | ```
63 |
64 |
65 | Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm
66 |
67 | ```{r}
68 |
69 | ```
70 |
71 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises_solutions.R:
--------------------------------------------------------------------------------
1 | #' ---
2 | #' title: "Warm-up Exercise Solutions"
3 | #' output:
4 | #' html_document:
5 | #' df_print: paged
6 | #' ---
7 | #'
8 | #' # Course set-up
9 | #' First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 | #'
11 | ## -------------------------------------------------------------------
12 | # install.packages("tidyverse")
13 | # install.packages("srvyr")
14 | # install.packages("here")
15 | # install.packages("palmerpenguins")
16 | # install.packages("remotes")
17 |
18 | library(tidyverse) # for tidyverse
19 | library(here) # for file paths
20 |
21 | remotes::install_github("bschneidr/survey", ref = "c217689")
22 | library(srvyr)
23 | library(palmerpenguins)
24 |
25 |
26 |
27 | #'
28 | #' # Warm-up exercises: Play with penguin data!!!
29 | #'
30 | ## -------------------------------------------------------------------
31 | penguins
32 |
33 | #'
34 | #' How many penguins of each species are there? Hint: use `count`
35 | ## -------------------------------------------------------------------
36 | # Tally the rows for each species
37 | count(penguins, species)
38 |
39 | #'
40 | #' How many penguins of each species and sex are there? Hint: use `count`
41 | #'
42 | ## -------------------------------------------------------------------
43 | # Tally the rows for each observed species/sex combination
44 | count(penguins, species, sex)
45 |
46 | #'
47 | #' What is the mean length of flipper by species? Hint: use `group_by` and `summarise`
48 | ## -------------------------------------------------------------------
49 | # Mean flipper length per species, ignoring missing measurements
50 | summarize(
51 |   group_by(penguins, species),
52 |   MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
53 | )
54 |
55 | #'
56 | #'
57 | #' What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise`
58 | ## -------------------------------------------------------------------
59 | # Mean flipper length for each species/sex group, ignoring missing measurements
60 | summarize(
61 |   group_by(penguins, species, sex),
62 |   MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
63 | )
64 |
65 | #'
66 | #' # Advanced warm-up exercises
67 | #'
68 | #' Fit a simple linear regression between body mass and flipper length.
69 | #'
70 | ## -------------------------------------------------------------------
71 | # Ordinary least squares: body mass as a function of flipper length
72 | mod1 <- lm(formula = body_mass_g ~ flipper_length_mm, data = penguins)
73 | summary(mod1)
74 |
75 | #'
76 | #'
77 | #' Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm
78 | #'
79 | ## -------------------------------------------------------------------
80 | # Two-sample t-test of flipper length by sex (t.test does not assume equal variances by default)
81 | t.test(flipper_length_mm ~ sex, data = penguins)
82 | # The same difference as the sex coefficient of a linear model
83 | mod3 <- lm(formula = flipper_length_mm ~ sex, data = penguins)
84 | summary(mod3)
85 | mod4 <- glm(formula = flipper_length_mm ~ sex, data = penguins) # default gaussian family
86 | summary(mod4)
87 |
88 | #'
89 |
--------------------------------------------------------------------------------
/Exercises/WarmUpExercises_solutions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Warm-up Exercise Solutions"
3 | output:
4 | html_document:
5 | df_print: paged
6 | ---
7 |
8 | # Course set-up
9 | First, let's make sure you have everything you need for the course. Run the following library statements. If something is not installed, install it.
10 |
11 | ```{r}
12 | # install.packages("tidyverse")
13 | # install.packages("srvyr")
14 | # install.packages("here")
15 | # install.packages("palmerpenguins")
16 | # install.packages("remotes")
17 |
18 | library(tidyverse) # for tidyverse
19 | library(here) # for file paths
20 |
21 | remotes::install_github("bschneidr/survey", ref = "c217689")
22 | library(srvyr)
23 | library(palmerpenguins)
24 |
25 |
26 | ```
27 |
28 | # Warm-up exercises: Play with penguin data!!!
29 |
30 | ```{r}
31 | penguins
32 | ```
33 |
34 | How many penguins of each species are there? Hint: use `count`
35 | ```{r}
36 | # Tally the rows for each species
37 | count(penguins, species)
38 | ```
39 |
40 | How many penguins of each species and sex are there? Hint: use `count`
41 |
42 | ```{r}
43 | # Tally the rows for each observed species/sex combination
44 | count(penguins, species, sex)
45 | ```
46 |
47 | What is the mean length of flipper by species? Hint: use `group_by` and `summarise`
48 | ```{r}
49 | # Mean flipper length per species, ignoring missing measurements
50 | summarize(
51 |   group_by(penguins, species),
52 |   MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
53 | )
54 | ```
55 |
56 |
57 | What is the mean flipper length by species and sex? Hint: use `group_by` and `summarise`
58 | ```{r}
59 | # Mean flipper length for each species/sex group, ignoring missing measurements
60 | summarize(
61 |   group_by(penguins, species, sex),
62 |   MeanFlipperLength = mean(flipper_length_mm, na.rm = TRUE)
63 | )
64 | ```
65 |
66 | # Advanced warm-up exercises
67 |
68 | Fit a simple linear regression between body mass and flipper length.
69 |
70 | ```{r}
71 | # Ordinary least squares: body mass as a function of flipper length
72 | mod1 <- lm(formula = body_mass_g ~ flipper_length_mm, data = penguins)
73 | summary(mod1)
74 | ```
75 |
76 |
77 | Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm
78 |
79 | ```{r}
80 | # Two-sample t-test of flipper length by sex (t.test does not assume equal variances by default)
81 | t.test(flipper_length_mm ~ sex, data = penguins)
82 | # The same difference as the sex coefficient of a linear model
83 | mod3 <- lm(formula = flipper_length_mm ~ sex, data = penguins)
84 | summary(mod3)
85 | mod4 <- glm(formula = flipper_length_mm ~ sex, data = penguins) # default gaussian family
86 | summary(mod4)
87 | ```
88 |
89 |
--------------------------------------------------------------------------------
/FinalizeMaterials.R:
--------------------------------------------------------------------------------
1 | ### This program creates PDF slides and R files from the Rmd files
2 |
3 | library(knitr)
4 | library(here)
5 |
6 | # Extract the code of <folder>/<fn>.Rmd into <folder>/<fn>.R; documentation = 2 keeps the prose as roxygen comments
7 | mypurl <- function(folder, fn) {
8 |   rmd_path <- here(folder, stringr::str_c(fn, ".Rmd"))
9 |   r_path <- here(folder, stringr::str_c(fn, ".R"))
10 |   purl(rmd_path, output = r_path, documentation = 2)
11 | }
12 |
13 | mypurl("Exercises", "CategorialExercises") # exercise notebooks
14 | mypurl("Exercises", "ContinuousExercises")
15 | mypurl("Exercises", "WarmUpExercises")
16 | # Solution versions of the same exercise notebooks
17 | mypurl("Exercises", "CategorialExercises_solutions")
18 | mypurl("Exercises", "ContinuousExercises_solutions")
19 | mypurl("Exercises", "WarmUpExercises_solutions")
20 | # Slide deck source
21 | mypurl("Presentation", "Slides")
22 |
23 | # remotes::install_github("jhelvy/xaringanBuilder")
24 | # remotes::install_github('rstudio/chromote')
25 | xaringanBuilder::build_pdf( # export the already-rendered HTML slides to PDF
26 |   input=here("Presentation", "Slides.html"),
27 |   output_file=here("Presentation", "Slides.pdf"),
28 |   partial_slides= TRUE) # NOTE(review): presumably keeps incremental-reveal slides as separate pages; confirm in xaringanBuilder docs
29 | xaringanBuilder::build_pptx( # build the PowerPoint deck from the PDF written above
30 |   input=here("Presentation", "Slides.pdf"),
31 |   output_file=here("Presentation", "Slides.pptx"),
32 |   partial_slides= TRUE)
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/Presentation/Slides.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Tidy Survey Analysis in R using the srvyr Package"
3 | subtitle: "AAPOR 2021 Short Course"
4 | author:
5 | - Stephanie Zimmer, RTI International
6 | - Rebecca Powell, RTI International
7 | date: "2021-05-06"
8 | output:
9 | xaringan::moon_reader:
10 | css: xaringan-themer.css
11 | nature:
12 | slideNumberFormat: "%current%"
13 | highlightStyle: github
14 | highlightLines: true
15 | ratio: 16:9
16 | countIncrementalSlides: true
17 | ---
18 |
19 | ```{r setup, include=FALSE}
20 | knitr::opts_chunk$set(echo = TRUE, message = FALSE, tidy = FALSE)
21 | ```
22 |
23 |
24 | ```{r xaringan-themer, include=FALSE, warning=FALSE}
25 | library(xaringanthemer)
26 | style_duo_accent(
27 | primary_color = "#1E4F96",
28 | secondary_color = "#00A3E0",
29 | inverse_header_color = "#FFFFFF"
30 | )
31 | ```
32 |
33 | class: inverse center middle
34 | # Introduction
35 |
36 | ---
37 |
38 | ```{css, echo = FALSE}
39 | .small .remark-code { /*Change made here*/
40 | font-size: 80% !important;
41 | }
42 | .smaller .remark-code { /*Change made here*/
43 | font-size: 70% !important;
44 | }
45 | ```
46 |
47 | ## Overview
48 |
49 | - At the end of this course, you should be able to
50 | - Calculate point estimates and their standard errors with survey data
51 | - Means & Proportions
52 | - Totals
53 | - Quantiles
54 | - Perform t-tests and chi-squared tests
55 | - Fit regression models
56 | - Specify a survey design in R to create a survey object
57 |
58 | - We will not be going over the following but provide some resources at the end
59 | - Weighting (calibration, post-stratification, raking, etc.)
60 | - Survival analysis
61 | - Nonlinear models
62 |
63 |
64 |
65 | ---
66 | ## Overview: Course Roadmap
67 |
68 | - Get familiar with RStudio Cloud with a warm-up exercise using the tidyverse
69 |
70 | - Introduce the survey data we'll be using in the course
71 |
72 | - Analysis of continuous data with time for practice
73 |
74 | - Analysis of categorical data with time for practice
75 |
76 | - Specify a survey design object in R with exercises
77 |
78 | - Resources for other survey analysis topics
79 |
80 | - Closing
81 |
82 | ---
83 | ## Logistics
84 |
85 | - We will be using RStudio Cloud today to ensure everyone has access
86 |
87 | - Sign-up for a free RStudio Cloud account
88 | - Access the project and files via link in email and Zoom chat
89 | - Click "START" to open the project and get started
90 | - RStudio Cloud has the same features and appearance as RStudio for ease of use
91 |
92 | - All slides and code are available on GitHub: https://github.com/szimmer/tidy-survey-aapor-2021
93 |
94 | ???
95 | The GitHub repo is for future reference; all material is also on RStudio Cloud
96 | ---
97 | ## Intro to RStudio Cloud: Penguins!!
98 |
99 | - Using `palmerpenguins` data for warm-up exercises
100 |
101 | - Data were collected and made available by Dr. Kristen Gorman and the Palmer Station, Antarctica LTER, a member of the Long Term Ecological Research Network.
102 |
103 | - Access data through `palmerpenguins` package https://github.com/allisonhorst/palmerpenguins/
104 |
105 |
106 | ####If you are using your own RStudio environment:
107 | - Make sure you have `tidyverse`, `here`, and `palmerpenguins` installed
108 |
109 | ```{r inst_packages, error=FALSE, warning=FALSE, eval=FALSE}
110 | # Run package installation if you don't have these packages already
111 | # As a reminder, installing takes package from internet to your computer
112 | # and only needs to be done once, not each session
113 |
114 | install.packages(c("tidyverse", "here", "palmerpenguins"))
115 | ```
116 |
117 | ---
118 | ## Intro to RStudio Cloud: Penguins!!
119 |
120 | - Load `tidyverse`, `here`, and `palmerpenguins`
121 |
122 | - Look at the penguins dataset using `glimpse`
123 |
124 | ```{r load_pack1, error=FALSE, warning=FALSE}
125 | library(tidyverse) # for tidyverse
126 | library(here) # for file paths
127 | library(palmerpenguins) # for warm-up data
128 | glimpse(penguins)
129 | ```
130 |
131 | ---
132 | ## Warm-up Exercises: WarmUpExercises.Rmd
133 |
134 | - Let's open RStudio Cloud and do some warm-up examples
135 | - We will do one together and then give you 5 minutes to work through other examples and get familiar with RStudio Cloud
136 |
137 | - Explore the penguins data
138 | - How many penguins of each species are there? We will do this one together
139 | - How many penguins of each species and sex are there? Hint: use `count`
140 | - What is the mean length of flipper by species? Hint: use `group_by` and `summarize`
141 | - What is the mean flipper length by species and sex?
142 |
143 | - More advanced warm-up
144 | - Fit a simple linear regression between body mass and flipper length.
145 | - Test whether the average flipper length is significantly different between male and female penguins. Use t-test, lm, or glm
146 |
147 | ---
148 | ## Ex. 1: How many penguins of each species are there?
149 |
150 | ```{r peng1}
151 | penguins %>%
152 | count(species)
153 | ```
154 |
155 | ???
156 | - Using `count` we see there are 3 different species and the number of penguins for each species
157 | ---
158 | ## Ex. 2: How many penguins of each species and sex are there?
159 |
160 | ```{r peng2}
161 | penguins %>%
162 | count(species, sex)
163 | ```
164 | ???
165 | - `count` can take more than one variable to get a cross-tabs between the two variables
166 |
167 | ---
168 | ## Ex. 3: What is the mean length of flipper by species?
169 |
170 | ```{r peng3}
171 | penguins %>%
172 | group_by(species) %>%
173 | summarize(
174 | MeanFlipperLength=mean(flipper_length_mm,
175 | na.rm=TRUE))
176 | ```
177 | ???
178 | - `group_by` allows us to look at metrics by different subgroups like species
179 | - when using `group_by` follow it with `summarize` to get metrics (like average) at the group level
180 | - `na.rm=TRUE` removes missing data from the calculation
181 | - forgetting this argument will result in a value of `NA` as the function will try to average missing data
182 |
183 | ---
184 | ## Ex. 4: What is the mean flipper length by species and sex?
185 |
186 | ```{r peng4}
187 | penguins %>%
188 | group_by(species, sex) %>%
189 | summarize(
190 | MeanFlipperLength=mean(flipper_length_mm,
191 | na.rm=TRUE))
192 | ```
193 | ???
194 | - As with `count`, you can `group_by` multiple variables
195 |
196 | ---
197 | ## Advanced Ex. 1: Linear regression (body mass & flipper length)
198 | .small[
199 | ```{r pengad1}
200 | mod1 <- lm(body_mass_g ~ flipper_length_mm, data=penguins)
201 | summary(mod1)
202 | ```
203 | ]
204 | ???
205 | - use `lm` (linear model) function
206 | - equation is written as y-variable ~ x-variables
207 |
208 | ---
209 | ## Advanced Ex. 2: Flipper length differences by sex: t-test
210 |
211 | ```{r pengad2a}
212 | t.test(flipper_length_mm ~ sex, data=penguins)
213 | ```
214 | ???
215 | - ~ also used in `t.test` to separate what we want to measure (our y) and the groups of interest (our x)
216 | ---
217 | ## Advanced Ex. 2: Flipper length differences by sex: lm
218 | .small[
219 | ```{r pengad2b}
220 | mod3 <- lm(flipper_length_mm ~ sex, data=penguins)
221 | summary(mod3)
222 | ```
223 | ]
224 | ---
225 | ## Advanced Ex. 2: Flipper length differences by sex: glm
226 | .small[
227 | ```{r pengad2c}
228 | mod4 <- glm(flipper_length_mm ~ sex, data=penguins)
229 | summary(mod4)
230 | ```
231 | ]
232 | ???
233 | - `glm` takes the same arguments as `lm`, but is more flexible for working with non-normal data
234 | ---
235 | class: inverse center middle
236 | # Survey Datasets
237 | ---
238 | ## Residential Energy Consumption Survey (RECS) 2015
239 |
240 | - Energy consumption/expenditures collected through energy suppliers
241 |
242 | - Fielded 14 times between 1978 and 2015
243 |
244 | - Topics include appliances, electronics, heating, a/c, temperatures, water heating, lighting, energy bills, respondent demographics, and energy assistance
245 |
246 | - Funded by the Energy Information Administration
247 |
248 | - Target Population: Primary occupied housing units in the US
249 |
250 | - Mode: In-person, paper, and web interview mode
251 |
252 | - Sample Information: BRR Replicate weights included for variance estimation
253 |
254 | https://www.eia.gov/consumption/residential/index.php
255 |
256 | ???
257 | - We have subset the columns of this data and created derived variables, code in repository
258 | ---
259 | ## American National Election Studies (ANES) 2016
260 |
261 | - Pre and post election surveys
262 |
263 | - Fielded almost every 2 years since 1948
264 |
265 | - Topics include voter registration status, candidate preference, opinions on country and government, party and ideology affiliation, opinions on policy, news sources, and more
266 |
267 | - Collaboration of Stanford University and the University of Michigan, funded by the National Science Foundation
268 |
269 | - Target Population: US citizens, 18 and older living in US
270 |
271 | - Mode: FTF with CASI and Web
272 |
273 | - Sample Information: Pseudo-strata and pseudo-cluster included for variance estimation
274 |
275 | https://electionstudies.org/
276 |
277 | ???
278 | Chose not to use 2020 data because it is still preliminary
279 |
280 | ---
281 | class: inverse center middle
282 | # Continuous data analysis
283 | ---
284 | ## Overview of Survey Analysis using `srvyr` Package
285 |
286 | 1. Create a `tbl_svy` object using: `as_survey_design` or `as_survey_rep`
287 |
288 | 2. Subset data (if needed) using `filter` (subpopulations)
289 |
290 | 3. Specify domains of analysis using `group_by`
291 |
292 | 4. Within `summarize`, specify variables to calculate including means, totals, proportions, quantiles and more
293 |
294 |
295 | Note: We will be teaching this in the reverse order!!!
296 | ---
297 | ## Set-up for Analysis
298 | - The `srvyr` package provides tidy syntax but relies on the `survey` package behind the scenes to do the calculations
299 |
300 | - If using your own RStudio environment, install both packages:
301 | ```{r inst_srv, eval=FALSE}
302 | # Install survey and srvyr packages
303 |
304 | remotes::install_github("bschneidr/survey", ref = "c217689")
305 | install.packages("srvyr")
306 | ```
307 |
308 | - First, we will set-up a design object and later talk about what it means
309 | ```{r recs_des, error=FALSE, warning=FALSE}
310 | library(survey) # for survey analysis
311 | library(srvyr) # for tidy survey analysis
312 |
313 | recs <- read_rds(here("Data", "recs.rds"))
314 |
315 | recs_des <- recs %>%
316 | as_survey_rep(weights=NWEIGHT,
317 | repweights=starts_with("BRRWT"),
318 | type="Fay",
319 | rho=0.5,
320 | mse=TRUE)
321 |
322 | ```
323 | ???
324 | - need to install github version of survey package if you want CIs with quantiles
325 | ---
326 | ## Weighted Analysis for Continuous Variables
327 |
328 | - Common functions for continuous summaries
329 | - survey_mean
330 | - survey_total (like sum)
331 | - survey_median
332 | - survey_quantile
333 | - survey_ratio
334 |
335 | - Always call within `summarize`/`summarise`
336 | ---
337 | ## `survey_mean` Syntax
338 |
339 | ```{r survey_mean_syn, eval=FALSE}
340 | survey_mean(
341 | x,
342 | na.rm = FALSE,
343 | vartype = c("se", "ci", "var", "cv"),
344 | level = 0.95,
345 | proportion = FALSE,
346 | deff = FALSE,
347 | df = NULL,
348 | ...
349 | )
350 | ```
351 |
352 | To calculate a survey mean, we use this in `summarize`/`summarise`
353 | ```{r survey_mean_syn2, eval=FALSE}
354 | survey_design_object %>%
355 | summarize(
356 | mean_varname=survey_mean(x = continuous_varname)
357 | )
358 | ```
359 |
360 | ???
361 | Only required argument is the variable
362 |
363 | ---
364 | ## `survey_mean` Example 1: Mean dollars spent on energy
365 |
366 | This is an example using the `recs_des` survey design object and `survey_mean` function defaults
367 |
368 | ```{r survey_mean_ex1}
369 | recs_des %>%
370 | summarize(
371 | TD_mean=survey_mean(x = TOTALDOL)
372 | )
373 | ```
374 | ---
375 | ## `survey_mean` Example 2: Mean temperature setting for summer during the day
376 |
377 | Run this code. What happens? Why?
378 |
379 | ```{r survey_mean_ex2, eval=FALSE}
380 | recs_des %>%
381 | summarize(
382 | TD_mean=survey_mean(x = SummerTempDay)
383 | )
384 | ```
385 | ---
386 | ## `survey_mean` Example 2: Mean temperature setting for summer during the day
387 |
388 | Run this code. What happens? Why?
389 |
390 | ```{r survey_mean_ex2_r, error=TRUE}
391 | recs_des %>%
392 | summarize(
393 | TD_mean=survey_mean(x = SummerTempDay)
394 | )
395 | ```
396 |
397 | How do we fix this code?
398 |
399 | ???
400 | - missing data in temperature, need `na.rm=TRUE`
401 | ---
402 | ## `survey_mean` Example 2: Missing data solution
403 |
404 | ```{r survey_mean_ex2_sol, error=TRUE, tidy=FALSE}
405 | recs_des %>%
406 | summarize(
407 | TD_mean = survey_mean(
408 | x = SummerTempDay,
409 | na.rm = TRUE )#<<
410 | )
411 | ```
412 |
413 | ---
414 | ## `survey_median` Syntax
415 |
416 | ```{r survey_median_syn, eval=FALSE}
417 | survey_median(
418 | x,
419 | na.rm = FALSE,
420 | vartype = c("se", "ci"),
421 | level = 0.95,
422 | df = NULL,
423 | ...
424 | )
425 | ```
426 |
427 | ???
428 | Only required argument is the variable
429 |
430 |
431 | ---
432 | ## `survey_median` Example: Median temperature setting for summer during day
433 |
434 | Fill in the blank:
435 |
436 | ```{r survey_median_fib, eval=FALSE}
437 | recs_des %>%
438 | summarize(
439 | TD_median=survey_median(x=_________,
440 | na.rm=_________)
441 | )
442 | ```
443 |
444 | --
445 |
446 | ```{r survey_median_fib_sol}
447 | recs_des %>%
448 | summarize(
449 | TD_median=survey_median(x=SummerTempDay,
450 | na.rm=TRUE)
451 | )
452 | ```
453 |
454 |
455 |
456 | ---
457 | ## `survey_quantile` Syntax
458 |
459 | ```{r survey_quantile_syn, eval=FALSE}
460 | survey_quantile(
461 | x,
462 | quantiles, #<<
463 | na.rm = FALSE,
464 | vartype = c("se", "ci", "var", "cv"),
465 | level = 0.95,
466 | df = NULL,
467 | ...
468 | )
469 | ```
470 |
471 | ???
472 | - need both the variable and the quantiles in a vector e.g. (c(.25, .75))
473 | ---
474 | ## `survey_quantile` Example 1: 1st and 3rd quartile of dollars spent on energy
475 |
476 | ```{r survey_quantile_ex1, error=TRUE}
477 | recs_des %>%
478 | summarize(
479 | Spent=survey_quantile(
480 | x = TOTALDOL,
481 | quantiles = c(.25, .75)) #<<
482 | )
483 | ```
484 | ???
485 | - This estimates the 25th and 75th percentile
486 |
487 | ---
488 | ## `survey_quantile` Example 2: 1st and 3rd quartile of dollars spent on energy, now with confidence intervals
489 |
490 | ```{r survey_quantile_ex2, error=TRUE}
491 | recs_des %>%
492 | summarize(
493 | Spent=survey_quantile(x = TOTALDOL,
494 | quantiles = c(.25, .75),
495 | vartype = "ci" #<<
496 | )
497 | )
498 | ```
499 | ---
500 | ## `survey_ratio` Syntax
501 |
502 | - Note this estimates: $\sum x_i/\sum y_i$ not $\sum \frac{x_i}{y_i}$
503 |
504 | ```{r survey_ratio_syn, eval=FALSE}
505 | survey_ratio(
506 | numerator, #<<
507 | denominator, #<<
508 | na.rm = FALSE,
509 | vartype = c("se", "ci", "var", "cv"),
510 | level = 0.95,
511 | deff = FALSE,
512 | df = NULL,
513 | ...
514 | )
515 | ```
516 |
517 |
518 | ---
519 | ## `survey_ratio` Example: mean dollars per BTU spent on energy
520 |
521 | ```{r survey_ratio_ex}
522 | recs_des %>%
523 | summarize(
524 | DolPerBTU=survey_ratio(
525 | numerator = TOTALDOL, #<<
526 | denominator = TOTALBTU, #<<
527 | na.rm = TRUE
528 | )
529 | )
530 | ```
531 | ---
532 | ## Practice on your own
533 |
534 | - Open ContinuousExercises.Rmd and work through Part 1
535 |
536 | - We will take 15 minutes. Use this time for the exercises and a break
537 | ---
538 | ## Weighted Analysis for Continuous Variables: Domain Analysis
539 |
540 | - If we want to get estimates by another variable, we need to add a `group_by` statement before doing the analysis.
541 |
542 | - Example: Average dollars spent on electricity by whether AC is used
543 |
544 | ```{r domain_ex}
545 | recs_des %>%
546 | group_by(ACUsed) %>% #<<
547 | summarize(
548 | ElBill=survey_mean(DOLLAREL,
549 | na.rm=TRUE)
550 | )
551 | ```
552 | ---
553 | ## Domain Analysis: Totals
554 |
555 | - If we want the overall electric bill too, use the `cascade` function instead of `summarize`
556 |
557 | ```{r domain_ex_casc}
558 | recs_des %>%
559 | group_by(ACUsed) %>%
560 | cascade(
561 | ElBill=survey_mean(DOLLAREL,
562 | na.rm=TRUE)
563 | )
564 |
565 | ```
566 |
567 | ???
568 | - Note the overall appears as NA
569 |
570 | ---
571 | ## Domain Analysis: Totals
572 |
573 | - Also can add sample and pop sizes
574 |
575 | ```{r domain_tot}
576 | recs_des %>%
577 | group_by(ACUsed) %>%
578 | cascade(
579 | ElBill=survey_mean(DOLLAREL, na.rm=TRUE),
580 | N=survey_total(!is.na(DOLLAREL)), #<<
581 | n=unweighted(sum(!is.na(DOLLAREL))) #<<
582 | )
583 |
584 | ```
585 |
586 | ???
587 | - survey_total gets a weighted total
588 | - unweighted does just that, an unweighted estimate, can also get an unweighted mean or any other stat
589 |
590 | ---
591 | ## Weighted Analysis for Specific Subpopulations
592 |
593 | - filtering (subsetting) the data should be done AFTER specifying the design to ensure accurate standard errors
594 |
595 | - Use the `filter` function after creating the survey design object and before summarizing
596 |
597 | Wrong way:
598 | ```{r filter_bad, eval = FALSE}
599 | data %>%
600 |   filter(state=="NC") %>% #<<
601 |   as_survey_design(...) %>% # design only sees the already-subset rows
602 |   summarize(AvgAge=survey_mean(Age)) # srvyr estimator, not base mean()
603 | ```
604 | 
605 | Right way:
606 | ```{r filter_good, eval=FALSE}
607 | data %>%
608 |   as_survey_design(...) %>% # design is declared on the full sample
609 |   filter(state=="NC") %>% #<<
610 |   summarize(AvgAge=survey_mean(Age)) # srvyr estimator, not base mean()
611 | ```
612 |
613 | ???
614 | - The difference in these two methods occurs when the subpopulation doesn't occur in all strata or PSUs
615 |
616 | ---
617 | ## Subpopulation Example 1: Average electric cost of single family homes
618 |
619 | ```{r subpop1}
620 | recs_des %>%
621 | filter(HousingUnitType %in% c("Single-family detached",
622 | "Single-family attached")) %>%
623 | summarize(
624 | ElBill=survey_mean(DOLLAREL,
625 | na.rm=TRUE)
626 | )
627 | ```
628 |
629 | ---
630 | ## Comparisons with t-tests: `svyttest` Syntax
631 |
632 | - t-tests are done in the package `survey` not `srvyr` but you can use the same design object
633 |
634 | ```{r ttest_syn, eval=FALSE}
635 | svyttest(formula, # outcome~group for two-sample, outcome~0 for one-sample
636 |          design, # survey design object (the srvyr object works as-is)
637 |          na.rm = FALSE, # the examples pass na.rm=TRUE to ignore missing values
638 |          ...)
639 | ```
640 |
641 | ???
642 | - Uses standard R formula notation
643 | - will go over examples of 1-sample, 2-sample, and paired t-test
644 |
645 | ---
646 | ## `svyttest` Example 1: One-sample t-test
647 |
648 | - I keep my house at 68 degrees at night during the summer. Is this different from the national average?
649 |
650 | ```{r ttest_ex1}
651 | recs_des %>%
652 | svyttest(design=.,
653 | formula=I(SummerTempNight-68)~0,
654 | na.rm=TRUE)
655 | ```
656 |
657 | ???
658 | - Note the I notation, this does the arithmetic before modeling
659 |
660 | ---
661 | ## `svyttest` Example 2: Comparing two variables
662 |
663 | - Do people keep their house the same temperature at night during the summer and the winter?
664 |
665 | ```{r ttest_ex2}
666 | recs_des %>%
667 | svyttest(design=.,
668 | formula=I(SummerTempNight-WinterTempNight)~0,
669 | na.rm=TRUE)
670 | ```
671 |
672 | ???
673 | - this is a paired t-test
674 | - testing whether the mean within-household difference (summer minus winter) is 0
675 | ---
676 | ## `svyttest` Example 3: Two-sample t-test
677 |
678 | - Are electric bills different between those with and without A/C?
679 |
680 | ```{r ttest_ex3}
681 | recs_des %>%
682 | svyttest(design=.,
683 | formula=DOLLAREL~ACUsed,
684 | na.rm=TRUE)
685 | ```
686 |
687 |
688 |
689 | ---
690 | ## Linear Regression or ANOVA: `svyglm` Syntax
691 |
692 | - As with t-tests, regressions are done in the package `survey` not `srvyr` but you can use the same design object
693 |
694 | - Syntax is similar between t-test and glm
695 |
696 | ```{r glm_syn, eval=FALSE}
697 | svyglm(formula, # outcome~predictors, standard R formula notation
698 |        design, # survey design object (the srvyr object works as-is)
699 |        na.action, #default is na.omit
700 |        ...)
701 | ```
702 | ---
703 | ## `svyglm` Example: Two-sample
704 |
705 | Same example as two-sample t-test: Are electric bills different between those with and without A/C?
706 |
707 | t-test:
708 | ```{r twosamp_ex_ttest, eval=FALSE}
709 | recs_des %>%
710 | svyttest(design=.,
711 | formula=DOLLAREL~ACUsed,
712 | na.rm=TRUE) #<<
713 | ```
714 |
715 | glm:
716 | ```{r twosamp_ex_glm, eval=FALSE}
717 | recs_des %>%
718 | svyglm(design=.,
719 | formula=DOLLAREL~ACUsed,
720 | na.action=na.omit) #<<
721 | ```
722 |
723 | ???
724 | - one major difference in how you specify to ignore NA values
725 | - svyttest can only have 2-levels in group variable
726 | - svyglm, the variable on right can be anything (continuous or factor)
727 |
728 | ---
729 | ## `svyglm` Example: Two-sample
730 |
731 | Are electric bills different between those with and without A/C?
732 | .small[
733 | ```{r twosamp_ex_ttest_run}
734 | recs_des %>%
735 | svyglm(design=.,
736 | formula=DOLLAREL~ACUsed,
737 | na.action=na.omit) %>%
738 | summary()
739 | ```
740 | ]
741 |
742 | ???
743 | - same results as t-test
744 |
745 | ---
746 | ## `svyglm` Example 1: ANOVA Test
747 |
748 | Does temperature of AC at night vary by region?
749 | .smaller[
750 | ```{r anova_ex}
751 | recs_des %>%
752 | svyglm(design=.,
753 | formula=SummerTempNight~Region,
754 | na.action=na.omit) %>%
755 | summary()
756 |
757 | ```
758 | ]
759 |
760 | ???
761 | - Region is a factor variable, if it is numeric - this will treat it like a linear model
762 |
763 | ---
764 | ## `svyglm` Example 2: Linear Model
765 |
766 | - Is there a relationship between square footage and electric bill?
767 | - Let's review the data first with a ggplot. Note we use the original data and do NOT use the survey design object.
768 |
769 | ```{r plot_sf_elbill}
770 | p <- recs %>%
771 | ggplot(aes(x=TOTSQFT_EN, y=DOLLAREL, weight=NWEIGHT)) +
772 | geom_hex() +
773 | theme(legend.position="right") +
774 | guides(fill=guide_legend(title="HUs"))
775 | ```
776 | ---
777 | ## `svyglm` Example 2: Linear Model
778 | ```{r plot_sf_elbill_disp, echo=FALSE, fig.asp=9/16, fig.align="center", out.width="90%", dpi=300}
779 | p +
780 | theme_xaringan()
781 | ```
782 |
783 |
784 | ---
785 | ## `svyglm` Example 2: Linear Model
786 | .small[
787 | ```{r lm_ex}
788 | m_electric_sqft <- recs_des %>%
789 | svyglm(design=.,
790 | formula=DOLLAREL~TOTSQFT_EN,
791 | na.action=na.omit)
792 | summary(m_electric_sqft)
793 | ```
794 | ]
795 |
796 | ???
797 | - for every square foot bigger, on average 24.6c more in electric
798 | ---
799 | ## Practice on your own
800 |
801 | - Open ContinuousExercises.Rmd and work through Part 2
802 |
803 | - We will take 15 minutes. Use this time for the exercises and a break
804 | ---
805 | class: inverse center middle
806 | # Categorical data analysis
807 | ---
808 | ## Weighted Analysis for Categorical Variables
809 |
810 | - Functions to use within `summarize` after `group_by`
811 | - survey_mean
812 | - survey_total
813 |
814 | - Functions to get counts
815 | - survey_count
816 |
817 | ???
818 |
819 | - we use the same mean and total functions as with continuous variables
820 | - `survey_count` is new
821 | - has a similar structure as the standard (non-survey) version of count
822 |
823 | ---
824 | ## Set-up ANES Data for Examples
825 |
826 | ```{r anes_des}
827 | # adjust weight to sum to citizen pop, 18+ in Nov 2016 per ANES methodology documentation
828 | anes <- here("Data", "anes.rds") %>%
829 |   read_rds() %>%
830 |   mutate(Weight = Weight / sum(Weight) * 224059005)
831 | anes_des <- anes %>%
832 |   as_survey_design(weights = Weight,
833 |                    strata = Stratum,
834 |                    ids = VarUnit,
835 |                    nest = TRUE)
836 | ```
837 | ???
838 |
839 | - American National Election Studies
840 | - provides weights that sum to the sample, but we want to get population estimates
841 | - need to adjust the weight to get it to the population count
842 | - as we mentioned before we will cover setting up the sample design object later
843 |
844 | ---
845 | ## `survey_count` Syntax
846 |
847 | - `survey_count` functions similarly to `count` in that it is NOT called within `summarize`
848 |
849 | - Produces weighted counts and variance of your choice of those counts
850 | ```{r survey_count_syn, eval=FALSE}
851 | survey_count(
852 | x,
853 | ...,
854 | wt = NULL,
855 | sort = FALSE,
856 | name = "n",
857 | .drop = dplyr::group_by_drop_default(x),
858 | vartype = c("se", "ci", "var", "cv")
859 | )
860 | ```
861 | ???
862 | - similar to count in that it takes one or many variables
863 | - can change the variance type as we have seen in the other survey functions
864 |
865 | ---
866 | ## `survey_count` Example
867 |
868 | - Cross-tab of population in each age group and gender
869 | ```{r survey_count_ex}
870 | anes_des %>%
871 | survey_count(AgeGroup, Gender, name="n")
872 |
873 | ```
874 | ???
875 | - `survey_count` is placed on its own like `count`
876 | - it does NOT go in a `summarize` function
877 | - can take multiple variables
878 | - can change the output count name, `n` is the default
879 |
880 | ---
881 | ## `survey_mean` and `survey_total` Examples
882 |
883 | - `survey_mean` used with no x (variable) calculates a proportion of groups specified in `group_by`
884 | - `survey_total` used with no x (variable) calculates a population count estimate for the groups specified in `group_by`
885 |
886 | Cross-tab of population who voted in 2016
887 | ```{r survey_p_ex1}
888 | anes_des %>%
889 | filter(!is.na(VotedPres2016)) %>%
890 | group_by(VotedPres2016) %>%
891 | summarize(
892 | p=survey_mean(),
893 | N=survey_total(),
894 | n=unweighted(n()), .groups="drop"
895 | )
896 | ```
897 | ???
898 | - to get proportions we use `group_by` and `survey_mean`
899 | - also use `survey_total` to get a population count estimate as before
900 | ---
901 | ## Conditional proportions with more than one group
902 |
903 | - Specifying more than one group calculates conditional proportions
904 | - Example: people voting in 2012 and 2016
905 |
906 | ```{r survey_p_cond}
907 | anes_des %>%
908 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>%
909 | group_by(VotedPres2012, VotedPres2016) %>%
910 | summarize(
911 | p=survey_mean(),
912 | N=survey_total(),
913 | n=unweighted(n()), .groups="drop"
914 | )
915 | ```
916 | ???
917 | - Note that this is the proportion of voting in 2016 by whether people voted in 2012
918 | - What if we don't want conditional proportions?
919 |
920 | ---
921 | ## Joint proportions with more than one group
922 |
923 | - Specify an interaction to get joint distribution
924 | - Example: people voting in 2012 and 2016
925 |
926 | ```{r survey_p_joint}
927 | anes_des %>%
928 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>%
929 | group_by(groups = interaction(VotedPres2016, VotedPres2012)) %>% #<<
930 | summarize(
931 | p=survey_mean(),
932 | N=survey_total(),
933 | .groups="drop"
934 | )
935 | ```
936 | ???
937 | - We add an interaction for the groups
938 | - This outputs the joint distribution, but the `groups` variable is hard to interpret
939 |
940 | ---
941 | ## Joint proportions with more than one group
942 |
943 | - Specify an interaction to get joint distribution
944 | - Example: people voting in 2012 and 2016
945 |
946 | ```{r survey_p_joint2}
947 | anes_des %>%
948 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>%
949 | group_by(groups = interaction(VotedPres2016, VotedPres2012)) %>%
950 | summarize(
951 | VotedPres2012=VotedPres2012[1], #<<
952 | VotedPres2016=VotedPres2016[1], #<<
953 | p=survey_mean(),
954 | N=survey_total(),
955 | .groups="drop"
956 | )
957 | ```
958 | ???
959 | - We can add in two variables one for `VotedPres2012` and `VotedPres2016`
960 | - using the 1 in brackets pulls out the labels for these two variables so we see the "Yes" and "No" labels
961 |
962 | ---
963 | ## Proportions with Design Effects
964 |
965 | ```{r survey_p_deff}
966 | anes_des %>%
967 | filter(!is.na(VotedPres2012), !is.na(VotedPres2016)) %>%
968 | group_by(VotedPres2012, VotedPres2016) %>%
969 | summarize(
970 | p=survey_mean(deff=TRUE),
971 | N=survey_total()
972 | )
973 | ```
974 | ???
975 | - Use `deff=TRUE` option in the `survey_mean` function
976 |
977 | ---
978 | ## `svychisq` Syntax
979 |
980 | - As with testing on continuous variables, `svychisq` comes from the `survey` package
981 |
982 | ```{r svychisq_syn, eval=FALSE}
983 | svychisq(formula,
984 | design,
985 | statistic = c("F", "Chisq", "Wald", "adjWald", "lincom", "saddlepoint"),
986 | na.rm=TRUE,
987 | ...)
988 |
989 | ```
990 | ???
991 | - when we want to test categorical distributions we use `svychisq`
992 | - it takes a formula, and the survey design data
993 |
994 | ---
995 | ## `svychisq` Example 1: Function Defaults
996 |
997 | - How often can you trust the federal gov't to do what is right?
998 | - How often can you trust other people?
999 |
1000 | ```{r svychisq_ex1}
1001 | anes_des %>%
1002 | svychisq(design=.,
1003 | formula=~TrustPeople +TrustGovernment)
1004 |
1005 |
1006 | ```
1007 | ???
1008 | - We want to compare the distributions of these two questions
1009 |
1010 | ---
1011 | ## `svychisq` Example 2: Wald Statistic
1012 |
1013 | - How often can you trust the federal gov't to do what is right?
1014 | - Who did you vote for? Clinton, Trump, or Other
1015 |
1016 | ```{r svychisq_ex2}
1017 | anes_des %>%
1018 | svychisq(design=.,
1019 | formula=~TrustGovernment +VotedPres2016_selection,
1020 | statistic="Wald")
1021 |
1022 |
1023 | ```
1024 | ???
1025 | - Can use different statistics
1026 | ---
1027 | ## Practice on your own
1028 |
1029 | - Open CategoricalExercises.Rmd and work through the exercises
1030 |
1031 | - We will take 10 minutes. Use this time for the exercises and a break
1032 | ---
1033 | class: inverse center middle
1034 | # Sample design object
1035 | ---
1036 | ## `tbl_svy` Object: Taylor Series Linearization
1037 |
1038 | - `tbl_svy` object defines the sampling design or replicate weights
1039 |
1040 | - Key information is usually found in documentation of a public use file
1041 |
1042 | ```{r sd_tsl_syn, eval=FALSE}
1043 | as_survey_design(
1044 | .data,
1045 | ids = NULL,#cluster IDs/PSUs
1046 | strata = NULL,#strata variables
1047 | variables = NULL,#defaults to all in .data
1048 | fpc = NULL,#variables defining the fpc
1049 | nest = FALSE,#TRUE/FALSE - relabel clusters to nest within strata
1050 | check_strata = !nest, #check that clusters are nested in strata
1051 | weights = NULL,# weight variable
1052 | ...
1053 | )
1054 | ```
1055 |
1056 | ???
1057 | - discussing TSL first
1058 | ---
1059 | ## `tbl_svy` for Common Designs
1060 |
1061 | ```{r sd_tsl_gen_ex, eval=FALSE}
1062 | # simple random sample (SRS)
1063 | apisrs %>% as_survey_design(fpc = fpc)
1064 |
1065 | # stratified sample
1066 | apistrat %>% as_survey_design(strata = stype, weights = pw)
1067 |
1068 | # one-stage cluster sample
1069 | apiclus1 %>% as_survey_design(ids = dnum, weights = pw, fpc = fpc)
1070 |
1071 | # two-stage cluster sample, weights computed from pop size
1072 | apiclus2 %>% as_survey_design(ids = c(dnum, snum), fpc = c(fpc1, fpc2))
1073 |
1074 | # stratified, cluster design
1075 | apistrat %>% as_survey_design(ids = dnum, strata = stype, weights =pw, nest = TRUE)
1076 |
1077 | ```
1078 |
1079 | - examples from `srvyr` help documentation
1080 |
1081 | ---
1082 | ## ANES Design Object
1083 |
1084 | .smaller[
1085 | ```{r sd_anes, eval=TRUE}
1086 | anes_des <- anes %>%
1087 | as_survey_design(weights = Weight,
1088 | strata = Stratum,
1089 | ids = VarUnit,
1090 | nest = TRUE)
1091 | summary(anes_des)
1092 | ```
1093 | ]
1094 |
1095 | ???
1096 | - Pseudo-strata (Stratum) and pseudo-cluster (VarUnit) included for variance estimation
1097 | - we renamed these variables to be more obvious but original documentation has diff var names
1098 |
1099 | ---
1100 | ## `tbl_svy` Objects with Supplied Replicate Weights
1101 |
1102 | - Key information is usually found in documentation of a public use file
1103 |
1104 | ```{r sd_rep_syn, eval=FALSE}
1105 | as_survey_rep(
1106 | .data,
1107 | variables = NULL,#defaults to all in .data
1108 | repweights = NULL,#variables specifying replicate weights
1109 | weights = NULL,#variable for analysis weight
1110 | type = c("BRR", "Fay", "JK1", "JKn", "bootstrap", "other"),
1111 | rho = NULL,#shrinkage factor for Fay's method,
1112 | mse = getOption("survey.replicates.mse"), # if TRUE, compute variances based on
1113 | # sum of squares around the point estimate, rather than the mean of the replicates
1114 | scale = NULL, # overall multiplier for squared deviations
1115 | ...
1116 | )
1117 |
1118 | ```
1119 | ---
1120 | ## RECS Design Object
1121 |
1122 | .smaller[
1123 | ```{r sd_recs, eval=TRUE}
1124 | recs_des <- recs %>%
1125 | as_survey_rep(weights=NWEIGHT,
1126 | repweights=starts_with("BRRWT"),
1127 | type="Fay",
1128 | rho=0.5,
1129 | mse=TRUE)
1130 | summary(recs_des)
1131 | ```
1132 | ]
1133 |
1134 | ???
1135 | - Fay's method of BRR weight with $\epsilon=0.5$
1136 | - RECS documentation includes syntax for creating survey design object
1137 | - https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf
1138 |
1139 | ---
1140 | ## Create Replicate Weights: jackknife
1141 |
1142 | - You can also start with a design object specified by the design and create replicate weights
1143 | .smaller[
1144 | ```{r sd_create_rep}
1145 | data(api)
1146 | dclus1 <- apiclus1 %>% as_survey_design(ids = dnum, weights = pw, fpc = fpc)
1147 | rclus1 <- as_survey_rep(dclus1)
1148 | summary(rclus1)
1149 |
1150 | ```
1151 | ]
1152 | ---
1153 | ## Create Replicate Weights: bootstrap
1154 |
1155 | - You can also start with a design object specified by the design and create replicate weights
1156 | .small[
1157 | ```{r sd_create_boot}
1158 | bclus1 <- as_survey_rep(dclus1, type="bootstrap", replicates=100)
1159 | summary(bclus1)
1160 |
1161 | ```
1162 | ]
1163 | ---
1164 | ## Create Survey Design Object for ACS
1165 |
1166 | Fill in the blanks
1167 | - Analysis weight: PWGTP
1168 | - replicate weights: PWGTP1-PWGTP80
1169 | - jackknife with scale adjustment of 4/80
1170 | ```{r sd_acs_fib, eval=FALSE}
1171 | acs_des <- acs_pums %>%
1172 | as_survey_rep(
1173 | weights=___________,
1174 | repweights=___________,
1175 | type=___________,
1176 | scale=_________
1177 | )
1178 | ```
1179 | --
1180 |
1181 | ```{r sd_acs_fib_sol, eval=FALSE}
1182 | acs_des <- acs_pums %>%
1183 | as_survey_rep(
1184 | weights=PWGTP,
1185 | repweights=stringr::str_c("PWGTP", 1:80),
1186 | type="JK1",
1187 | scale=4/80
1188 | )
1189 |
1190 | ```
1191 | ---
1192 | ## Create Survey Design Object for CPS 2011 Supplement
1193 |
1194 | Fill in the blanks
1195 | - Analysis weight: wtsupp
1196 | - replicate weights: repwtp1 -repwtp160
1197 | - BRR
1198 | ```{r sd_cps_fib, eval=FALSE}
1199 | cps_des <- cps %>%
1200 | as_survey_rep(
1201 | weights=___________,
1202 | repweights=___________,
1203 | type=___________
1204 | )
1205 | ```
1206 | --
1207 | ```{r sd_cps_fib_sol, eval=FALSE}
1208 | cps_des <- cps %>%
1209 | as_survey_rep(
1210 | weights=wtsupp,
1211 | repweights=starts_with("repwtp"),
1212 | type="BRR"
1213 | )
1214 | ```
1215 | ---
1216 | ## Create Survey Design Object for NHANES
1217 |
1218 | Fill in the blanks
1219 | - Analysis weight: WTINT2YR
1220 | - Variance Stratum: SDMVSTRA
1221 | - Variance Primary Sampling Unit: VPSU
1222 | ```{r sd_nhanes_fib, eval=FALSE}
1223 | nhanes_des <- nhanes %>%
1224 |   as_survey_design(
1225 |     weights=___________,
1226 |     ids=___________,
1227 |     strata=___________,
1228 |     fpc=___________, nest=TRUE
1229 |   )
1230 | ```
1231 | --
1232 | ```{r sd_nhanes_fib_sol, eval=FALSE}
1233 | nhanes_des <- nhanes %>%
1234 |   as_survey_design(
1235 |     weights=WTINT2YR,
1236 |     ids=VPSU,
1237 |     strata=SDMVSTRA,
1238 |     fpc=NULL, nest=TRUE # no fpc supplied; nest=TRUE since NHANES PSU ids typically repeat across strata (verify in your file)
1239 |   )
1240 | ```
1241 | ---
1242 | ## Create Survey Design Object for LEMAS 2016
1243 |
1244 | Fill in the blanks
1245 | - Analysis weight: ANALYSISWEIGHT
1246 | - Variance Stratum: STRATA
1247 | - FPC: FRAMESIZE
1248 | ```{r sd_lemas_fib, eval=FALSE}
1249 | lemas_des <- lemas %>%
1250 | as_survey_design(
1251 | weights=___________,
1252 | ids=___________,
1253 | strata=___________,
1254 | fpc=___________
1255 | )
1256 | ```
1257 | --
1258 |
1259 | ```{r sd_lemas_fib_sol, eval=FALSE}
1260 | lemas_des <- lemas %>%
1261 | as_survey_design(
1262 | weights=ANALYSISWEIGHT,
1263 | ids=1,
1264 | strata=STRATA,
1265 | fpc=FRAMESIZE
1266 | )
1267 | ```
1268 |
1269 | ---
1270 | class: inverse center middle
1271 | # Closing
1272 | ---
1273 | ## Resources for more learning
1274 |
1275 | - https://cran.r-project.org/web/packages/srvyr/vignettes/srvyr-vs-survey.html
1276 |
1277 | - https://r-survey.r-forge.r-project.org/survey/
1278 | - Includes more advanced modeling
1279 |
1280 |
1281 | ---
1282 | ## Thank You!
1283 |
1284 | ### We hope you learned a lot in this short course!
1285 |
1286 | Please let us know if you have any feedback on this course. You will receive an email from AAPOR asking you to fill out a survey about this course. All feedback is welcome!
1287 |
1288 |
1289 | ## Questions?
1290 |
1291 | ---
1292 | ## Sources
1293 |
1294 | - The American National Election Studies (https://electionstudies.org/). These materials are based on work supported by the National Science Foundation under grant numbers SES 1444721, 2014-2017, the University of Michigan, and Stanford University.
1295 |
1296 | - *Residential Energy Consumption Survey: Using the 2015 Microdata File to Compute Estimates and Standard Errors.* U.S. Department of Energy (2017) https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf
1297 |
1298 | - Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0. https://allisonhorst.github.io/palmerpenguins/
1299 |
1300 | - T. Lumley (2020) "survey: analysis of complex survey samples". R package version 4.0. https://r-survey.r-forge.r-project.org/survey/
1301 |
1302 | - Greg Freedman Ellis and Ben Schneider (2020). srvyr: 'dplyr'-Like Syntax for Summary Statistics of Survey Data. R package version 1.0.0. https://CRAN.R-project.org/package=srvyr
1303 |
1304 | - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.5. https://CRAN.R-project.org/package=dplyr
1305 |
--------------------------------------------------------------------------------
/Presentation/Slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Presentation/Slides.pdf
--------------------------------------------------------------------------------
/Presentation/Slides.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Presentation/Slides.pptx
--------------------------------------------------------------------------------
/Presentation/Slides_files/figure-html/plot_sf_elbill_disp-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/Presentation/Slides_files/figure-html/plot_sf_elbill_disp-1.png
--------------------------------------------------------------------------------
/Presentation/xaringan-themer.css:
--------------------------------------------------------------------------------
1 | /* -------------------------------------------------------
2 | *
3 | * !! This file was generated by xaringanthemer !!
4 | *
5 | * Changes made to this file directly will be overwritten
6 | * if you used xaringanthemer in your xaringan slides Rmd
7 | *
8 | * Issues or likes?
9 | * - https://github.com/gadenbuie/xaringanthemer
10 | * - https://www.garrickadenbuie.com
11 | *
12 | * Need help? Try:
13 | * - vignette(package = "xaringanthemer")
14 | * - ?xaringanthemer::style_xaringan
15 | * - xaringan wiki: https://github.com/yihui/xaringan/wiki
16 | * - remarkjs wiki: https://github.com/gnab/remark/wiki
17 | *
18 | * Version: 0.3.3
19 | *
20 | * ------------------------------------------------------- */
21 | @import url(https://fonts.googleapis.com/css?family=Noto+Sans:400,400i,700,700i&display=swap);
22 | @import url(https://fonts.googleapis.com/css?family=Cabin:600,600i&display=swap);
23 | @import url(https://fonts.googleapis.com/css?family=Source+Code+Pro:400,700&display=swap);
24 |
25 |
26 | :root {
27 | /* Fonts */
28 | --text-font-family: 'Noto Sans';
29 | --text-font-is-google: 1;
30 | --text-font-family-fallback: -apple-system, BlinkMacSystemFont, avenir next, avenir, helvetica neue, helvetica, Ubuntu, roboto, noto, segoe ui, arial;
31 | --text-font-base: sans-serif;
32 | --header-font-family: Cabin;
33 | --header-font-is-google: 1;
34 | --header-font-family-fallback: Georgia, serif;
35 | --code-font-family: 'Source Code Pro';
36 | --code-font-is-google: 1;
37 | --base-font-size: 20px;
38 | --text-font-size: 1rem;
39 | --code-font-size: 0.9rem;
40 | --code-inline-font-size: 1em;
41 | --header-h1-font-size: 2.75rem;
42 | --header-h2-font-size: 2.25rem;
43 | --header-h3-font-size: 1.75rem;
44 |
45 | /* Colors */
46 | --text-color: #000000;
47 | --header-color: #1E4F96;
48 | --background-color: #FFFFFF;
49 | --link-color: #1E4F96;
50 | --text-bold-color: #1E4F96;
51 | --code-highlight-color: rgba(255,255,0,0.5);
52 | --inverse-text-color: #000000;
53 | --inverse-background-color: #00A3E0;
54 | --inverse-header-color: #FFFFFF;
55 | --inverse-link-color: #1E4F96;
56 | --title-slide-background-color: #1E4F96;
57 | --title-slide-text-color: #FFFFFF;
58 | --header-background-color: #1E4F96;
59 | --header-background-text-color: #FFFFFF;
60 | --primary: #1E4F96;
61 | --secondary: #00A3E0;
62 | --white: #FFFFFF;
63 | --black: #000000;
64 | }
65 |
66 | html {
67 | font-size: var(--base-font-size);
68 | }
69 |
70 | body {
71 | font-family: var(--text-font-family), var(--text-font-family-fallback), var(--text-font-base);
72 | font-weight: normal;
73 | color: var(--text-color);
74 | }
75 | h1, h2, h3 {
76 | font-family: var(--header-font-family), var(--header-font-family-fallback);
77 | font-weight: 600;
78 | color: var(--header-color);
79 | }
80 | .remark-slide-content {
81 | background-color: var(--background-color);
82 | font-size: 1rem;
83 | padding: 16px 64px 16px 64px;
84 | width: 100%;
85 | height: 100%;
86 | }
87 | .remark-slide-content h1 {
88 | font-size: var(--header-h1-font-size);
89 | }
90 | .remark-slide-content h2 {
91 | font-size: var(--header-h2-font-size);
92 | }
93 | .remark-slide-content h3 {
94 | font-size: var(--header-h3-font-size);
95 | }
96 | .remark-code, .remark-inline-code {
97 | font-family: var(--code-font-family), Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace;
98 | }
99 | .remark-code {
100 | font-size: var(--code-font-size);
101 | }
102 | .remark-inline-code {
103 | font-size: var(--code-inline-font-size);
104 | color: #1E4F96;
105 | }
106 | .remark-slide-number {
107 | color: #1E4F96;
108 | opacity: 1;
109 | font-size: 0.9em;
110 | }
111 | strong { color: var(--text-bold-color); }
112 | a, a > code {
113 | color: var(--link-color);
114 | text-decoration: none;
115 | }
116 | .footnote {
117 | position: absolute;
118 | bottom: 60px;
119 | padding-right: 4em;
120 | font-size: 0.9em;
121 | }
122 | .remark-code-line-highlighted {
123 | background-color: var(--code-highlight-color);
124 | }
125 | .inverse {
126 | background-color: var(--inverse-background-color);
127 | color: var(--inverse-text-color);
128 |
129 | }
130 | .inverse h1, .inverse h2, .inverse h3 {
131 | color: var(--inverse-header-color);
132 | }
133 | .inverse a, .inverse a > code {
134 | color: var(--inverse-link-color);
135 | }
136 | .title-slide, .title-slide h1, .title-slide h2, .title-slide h3 {
137 | color: var(--title-slide-text-color);
138 | }
139 | .title-slide {
140 | background-color: var(--title-slide-background-color);
141 | }
142 | .title-slide .remark-slide-number {
143 | display: none;
144 | }
145 | /* Two-column layout */
146 | .left-column {
147 | width: 20%;
148 | height: 92%;
149 | float: left;
150 | }
151 | .left-column h2, .left-column h3 {
152 | color: #1E4F9699;
153 | }
154 | .left-column h2:last-of-type, .left-column h3:last-child {
155 | color: #1E4F96;
156 | }
157 | .right-column {
158 | width: 75%;
159 | float: right;
160 | padding-top: 1em;
161 | }
162 | .pull-left {
163 | float: left;
164 | width: 47%;
165 | }
166 | .pull-right {
167 | float: right;
168 | width: 47%;
169 | }
170 | .pull-right + * {
171 | clear: both;
172 | }
173 | img, video, iframe {
174 | max-width: 100%;
175 | }
176 | blockquote {
177 | border-left: solid 5px #00A3E080;
178 | padding-left: 1em;
179 | }
180 | .remark-slide table {
181 | margin: auto;
182 | border-top: 1px solid #666;
183 | border-bottom: 1px solid #666;
184 | }
185 | .remark-slide table thead th {
186 | border-bottom: 1px solid #ddd;
187 | }
188 | th, td {
189 | padding: 5px;
190 | }
191 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) {
192 | background: #CCECF8;
193 | }
194 | table.dataTable tbody {
195 | background-color: var(--background-color);
196 | color: var(--text-color);
197 | }
198 | table.dataTable.display tbody tr.odd {
199 | background-color: var(--background-color);
200 | }
201 | table.dataTable.display tbody tr.even {
202 | background-color: #CCECF8;
203 | }
204 | table.dataTable.hover tbody tr:hover, table.dataTable.display tbody tr:hover {
205 | background-color: rgba(255, 255, 255, 0.5);
206 | }
207 | .dataTables_wrapper .dataTables_length, .dataTables_wrapper .dataTables_filter, .dataTables_wrapper .dataTables_info, .dataTables_wrapper .dataTables_processing, .dataTables_wrapper .dataTables_paginate {
208 | color: var(--text-color);
209 | }
210 | .dataTables_wrapper .dataTables_paginate .paginate_button {
211 | color: var(--text-color) !important;
212 | }
213 |
214 | /* Slide Header Background for h1 elements */
215 | .remark-slide-content.header_background > h1 {
216 | display: block;
217 | position: absolute;
218 | top: 0;
219 | left: 0;
220 | width: 100%;
221 | background: var(--header-background-color);
222 | color: var(--header-background-text-color);
223 | padding: 2rem 64px 1.5rem 64px;
224 | margin-top: 0;
225 | box-sizing: border-box;
226 | }
227 | .remark-slide-content.header_background {
228 | padding-top: 7rem;
229 | }
230 |
231 | @page { margin: 0; }
232 | @media print {
233 | .remark-slide-scaler {
234 | width: 100% !important;
235 | height: 100% !important;
236 | transform: scale(1) !important;
237 | top: 0 !important;
238 | left: 0 !important;
239 | }
240 | }
241 |
242 | .primary {
243 | color: var(--primary);
244 | }
245 | .bg-primary {
246 | background-color: var(--primary);
247 | }
248 | .secondary {
249 | color: var(--secondary);
250 | }
251 | .bg-secondary {
252 | background-color: var(--secondary);
253 | }
254 | .white {
255 | color: var(--white);
256 | }
257 | .bg-white {
258 | background-color: var(--white);
259 | }
260 | .black {
261 | color: var(--black);
262 | }
263 | .bg-black {
264 | background-color: var(--black);
265 | }
266 |
267 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | A new version of this course is at: https://github.com/tidy-survey-r/tidy-survey-short-course
2 |
3 | # Tidy Survey Analysis in R using the srvyr Package
4 | Materials for the [AAPOR short course](https://www.aapor.org/Conference-Events/Annual-Meeting/Short-Courses.aspx) on Tidy Survey Analysis in R using the `srvyr` Package, held in May 2021
5 |
6 | - **RawData** folder contains public use file data along with any documentation
7 | - American National Election Studies, 2016
8 | - Residential Energy Consumption Survey, 2015
9 | - **DataCleaningScripts** folder contains scripts for making public use files analysis ready
10 | - Creates derived variables
11 | - Renames some variables
12 | - Selects fewer variables just for examples
13 | - **Data** folder contains data files ready for analysis in presentation and examples
14 | - **Presentation** folder contains the slides for the course
15 | - Includes Rmd to create slides
16 | - Slides are available in HTML, PPTX, R, and PDF formats
17 | - **Exercises** folder contains Rmd and R files with exercises and solutions to practice concepts
18 |
19 | ## Sources
20 |
21 | - The American National Election Studies (https://electionstudies.org/). These materials are based on work supported by the National Science Foundation under grant numbers SES 1444721, 2014-2017, the University of Michigan, and Stanford University.
22 |
23 | - *Residential Energy Consumption Survey: Using the 2015 Microdata File to Compute Estimates and Standard Errors.* U.S. Department of Energy (2017) https://www.eia.gov/consumption/residential/data/2015/pdf/microdata_v3.pdf
24 |
25 | - Horst AM, Hill AP, Gorman KB (2020). palmerpenguins: Palmer Archipelago (Antarctica) penguin data. R package version 0.1.0. https://allisonhorst.github.io/palmerpenguins/
26 |
27 | - T. Lumley (2020) "survey: analysis of complex survey samples". R package version 4.0. https://r-survey.r-forge.r-project.org/survey/
28 |
29 | - Greg Freedman Ellis and Ben Schneider (2020). srvyr: 'dplyr'-Like Syntax for Summary Statistics of Survey Data. R package version 1.0.0. https://CRAN.R-project.org/package=srvyr
30 |
31 | - Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021). dplyr: A Grammar of Data Manipulation. R package version 1.0.5. https://CRAN.R-project.org/package=dplyr
32 |
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016.sav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016.sav
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016_qnaire_post.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016_qnaire_pre.pdf
--------------------------------------------------------------------------------
/RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/ANES_2016/anes_timeseries_2016_userguidecodebook.pdf
--------------------------------------------------------------------------------
/RawData/RECS_2015/2020_RECS-457A.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/RECS_2015/2020_RECS-457A.pdf
--------------------------------------------------------------------------------
/RawData/RECS_2015/README.md:
--------------------------------------------------------------------------------
1 | # Residential Energy Consumption Survey (RECS) 2015
2 |
3 | All data and resources were downloaded from https://www.eia.gov/consumption/residential/data/2015/index.php?view=microdata on March 3, 2021.
--------------------------------------------------------------------------------
/RawData/RECS_2015/codebook_publicv4.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/RECS_2015/codebook_publicv4.xlsx
--------------------------------------------------------------------------------
/RawData/RECS_2015/microdata_v3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/szimmer/tidy-survey-aapor-2021/5a82c36eb619bf67ccec2593a770904eedcd1d18/RawData/RECS_2015/microdata_v3.pdf
--------------------------------------------------------------------------------
/tidy-survey-short-course.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 3
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
--------------------------------------------------------------------------------