├── 01-Slides.pdf
├── 02-Demos.R
├── 03-Solutions.R
├── README.md
└── RQuickstart.Rproj


/01-Slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rstudio/RQuickstart/6d61ff4d5f0d4e4e3abdd7ca2a8df44f52c131a6/01-Slides.pdf


--------------------------------------------------------------------------------
/02-Demos.R:
--------------------------------------------------------------------------------
  1 | # R
  2 | 
  3 | log10(100)  # base-10 logarithm of 100
  4 | 
  5 | library(ggplot2)  # attach ggplot2 for the plotting demos below
  6 | 
  7 | View(mpg)  # open each example data set in the data viewer
  8 | View(iris)
  9 | View(mtcars)
 10 | 
 11 | ?mpg  # help pages for the data set and the plotting functions used below
 12 | ?ggplot
 13 | ?geom_point
 14 | 
 15 | # Visualizations with ggplot2
 16 | 
 17 | ## plot
 18 | 
 19 | plot(iris$Sepal.Width, iris$Sepal.Length)  # base-graphics scatterplot, for comparison
 20 | 
 21 | ## ggplot2
 22 | 
 23 | ggplot(iris, aes(Sepal.Width, Sepal.Length)) + 
 24 |   geom_point()
 25 | 
 26 | ggplot(iris, aes(Sepal.Width, Sepal.Length)) + 
 27 |   geom_point(aes(shape = Species, color = Species))  # species shown by both shape and color
 28 | 
 29 | ggplot(iris, aes(Sepal.Width, Sepal.Length)) + 
 30 |   geom_point(aes(shape = Species, color = Species)) +
 31 |   theme_bw()
 32 | 
 33 | ggplot(iris, aes(Sepal.Width, Sepal.Length)) + 
 34 |   geom_point(aes(shape = Species, color = Species)) +
 35 |   theme_bw() + 
 36 |   geom_smooth(aes(color = Species), method = lm, se = FALSE)  # per-species linear fits; shape is not a geom_smooth aesthetic
 37 | 
 38 | ggplot(iris, aes(Sepal.Width, Sepal.Length)) +
 39 |   geom_rug(aes(color = Species), position = "jitter") + 
 40 |   stat_density2d(aes(alpha = ..level.., fill = Species), geom = "polygon") +  # filled 2d density contours, alpha mapped to level
 41 |   theme_bw() +
 42 |   scale_alpha(range = c(0.05, 0.5))
 43 | 
 44 | ggplot(iris, aes(Sepal.Width, Sepal.Length)) +
 45 |   geom_rug(aes(color = Species), position = "jitter") + 
 46 |   stat_density2d(aes(alpha = ..level.., fill = Species), geom = "polygon") +
 47 |   theme_bw() +
 48 |   scale_alpha(range = c(0.05, 0.5)) +
 49 |   facet_wrap( ~ Species)  # same plot, one panel per species
 50 | 
 51 | ## ggplot
 52 | 
 53 | ggplot(mpg, aes(displ, hwy))  # no layer added yet, so no points are drawn
 54 | ggplot(mpg, aes(displ, hwy)) + geom_point()  # adding a point layer draws the data
 55 | 
 56 | # Aesthetics
 57 | 
 58 | ggplot(mpg) + geom_point(aes(x = displ, y = hwy, color = class))  # class mapped to color
 59 | ggplot(mpg) + geom_point(aes(x = displ, y = hwy, size = class))  # class mapped to size
 60 | ggplot(mpg) + geom_point(aes(x = displ, y = hwy, shape = class))  # class mapped to shape
 61 | ggplot(mpg) + geom_point(aes(x = displ, y = hwy, alpha = class))  # class mapped to transparency
 62 | 
 63 | ## Mapping vs. setting
 64 | 
 65 | ggplot(mpg, aes(displ, hwy)) + geom_point(mapping = aes(color = class))  # mapped: color varies with class
 66 | ggplot(mpg, aes(displ, hwy)) + geom_point(color = "green")  # set: one fixed color for every point
 67 | ggplot(mpg, aes(displ, hwy)) + geom_point(size = 5)  # set: fixed size
 68 | ggplot(mpg, aes(displ, hwy)) + geom_point(shape = 3)  # set: fixed shape
 69 | ggplot(mpg, aes(displ, hwy)) + geom_point(alpha = 0.5)  # set: fixed transparency
 70 | 
 71 | ggplot(mpg, aes(displ, hwy)) + geom_point(aes(color = class))  # mapping again, passed positionally
 72 | ggplot(mpg, aes(displ, hwy)) + geom_point(color = "green")  # setting again, for side-by-side comparison
 73 | 
 74 | # Geoms
 75 | 
 76 | ggplot(data = mpg) + geom_point(aes(x = displ, y = hwy))  # point layer
 77 | ggplot(data = mpg) + geom_smooth(aes(x = displ, y = hwy))  # smooth layer on the same mapping
 78 | ggplot(data = mpg) + 
 79 |   geom_point(aes(x = displ, y = hwy)) +  # both layers combined in one plot
 80 |   geom_smooth(aes(x = displ, y = hwy))
 81 | 
 82 | ggplot(mpg) + geom_point(aes(class, hwy))  # class on x; the scatterplot quoted in the boxplot exercise
 83 | 
 84 | ## Global vs. Local
 85 | 
 86 | ggplot(mpg, aes(x = displ, y = hwy)) +  # x/y given in ggplot() apply to every layer
 87 |   geom_smooth(method = lm) +
 88 |   geom_point(aes(color = cyl), data = mpg[1:10, ])  # layer-local mapping and data: first 10 rows only
 89 | 
 90 | ggplot(mpg, aes(displ, hwy, color = class)) +  # color given globally, so both layers use it
 91 |   geom_smooth(method = lm) +
 92 |   geom_point()
 93 | 
 94 | ggplot(mpg, aes(displ, hwy)) +  # color given only in the point layer
 95 |   geom_smooth(method = lm) +
 96 |   geom_point(aes(color = class))
 97 | 
 98 | ggplot(mpg, aes(displ, hwy)) + 
 99 |   geom_point()
100 | 
101 | ggplot(mpg, aes(displ, hwy)) + 
102 |   geom_point() +
103 |   geom_point(data = mpg[1:50,], color = "green")  # second point layer: first 50 rows drawn again in green
104 | 
105 | # Grammar of Graphics
106 | 
107 | # Data Wrangling with dplyr
108 | 
109 | library(reportsWS)  # NOTE(review): presumably supplies the workshop data sets (bnames, births, storms, ...) - confirm
110 | View(bnames)
111 | 
112 | my_name <- filter(bnames, name == "Garrett", sex == "M")  # NOTE(review): dplyr verbs used before library(dplyr) is called below - confirm dplyr is attached here
113 | my_name <- select(my_name, name, year, prop)  # keep only name, year, and prop
114 | ggplot(my_name) + 
115 |   geom_line(aes(x = year, y = prop))  # line of prop against year
116 | 
117 | ## dplyr
118 | 
119 | library(dplyr)
120 | ?tbl  # help pages for the dplyr functions covered in this section
121 | ?select
122 | ?filter
123 | ?left_join
124 | ?mutate
125 | ?summarise
126 | ?group_by
127 | ?`%>%`
128 | 
129 | ## tbl's
130 | 
131 | babynames  # NOTE(review): not created anywhere in this script - confirm which package provides it
132 | as_tibble(bnames)  # tibble print method; replaces the deprecated tbl_df()
133 | 
134 | bnames <- as_tibble(bnames)  # keep the tibble version for the rest of the script
135 | 
136 | ## Verbs
137 | 
138 | arrange(storms, wind)  # sort rows by wind, ascending
139 | arrange(storms, desc(wind))  # sort rows by wind, descending
140 | 
141 | select(storms, storm, pressure)  # keep two columns
142 | 
143 | filter(storms, wind == 50)
144 | filter(storms, wind >= 50)
145 | filter(storms, wind > 40, wind <= 60)  # comma-separated conditions are ANDed, so the bounds must describe a non-empty range
146 | 
147 | View(births)
148 | 
149 | left_join(songs, artists, by = "name")  # match rows of the two tables on name
150 | 
151 | mutate(storms, ratio = pressure / wind)  # add a derived column
152 | 
153 | summarise(pollution, median = median(amount))
154 | summarise(pollution, mean = mean(amount), sum = sum(amount), n = n())
155 | p <- group_by(pollution, city)  # grouped copy: the same summary is now computed per city
156 | summarise(p, mean = mean(amount), sum = sum(amount), n = n())
157 | 
158 | # %>%
159 | 
160 | p <- group_by(pollution, city)
161 | summarise(p, mean = mean(amount), sum = sum(amount), n = n())
162 | my_name <- filter(bnames, name == "Garrett", sex == "M")
163 | my_name <- select(my_name, name, year, prop)
164 | my_name <- left_join(my_name, filter(births, sex == "M"), by = "year")  # male rows of births, filtered inline; `boys` is never defined in this script
165 | my_name <- mutate(my_name, n = round(prop * births))
166 | 
167 | summarize(pollution, median = median(amount))
168 | pollution %>% summarize(median = median(amount))  # the same call written with the pipe
169 | 
170 | bnames %>%
171 |   left_join(births, by = c("year", "sex")) %>%
172 |   mutate(n = round(prop * births)) %>%
173 |   select(name, sex, year, n) %>%
174 |   filter(!is.na(n)) %>%
175 |   group_by(name, sex) %>%
176 |   summarise(total = sum(n)) %>%
177 |   ungroup() %>%
178 |   arrange(desc(total))
179 | 
180 | tmp1 <- left_join(bnames, births, by = c("year", "sex"))  # the same pipeline, one named intermediate per step
181 | tmp2 <- mutate(tmp1, n = round(prop * births))
182 | tmp3 <- select(tmp2, name, sex, year, n) 
183 | tmp4 <- filter(tmp3, !is.na(n)) 
184 | tmp5 <- group_by(tmp4, name, sex)
185 | tmp6 <- summarise(tmp5, total = sum(n))
186 | tmp7 <- ungroup(tmp6)
187 | tmp8 <- arrange(tmp7, desc(total))
188 | 
189 | arrange(  # the same pipeline again, written as nested calls (read from the inside out)
190 |   ungroup(
191 |     summarise(
192 |       group_by(
193 |         filter(
194 |           select(
195 |             mutate(
196 |               left_join(bnames, births, by = c("year", "sex")), 
197 |               n = round(prop * births)
198 |             ), name, sex, year, n
199 |           ), !is.na(n)
200 |         ), name, sex
201 |       ), total = sum(n)
202 |     )
203 |   ), desc(total)
204 | )
205 | 
206 | 
207 | 
208 | 
209 | 
210 | 
211 | 
212 | 


--------------------------------------------------------------------------------
/03-Solutions.R:
--------------------------------------------------------------------------------
 1 | #  How would you replace this scatterplot with one that draws 
 2 | #  boxplots? Try out your best guess.
 3 | #  ggplot(mpg) + geom_point(aes(class, hwy))
 4 | ggplot(data = mpg) + geom_boxplot(aes(x = class, y = hwy))  # same x/y as the scatterplot; only the geom changes
 5 | 
 6 | 
 7 | #  How would you create this plot? 
 8 | #  Hint: histograms do not require a y aesthetic.
 9 | ggplot(data = mpg) + geom_histogram(aes(x = hwy))  # only x is mapped
10 | 
11 | 
12 | #  Build the two plots specified below.
13 | #    
14 | #  Plot 1
15 | #  Data = diamonds
16 | #  geom = count
17 | #  x = cut
18 | #  y = color
19 | ggplot(data = diamonds) +
20 |   geom_count(mapping = aes(x = cut, y = color))
21 | 
22 | #  Plot 2
23 | #  Data = diamonds
24 | #  geom = point
25 | #  x = carat
26 | #  y = price
27 | ggplot(data = diamonds) +
28 |   geom_point(mapping = aes(x = carat, y = price))
29 | 
30 | 
31 | #  Create a data set that contains only rows with your name and sex, 
32 | #  and only the columns name, year, and prop.
33 | #  Then plot the data with 
34 | #  ggplot(<data name here>) + 
35 | #    geom_line(aes(x = year, y = prop))
36 | library(dplyr)
37 | my_name <- filter(bnames, name == "Garrett", sex == "M")  # rows for one name/sex combination
38 | my_name <- select(my_name, name, year, prop)  # keep the three requested columns
39 | ggplot(my_name) + 
40 |   geom_line(aes(x = year, y = prop))
41 | 
42 | 
43 | #  1. filter() births to just rows with your sex.
44 | boys <- filter(births, sex == "M")
45 | 
46 | #  2. Join the result to my_name by year.
47 | my_name <- left_join(my_name, boys, by = "year")  # keeps every row of my_name
48 | 
49 | #  3. Add a new variable to the data: n = round(prop * births)
50 | my_name <- mutate(my_name, n = round(prop * births))  # births here is the column brought in by the join
51 | 
52 | #  4. Save the new data. Then plot n over time.
53 | ggplot(my_name) + geom_line(aes(x = year, y = n))
54 | 
55 | 
56 | #  Work with a neighbor to determine what each line of the 
57 | #  code below does.
58 | # Start with bnames, then
59 | bnames %>% 
60 |   # join births to it, matching rows on year and sex.
61 |   left_join(births, by = c("year", "sex")) %>%
62 |   # Add a new variable, n = round(prop * births).
63 |   mutate(n = round(prop * births)) %>%
64 |   # Keep only four columns: name, sex, year, and n.
65 |   select(name, sex, year, n) %>% 
66 |   # Drop the rows where n is NA.
67 |   filter(!is.na(n)) %>% 
68 |   # Group by each combination of name and sex.
69 |   group_by(name, sex) %>%
70 |   # Sum n within each group.
71 |   summarise(total = sum(n)) %>% 
72 |   # Remove the grouping before sorting.
73 |   ungroup() %>%
74 |   # Sort the rows from the largest total to the smallest.
75 |   arrange(desc(total))
76 | 
77 | 
78 | 
79 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RQuickstart
2 | Slides, code, and exercises to support the [R Quickstart tutorial](http://conferences.oreilly.com/strata/hadoop-big-data-ca/public/schedule/detail/48053) at the 2016 Strata + Hadoop World in San Jose.
3 | 


--------------------------------------------------------------------------------
/RQuickstart.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX


--------------------------------------------------------------------------------