├── londonr_purrr_slides.pdf
├── Workshop 5 - Functional Programming with purr.pdf
├── README.md
└── scripts
    ├── londonr_script.R
    └── internal_script.R


/londonr_purrr_slides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MangoTheCat/Introduction-to-Functional-Programming-with-Purrr/master/londonr_purrr_slides.pdf


--------------------------------------------------------------------------------
/Workshop 5 - Functional Programming with purr.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MangoTheCat/Introduction-to-Functional-Programming-with-Purrr/master/Workshop 5 - Functional Programming with purr.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Introduction to Functional Programming with purrr
 2 | 
 3 | To follow along with the workshop, please complete these pre-installation steps:
 4 | 
 5 | * Install R from https://cran.r-project.org/
 6 | * Install RStudio Desktop from https://www.rstudio.com
 7 | 
 8 | Install the following R packages from CRAN in the usual way: 
 9 | 
10 | ```r
11 | install.packages(c("tidyverse", "repurrrsive", "broom", "modelr"))
12 | ```
13 | 


--------------------------------------------------------------------------------
/scripts/londonr_script.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # Functional Programming with Purrr ---------------------------------------
  3 | 
  4 | # Notes and a neater version of this script
  5 | # https://github.com/MangoTheCat/Introduction-to-Functional-Programming-with-Purrr
  6 | # or short link
  7 | # https://bit.ly/mcpurrr
  8 | 
  9 | # Need the packages?
 10 | install.packages(c("tidyverse", "repurrrsive"))
 11 | 
 12 | # Lists
 13 | 
 14 | myList <- list(A = rnorm(100), B = sample(LETTERS, 10))
 15 | myList
 16 | 
 17 | length(myList)
 18 | names(myList)
 19 | 
 20 | myList[["B"]]
 21 | myList$B
 22 | 
 23 | myMass <- list(doug = 80, gary = 100)
 24 | 
 25 | rest_energy <- function(m, c = 299792458) {
 26 |   c^2 * m  # rest energy E = m * c^2; `c` defaults to 299792458
 27 | }
 28 | 
 29 | lapply(myMass, rest_energy)
 30 | 
 31 | # Exercise 1-4
 32 | 
 33 | library(repurrrsive)
 34 | 
 35 | # Using the gap_split data in the repurrrsive package:
 36 | #   a. How many elements are in the list?
 37 | 
 38 | View(gap_split)
 39 | length(gap_split)
 40 | 
 41 | #   b. Do the elements have names?
 42 | names(gap_split)
 43 | 
 44 | #   c. Extract the data from the United Kingdom. What type of data is it?
 45 | 
 46 | class(gap_split[["United Kingdom"]])
 47 | print.default(gap_split[["United Kingdom"]])
 48 | 
 49 | #   2. Write a function that, when given the data and a country name will
 50 | # calculate the mean life expectancy for that country
 51 | 
 52 | library(tidyverse)
 53 | 
 54 | meanLife <- function(data, country) { # `data` is indexed by `country` with [[ ]]
 55 |   mean(data[[country]]$lifeExp) # average of that element's lifeExp column
 56 | }
 57 | 
 58 | View(myList)
 59 | View(gap_split)
 60 | 
 61 | data(package = "repurrrsive")
 62 | 
 63 | library(purrr)
 64 | 
 65 | maxYear <- function(data) {
 66 |   # Row(s) of `data` where lifeExp equals its maximum
 67 |   peak_rows <- filter(data, lifeExp == max(lifeExp))
 68 |   # The year column of those rows, returned as a plain vector
 69 |   pull(peak_rows, "year")
 70 |   
 71 | }
 72 | 
 73 | maxYear(gap_split$Afghanistan)
 74 | 
 75 | # purrr map
 76 | 
 77 | map(gap_split, maxYear) # equivalent to an lapply
 78 | 
 79 | 
 80 | # Chapter 2
 81 | 
 82 | # Extracting elements
 83 | lifeExpectancy <- map(gap_split, "lifeExp") # the lifeExp element of each list entry
 84 | 
 85 | names(lifeExpectancy)
 86 | length(lifeExpectancy) # was length(142), which is always 1
 87 | 
 88 | 
 89 | map(lifeExpectancy, max)
 90 | 
 91 | 
 92 | # Exercise working with map Page 2-3
 93 | # 
 94 | # 1. Using the split gapminder data:
 95 | #   a. Find the minimum value of the population for each country
 96 | 
 97 | population <- map(gap_split, "pop")
 98 | map(population, min)
 99 | 
100 | #   b. Calculate the variance of the GDP per capita
101 | 
102 | gdpVar <- function(data) {
103 |   # Variance (stats::var) of the gdpPercap element of `data`
104 |   var(data[["gdpPercap"]])
105 | }
106 | map(gap_split, gdpVar)
107 | 
108 | # Alternative
109 | map(gap_split, "gdpPercap") %>% map(var)
110 | 
111 | 
112 | # Extension Questions
113 | # 2. For each country, extract the value of the population in 1952.
114 | 
115 | # 3. Which country had the lowest population in 1952? (hint: take a look at
116 | #                                                      which.min)
117 | 
118 | maxLife <- function(data) { # largest value in the lifeExp column of `data`
119 |   max(data$lifeExp)
120 | }
121 | 
122 | map(gap_split, maxLife)
123 | 
124 | map(gap_split, ~max(.$lifeExp))
125 | 
126 | 
127 | lapply(gap_split, maxLife)
128 | maxLifeVec <- sapply(gap_split, maxLife)
129 | 
130 | class(maxLifeVec)
131 | maxLifeVec[3]
132 | 
133 | maxLifeVec <- vapply(gap_split, maxLife, numeric(1))
134 | 
135 | # Type-safe map functions
136 | maxLifeDbl <- map_dbl(gap_split, maxLife)
137 | maxLifeDbl
138 | 
139 | # Exercise
140 | # Page 2-6
141 | # 1. Find the average life expectancy for each country, storing 
142 | # the output in a numeric vector
143 | 
144 | avLife <- map_dbl(gap_split, ~ mean(.$lifeExp))
145 | 
146 | # 2. Can you store the output in an integer vector?
147 | # Not directly: map_int() errors on fractional doubles, so round explicitly.
148 | avLifeInt <- map_int(gap_split, ~ as.integer(round(mean(.$lifeExp))))
149 | 
150 | 
151 | # Chapter 3 ---------------------------------------------------------------
152 | 
153 | # Extracting elements
154 | pluck(gap_split, "United Kingdom", "lifeExp")
155 | 
156 | # Filtering
157 | is.europe <- function(data) {
158 |   all(data$continent == "Europe") # always one TRUE/FALSE, as keep()/discard() need
159 | }
160 | 
161 | europe <- keep(gap_split, is.europe)
162 | 
163 | notEurope <- discard(gap_split, is.europe)
164 | 
165 | # Joining
166 | 
167 | uk <- pluck(gap_split, "United Kingdom")
168 | # purrr::prepend() is deprecated (purrr 1.0.0); base append(after = 0) puts `values` first
169 | updatedGap <- append(gap_split, values = list(UK = uk), after = 0)
170 | View(updatedGap)
171 | 
172 | # Merging lists
173 | # purrr way
174 | ?list_merge
175 | # Base R way 
176 | ?modifyList()
177 | 
178 | # Transposing / Inverting
179 | 
180 | myList <- list(firstname = list("Doug", "Gary"),
181 |                lastname = list("Ashton", "Linekar"))
182 | 
183 | myTransposedList <- transpose(myList)
184 | View(myTransposedList)
185 | 
186 | gap_inverted <- transpose(gap_split)
187 | names(gap_inverted)
188 | 
189 | gap_inverted[["year"]]
190 | 
191 | # Exercise page 3-4
192 | # 
193 | # 1. Write a function to test if the life expectancy for the most recent year is
194 | #   the maximum life expectancy. The function should return TRUE (when
195 | #   life expectancy in 2007 is the maximum) or FALSE.
196 | #
197 | data <- pluck(gap_split, "Botswana")
198 | 
199 | life07 <- data$lifeExp[nrow(data)] # last row; presumably 2007 - assumes rows are year-ordered
200 | maxlife <- max(data$lifeExp) # maximum over every row (was taken over the last row only)
201 | 
202 | life07 == maxlife
203 | 
204 | maxLife07 <- function(data) {
205 |   # lifeExp for the latest year, looked up by year rather than by row position
206 |   life07 <- data$lifeExp[data$year == max(data$year)]
207 |   maxlife <- max(data$lifeExp)
208 |   life07 == maxlife # TRUE when the most recent year holds the maximum
209 | }
210 | 
211 | 
212 | # 2. Test your function on the data for Botswana and the data for Denmark.
213 | 
214 | maxLife07(gap_split[["Botswana"]])
215 | maxLife07(gap_split[["Denmark"]])
216 | 
217 | 
218 | # 3. Filter the split gapminder data to return only elements where the life
219 | #   expectancy in 2007 is not its highest life expectancy.
220 | 
221 | peakedLife <- discard(gap_split, maxLife07)
222 | 
223 | # Extension Questions
224 | # 4. Use appropriate map functions to return the maximum life expectancy
225 | #   for each of these countries and their life expectancy in 2007
226 | 
227 | 
228 | # Chapter 4 ---------------------------------------------------------------
229 | 
230 | 
231 | ?map2
232 | 
233 | means <- rep(0:5, each = 2)
234 | means <- set_names(means, nm = LETTERS[1:12])
235 | 
236 | sds <- rep(c(1,2), times = 6)
237 | 
238 | normData <- map2_df(means, sds, rnorm, n = 100)
239 | View(normData)
240 | # pivot_longer() and ggplot() replace the superseded gather() and deprecated qplot()
241 | normData %>%
242 |   pivot_longer(everything(), names_to = "Simulation", values_to = "Value") %>%
243 |   ggplot(aes(x = Value, group = Simulation)) + geom_density()
244 | ?pmap
245 | 
246 | 
247 | 
248 | # Chapter 5 ---------------------------------------------------------------
249 | 
250 | # Nested Data
251 | 
252 | gap_nested
253 | 
254 | gap_nested %>%
255 |   filter(country == "United Kingdom") %>%
256 |   select(data) %>%
257 |   unnest(data) # name the list-column: bare unnest() is deprecated since tidyr 1.0.0
258 | 
259 | 
260 | gap_simple %>%
261 |   group_by(country, continent) %>%
262 |   nest()
263 | 
264 | 
265 | # Mutate and map together
266 | 
267 | map(gap_nested$data, ~max(.$lifeExp))
268 | 
269 | gap_nested %>%
270 |   mutate(MaxLife = map_dbl(data, ~max(.$lifeExp)))
271 | 
272 | 
273 | # Exercise 
274 | # 1. Using the nested gapminder data:
275 | #   a. Find the minimum value of the population for each
276 | #     country
277 | 
278 | 
279 | # b. Calculate the variance of the GDP per capita
280 | 
281 | 
282 | gap_nested %>%
283 |   mutate(MinPop = map_dbl(data, ~min(.$pop))) %>%
284 |   mutate(GdpVar = map_dbl(data, ~var(.$gdpPercap))) %>%
285 |   View()
286 | 
287 | 
288 | ukModel <- lm(lifeExp ~ year, data = gap_split[["United Kingdom"]])
289 | 
290 | 
291 | gap_model <- gap_nested %>%
292 |   mutate(model = map(data, ~lm(lifeExp ~ year, data = .)))
293 | 
294 | View(gap_model)
295 | 
296 | 
297 | library(broom)
298 | glance(ukModel) %>% View()
299 | 
300 | 
301 | gap_model %>%
302 |   mutate(model_results = map(model, glance)) %>% # glance() output for each fitted model
303 |   select(country, model_results) %>%
304 |   unnest(model_results) %>% View() # name the column: bare unnest() is deprecated
305 | 


--------------------------------------------------------------------------------
/scripts/internal_script.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # Introduction to Functional Programming with purrr -------------------------
  3 | 
  4 | 
  5 | library(purrr)
  6 | library(dplyr)
  7 | library(repurrrsive)
  8 | library(ggplot2)
  9 | library(modelr)
 10 | library(broom)
 11 | library(tidyr)
 12 | 
 13 | 
 14 | myList <- list(A = rnorm(100),
 15 |                B = sample(LETTERS, 10))
 16 | 
 17 | 
 18 | length(myList)
 19 | names(myList)
 20 | 
 21 | myList[[2]]
 22 | myList[2]
 23 | 
 24 | myList$A
 25 | 
 26 | addingFunction <- function(x, y = 0){ # `y` is optional and defaults to 0
 27 |   x + y # sum of the two arguments
 28 | }
 29 | 
 30 | addingFunction(1:10, 2)
 31 | 
 32 | str(gap_split)
 33 | 
 34 | 
 35 | # exercise 1-4 ------------------------------------------------------------
 36 | 
 37 | length(gap_split)
 38 | names(gap_split)
 39 | 
 40 | gap_uk <- gap_split$`United Kingdom`
 41 | 
 42 | 
 43 | mean_lifeExp <- function(data, name) {
 44 |   # Take the element called `name`, then average its lifeExp column
 45 |   mean(data[[name]]$lifeExp)
 46 | }
 47 | 
 48 | mean_lifeExp(gap_split, "United Kingdom")
 49 | 
 50 | 
 51 | 
 52 | gap_split[[1]] %>%
 53 |   filter(lifeExp == max(lifeExp)) %>%
 54 |   pull(year)
 55 | 
 56 | gap_split[[1]] %>%
 57 |   filter(lifeExp == max(lifeExp)) %>%
 58 |   magrittr::extract2("year") # quoted: a bare `year` is evaluated as an object, not a column
 59 | 
 60 | 
 61 | maxYear <- function(data) {
 62 |   # Row(s) of `data` where lifeExp equals its maximum
 63 |   at_peak <- filter(data, lifeExp == max(lifeExp))
 64 |   magrittr::extract2(at_peak, "year")
 65 | }
 66 | 
 67 | maxYear(gap_split[[4]])
 68 | 
 69 | years <- vector("numeric", length = length(gap_split)) # preallocate one slot per element
 70 | 
 71 | for(i in seq_along(gap_split)){
 72 |   years[i] <- maxYear(gap_split[[i]]) # explicit-loop counterpart of map(gap_split, maxYear)
 73 | }
 74 | 
 75 | map(gap_split, maxYear)
 76 | 
 77 | 
 78 | 
 79 | # Iteration in purrr ------------------------------------------------------
 80 | 
 81 | 
 82 | lifeExpectancy <- map(gap_split, "lifeExp")
 83 | map(lifeExpectancy, max)
 84 | 
 85 | map(gap_split, "lifeExp") %>% map(max)
 86 | 
 87 | 
 88 | # exercise 2-3 ------------------------------------------------------------
 89 | 
 90 | 
 91 | population <- map(gap_split, "pop")
 92 | map(population, min)
 93 | 
 94 | map(gap_split, "gdpPercap") %>% map(var)
 95 | 
 96 | 
 97 | ans <- map_dbl(population, 1) # first `pop` entry per country; presumably 1952 - verify row order
 98 | which.min(ans)                # named position, so the country name is shown directly
 99 | ans[which.min(ans)]           # look up by the computed position instead of a hard-coded 109
100 | 
101 | 
102 | 
103 | # Additional arguments ----------------------------------------------------
104 | 
105 | 
106 | map(lifeExpectancy, quantile, probs = c(0.05, 0.95))
107 | 
108 | maxLife <- function(data){
109 |   max(data$lifeExp)
110 | }
111 | 
112 | map(gap_split, maxLife)
113 | 
114 | x <- gap_split[[1]]
115 | 
116 | max(x$lifeExp)
117 | 
118 | map(gap_split, ~max(.x$lifeExp))
119 | 
120 | 
121 | # exercise 2-5 ------------------------------------------------------------
122 | 
123 | 
124 | min_pop_year <- function(data) {
125 |   # Row(s) of `data` where pop equals its minimum
126 |   smallest <- filter(data, pop == min(pop))
127 |   magrittr::extract2(smallest, "year")
128 | }
129 | 
130 | ans <- map(gap_split, min_pop_year)
131 | 
132 | ans <- map(gap_split, ~magrittr::extract2(filter(., pop == min(pop)), "year"))
133 | 
134 | map(gap_split, ~{filter(., pop == min(pop)) %>%
135 |                  magrittr::extract2("year")})
136 | 
137 | which.max(ans) # position (and name) of the entry with the largest year in `ans`
138 | gap_split[[which.max(ans)]] # use the computed position rather than a hard-coded 41
139 | 
140 | 
141 | 
142 | map_dbl(gap_split, ~max(.x$lifeExp))
143 | 
144 | 
145 | 
146 | #map_lgl(gap_split, ~max(.x$lifeExp))
147 | 
148 | 
149 | # exercise 2-7 ------------------------------------------------------------
150 | 
151 | map_dbl(gap_split, ~mean(.x$lifeExp))
152 | 
153 | 
154 | pluck(gap_split, "United Kingdom")
155 | # gap_split$`United Kingdom`
156 | # gap_split[['United Kingdom']]
157 | 
158 | pluck(gap_split, "United Kingdom", "lifeExp")
159 | 
160 | is.europe <- function(data){
161 |   all(data$continent == "Europe") # always one TRUE/FALSE, as keep()/discard() need
162 | }
163 | 
164 | europe <- keep(gap_split, is.europe)
165 | 
166 | not_europe <- discard(gap_split, is.europe)
167 | 
168 | uk <- pluck(gap_split, "United Kingdom")
169 | # purrr::prepend() is deprecated (purrr 1.0.0); base append(after = 0) puts `values` first
170 | gap_split_updated <- append(gap_split, values = list(UK = uk), after = 0)
171 | 
172 | invert <- transpose(gap_split)
173 | names(invert)
174 | 
175 | # exercise 3-4 ------------------------------------------------------------
176 | 
177 | is.maxLife <- function(data){ # first version: the whole test in one expression
178 |   data$lifeExp[data$year == max(data$year)] == max(data$lifeExp)
179 | }
180 | # lifeExp in the row(s) whose year equals the largest year in `data`
181 | life_mostRec <- function(data){
182 |   data$lifeExp[data$year == max(data$year)]
183 | }
184 | # largest lifeExp value in `data`
185 | max_lifeExp <- function(data){
186 |   max(data$lifeExp)
187 | }
188 | # same test rebuilt from the two helpers; this definition replaces the first one
189 | is.maxLife <- function(data){
190 |   life_mostRec(data) == max_lifeExp(data)
191 | }
192 | 
193 | 
194 | gap_maxLife <- discard(gap_split, is.maxLife)
195 | 
196 | 
197 | map(gap_maxLife, max_lifeExp)
198 | map(gap_maxLife, life_mostRec)
199 | 
200 | 
201 | 
202 | # The wider map family ----------------------------------------------------
203 | 
204 | 
205 | means <- rep(0:5, each = 2)
206 | sds <- rep(c(1, 2), times = 6)
207 | 
208 | means <- set_names(means, nm = LETTERS[1:12])
209 | 
210 | normData <- map2_df(means, sds, rnorm, n = 100)
211 | # pivot_longer() and ggplot() replace the superseded gather() and deprecated qplot()
212 | normData %>%
213 |   pivot_longer(everything(), names_to = "Simulation", values_to = "Value") %>%
214 |   ggplot(aes(x = Value, group = Simulation)) + geom_density()
215 | 
216 | map2_df(means, sds, ~rnorm(mean = .x, sd = .y, n = 100))
217 | 
218 | n <- sample(c(5, 10, 100), 12, replace = TRUE)
219 | 
220 | pmap(list(means, sds, n), ~rnorm(n = ..3, mean = ..1, sd = ..2))
221 | 
222 | 
223 | 
224 | # Side effects ------------------------------------------------------------
225 | 
226 | plotLifeExpectancy <- function(data){
227 |   country <- as.character(unique(data$country)) # plain text for the plot title
228 |   p <- ggplot(data, aes(x = year, y = lifeExp)) + # ggplot(): qplot() is deprecated
229 |     geom_line() + ggtitle(country)
230 |   print(p) # draw on the active graphics device
231 | }
232 | 
233 | pdf("lifeExpectancyPlots.pdf")
234 | walk(gap_split, plotLifeExpectancy)
235 | dev.off()
236 | 
237 | 
238 | 
239 | # exercise 4-4 ------------------------------------------------------------
240 | 
241 | max_life_print <- function(lifeExp, name){ # writes one summary line with cat()
242 |   cat("The maximum life expectancy for", name, "was", max(lifeExp), "\n")
243 | }
244 | 
245 | max_life_print(x$lifeExp, "Afghanistan")
246 | 
247 | lifeExp_list <- map(gap_split, "lifeExp")
248 | country_names <- names(gap_split)
249 | 
250 | walk2(lifeExp_list, country_names, max_life_print)
251 | 
252 | 
253 | plotLifeExpectancy <- function(data, country){
254 |   p <- ggplot(data, aes(x = year, y = lifeExp)) + # ggplot(): qplot() is deprecated
255 |     geom_line() + ggtitle(country) # `country` becomes the plot title
256 |   print(p) # draw on the active graphics device
257 | }
258 | 
259 | pdf("lifeExpectancyPlotsWithCountry.pdf")
260 | iwalk(gap_split, plotLifeExpectancy)
261 | dev.off()
262 | 
263 | 
264 | # exercise 4-5 ------------------------------------------------------------
265 | 
266 | map(gap_split, "lifeExp") %>% iwalk(max_life_print)
267 | 
268 | 
269 | 
270 | # Nested Data -------------------------------------------------------------
271 | 
272 | # How to create split list dataframes
273 | iris %>% split(iris$Species)
274 | 
275 | gap_nested %>%
276 |   filter(country == "United Kingdom") %>%
277 |   select(data) %>%
278 |   unnest(data) # name the list-column: bare unnest() is deprecated since tidyr 1.0.0
279 | 
280 | 
281 | map(gap_nested$data, "lifeExp")
282 | 
283 | 
284 | gap_nested %>%
285 |   mutate(maxLife = map_dbl(data, ~max(.$lifeExp)))
286 | 
287 | 
288 | # exercise 5-4 ------------------------------------------------------------
289 | 
290 | gap_nested %>%
291 |   mutate(minPop = map_dbl(data, ~min(.$pop)),
292 |          varGPD = map_dbl(data, ~var(.$gdpPercap)))
293 | 
294 | 
295 | gap_nested %>%
296 |   mutate(pop1952 = map_int(data, ~magrittr::extract2(filter(., year == 1952), "pop")))
297 | 
298 | 
299 | gap_model <- gap_nested %>%
300 |              mutate(model = map(data, ~lm(lifeExp ~ year, data = .x)))
301 | 
302 | 
303 | gap_model %>%
304 |   transmute(country, fit = map(model, glance)) %>% # glance() output for each fitted model
305 |   unnest(fit) # name the list-column: bare unnest() is deprecated since tidyr 1.0.0
306 |          
307 | 
308 | gap_fit <- gap_model %>%
309 |            mutate(residuals = map2(data, model, add_residuals)) %>%
310 |            unnest(residuals)
311 | 
312 | ggplot(data = gap_fit, aes(x = year, y = resid)) +
313 |   geom_line(alpha = 0.5, aes(group = country)) + facet_wrap(~continent)
314 | 
315 | 
316 | iris %>%
317 |   group_by(Species) %>%
318 |   nest()
319 | 
320 | 
321 | # exercise 5-7 ------------------------------------------------------------
322 | 
323 | gap_simple %>%
324 |   group_by(continent) %>%
325 |   nest() %>%
326 |   mutate(model = map(data, ~lm(lifeExp ~ pop, data = .x)),
327 |          metrics = map(model, glance))
328 | 


--------------------------------------------------------------------------------