├── londonr_purrr_slides.pdf ├── Workshop 5 - Functional Programming with purr.pdf ├── README.md └── scripts ├── londonr_script.R └── internal_script.R /londonr_purrr_slides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MangoTheCat/Introduction-to-Functional-Programming-with-Purrr/master/londonr_purrr_slides.pdf -------------------------------------------------------------------------------- /Workshop 5 - Functional Programming with purr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MangoTheCat/Introduction-to-Functional-Programming-with-Purrr/master/Workshop 5 - Functional Programming with purr.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Functional Programming with purrr 2 | 3 | To follow along with the workshop please follow these pre-installation instructions 4 | 5 | * Install R from https://cran.r-project.org/ 6 | * RStudio desktop from https://www.rstudio.com 7 | 8 | Install the following R packages from CRAN in the usual way: 9 | 10 | ```r 11 | install.packages(c("tidyverse", "repurrrsive", "broom", "modelr")) 12 | ``` 13 | -------------------------------------------------------------------------------- /scripts/londonr_script.R: -------------------------------------------------------------------------------- 1 | 2 | # Functional Programming with Purrr --------------------------------------- 3 | 4 | # Notes and a neater version of this script 5 | # https://github.com/MangoTheCat/Introduction-to-Functional-Programming-with-Purrr 6 | # or short link 7 | # https://bit.ly/mcpurrr 8 | 9 | # Need the packages? 
10 | install.packages(c("tidyverse", "repurrrsive")) 11 | 12 | # Lists 13 | 14 | myList <- list(A = rnorm(100), B = sample(LETTERS, 10)) 15 | myList 16 | 17 | length(myList) 18 | names(myList) 19 | 20 | myList[["B"]] 21 | myList$B 22 | 23 | myMass <- list(doug = 80, gary = 100) 24 | 25 | rest_energy <- function(m, c = 299792458) { 26 | m * c^2 27 | } 28 | 29 | lapply(myMass, rest_energy) 30 | 31 | # Exercise 1-4 32 | 33 | library(repurrrsive) 34 | 35 | # Using the gap_split data in the repurrrsive package: 36 | # a. How many elements are in the list? 37 | 38 | View(gap_split) 39 | length(gap_split) 40 | 41 | # b. Do the elements have names? 42 | names(gap_split) 43 | 44 | # c. Extract the data from the United Kingdom. What type of data is it? 45 | 46 | class(gap_split[["United Kingdom"]]) 47 | print.default(gap_split[["United Kingdom"]]) 48 | 49 | # 2. Write a function that, when given the data and a country name will 50 | # calculate the mean life expectancy for that country 51 | 52 | library(tidyverse) 53 | 54 | meanLife <- function(data, country) { 55 | mean(data[[country]]$lifeExp) 56 | } 57 | 58 | View(myList) 59 | View(gap_split) 60 | 61 | data(package = "repurrrsive") 62 | 63 | library(purrr) 64 | 65 | maxYear <- function(data) { 66 | 67 | data %>% 68 | filter(lifeExp == max(lifeExp)) %>% 69 | pull("year") 70 | 71 | } 72 | 73 | maxYear(gap_split$Afghanistan) 74 | 75 | # purrr map 76 | 77 | map(gap_split, maxYear) # equivalent to an lapply 78 | 79 | 80 | # Chapter 2 81 | 82 | # Extracting elements 83 | lifeExpectancy <- map(gap_split, "lifeExp") 84 | 85 | names(lifeExpectancy) 86 | length(lifeExpectancy) # one entry per element of gap_split 87 | 88 | 89 | map(lifeExpectancy, max) 90 | 91 | 92 | # Exercise working with map Page 2-3 93 | # 94 | # 1. Using the split gapminder data: 95 | # a. Find the minimum value of the population for each country 96 | 97 | population <- map(gap_split, "pop") 98 | map(population, min) 99 | 100 | # b. 
Calculate the variance of the GDP per capita 101 | 102 | gdpVar <- function(data) { 103 | gdp <- data[["gdpPercap"]] 104 | var(gdp) 105 | } 106 | map(gap_split, gdpVar) 107 | 108 | # Alternative: extract the gdpPercap element by name, then map var over the result 109 | map(gap_split, "gdpPercap") %>% map(var) 110 | 111 | 112 | # Extension Questions 113 | # 2. For each country, extract the value of the population in 1952. 114 | 115 | # 3. Which country had the lowest population in 1952? (hint: take a look at 116 | # which.min) 117 | 118 | maxLife <- function(data) { 119 | max(data$lifeExp) 120 | } 121 | 122 | map(gap_split, maxLife) 123 | 124 | map(gap_split, ~max(.$lifeExp)) # same computation written as a formula shorthand 125 | 126 | 127 | lapply(gap_split, maxLife) 128 | maxLifeVec <- sapply(gap_split, maxLife) # sapply chooses the return type from the results 129 | 130 | class(maxLifeVec) 131 | maxLifeVec[3] 132 | 133 | maxLifeVec <- vapply(gap_split, maxLife, numeric(1)) # vapply: each result must be a single numeric 134 | 135 | # Type-safe map functions 136 | maxLifeDbl <- map_dbl(gap_split, maxLife) 137 | maxLifeDbl 138 | 139 | # Exercise 140 | # Page 2-6 141 | # 1. Find the average life expectancy for each country, storing 142 | # the output in a numeric vector 143 | 144 | avLife <- map_dbl(gap_split, ~ mean(.$lifeExp)) 145 | 146 | # 2. Can you store the output in an integer vector? 
147 | 148 | avLife <- map_int(gap_split, ~ mean(.$lifeExp)) # expected to fail: the means are doubles, not integers 149 | 150 | 151 | # Chapter 3 --------------------------------------------------------------- 152 | 153 | # Extracting elements 154 | pluck(gap_split, "United Kingdom", "lifeExp") 155 | 156 | # Filtering 157 | is.europe <- function(data) { 158 | unique(data$continent) == "Europe" 159 | } 160 | 161 | europe <- keep(gap_split, is.europe) 162 | 163 | notEurope <- discard(gap_split, is.europe) 164 | 165 | # Joining 166 | 167 | uk <- pluck(gap_split, "United Kingdom") 168 | 169 | updatedGap <- prepend(gap_split, values = list(UK = uk)) 170 | View(updatedGap) 171 | 172 | # Merging lists 173 | # purrr way 174 | ?list_merge 175 | # Base R way 176 | ?modifyList() 177 | 178 | # Transposing / Inverting 179 | 180 | myList <- list(firstname = list("Doug", "Gary"), 181 | lastname = list("Ashton", "Linekar")) 182 | 183 | myTransposedList <- transpose(myList) 184 | View(myTransposedList) 185 | 186 | gap_inverted <- transpose(gap_split) 187 | names(gap_inverted) 188 | 189 | gap_inverted[["year"]] 190 | 191 | # Exercise page 3-4 192 | # 193 | # 1. Write a function to test if the life expectancy for the most recent year is 194 | # the maximum life expectancy. The function should return TRUE (when 195 | # life expectancy in 2007 is the maximum) or FALSE. 196 | # 197 | data <- pluck(gap_split, "Botswana") 198 | 199 | life07 <- data$lifeExp[nrow(data)] # last row; assumes rows are ordered by year 200 | maxlife <- max(data$lifeExp) # maximum over all years, not just the last row 201 | 202 | life07 == maxlife 203 | 204 | maxLife07 <- function(data) { 205 | life07 <- data$lifeExp[nrow(data)] 206 | maxlife <- max(data$lifeExp) 207 | 208 | life07 == maxlife 209 | } 210 | 211 | 212 | # 2. Test your function on the data for Botswana and the data for Denmark. 213 | 214 | maxLife07(gap_split[["Botswana"]]) 215 | maxLife07(gap_split[["Denmark"]]) 216 | 217 | 218 | # 3. Filter the split gapminder data to return only elements where the life 219 | # expectancy in 2007 is not its highest life expectancy. 
220 | 221 | peakedLife <- discard(gap_split, maxLife07) 222 | 223 | # Extension Questions 224 | # 4. Use appropriate map functions to return the maximum life expectancy 225 | # for each of these countries and their life expectancy in 2007 226 | 227 | 228 | # Chapter 4 --------------------------------------------------------------- 229 | 230 | 231 | ?map2 232 | 233 | means <- rep(0:5, each = 2) 234 | means <- set_names(means, nm = LETTERS[1:12]) 235 | 236 | sds <- rep(c(1,2), times = 6) 237 | 238 | normData <- map2_df(means, sds, rnorm, n = 100) # n is passed by name; each mean/sd pair fills the remaining arguments 239 | View(normData) 240 | 241 | gather(normData, Simulation, Value) %>% 242 | qplot(Value, data = ., geom = "density", group = Simulation) 243 | 244 | ?pmap 245 | 246 | 247 | 248 | # Chapter 5 --------------------------------------------------------------- 249 | 250 | # Nested Data 251 | 252 | gap_nested 253 | 254 | gap_nested %>% 255 | filter(country == "United Kingdom") %>% 256 | select(data) %>% 257 | unnest(data) # name the list-column explicitly 258 | 259 | 260 | gap_simple %>% 261 | group_by(country, continent) %>% 262 | nest() 263 | 264 | 265 | # Mutate and map together 266 | 267 | map(gap_nested$data, ~max(.$lifeExp)) 268 | 269 | gap_nested %>% 270 | mutate(MaxLife = map_dbl(data, ~max(.$lifeExp))) 271 | 272 | 273 | # Exercise 274 | # 1. Using the nested gapminder data: 275 | # a. Find the minimum value of the population for each 276 | # country 277 | 278 | 279 | # b. 
Calculate the variance of the GDP per capita 280 | 281 | 282 | gap_nested %>% 283 | mutate(MinPop = map_dbl(data, ~min(.$pop))) %>% 284 | mutate(GdpVar = map_dbl(data, ~var(.$gdpPercap))) %>% 285 | View() 286 | 287 | 288 | ukModel <- lm(lifeExp ~ year, data = gap_split[["United Kingdom"]]) 289 | 290 | 291 | gap_model <- gap_nested %>% 292 | mutate(model = map(data, ~lm(lifeExp ~ year, data = .))) 293 | 294 | View(gap_model) 295 | 296 | 297 | library(broom) 298 | glance(ukModel) %>% View() 299 | 300 | 301 | gap_model %>% 302 | mutate(model_results = map(model, glance)) %>% 303 | select(country, model_results) %>% 304 | unnest(model_results) %>% View() 305 | -------------------------------------------------------------------------------- /scripts/internal_script.R: -------------------------------------------------------------------------------- 1 | 2 | # Introduction to Functional Programming with purrr ------------------------- 3 | 4 | 5 | library(purrr) 6 | library(dplyr) 7 | library(repurrrsive) 8 | library(ggplot2) 9 | library(modelr) 10 | library(broom) 11 | library(tidyr) 12 | 13 | 14 | myList <- list(A = rnorm(100), 15 | B = sample(LETTERS, 10)) 16 | 17 | 18 | length(myList) 19 | names(myList) 20 | 21 | myList[[2]] 22 | myList[2] 23 | 24 | myList$A 25 | 26 | addingFunction <- function(x, y = 0){ 27 | x + y 28 | } 29 | 30 | addingFunction(1:10, 2) 31 | 32 | str(gap_split) 33 | 34 | 35 | # exercise 1-4 ------------------------------------------------------------ 36 | 37 | length(gap_split) 38 | names(gap_split) 39 | 40 | gap_uk <- gap_split$`United Kingdom` 41 | 42 | 43 | mean_lifeExp <- function(data, name){ 44 | data <- data[[name]] 45 | mean(data$lifeExp) 46 | } 47 | 48 | mean_lifeExp(gap_split, "United Kingdom") 49 | 50 | 51 | 52 | gap_split[[1]] %>% 53 | filter(lifeExp == max(lifeExp)) %>% 54 | pull(year) 55 | 56 | gap_split[[1]] %>% 57 | filter(lifeExp == max(lifeExp)) %>% 58 | magrittr::extract2("year") # extract2(x, "year") is x[["year"]], so the name must be a string 59 | 60 | 61 | maxYear <- function(data){ 62 | data %>% 63 | 
filter(lifeExp == max(lifeExp)) %>% 64 | magrittr::extract2("year") 65 | } 66 | 67 | maxYear(gap_split[[4]]) 68 | 69 | years <- vector("numeric", length = length(gap_split)) # preallocate one slot per element of gap_split 70 | 71 | for(i in seq_along(gap_split)){ 72 | years[i] <- maxYear(gap_split[[i]]) 73 | } 74 | 75 | map(gap_split, maxYear) # the same iteration without the explicit loop 76 | 77 | 78 | 79 | # Iteration in purrr ------------------------------------------------------ 80 | 81 | 82 | lifeExpectancy <- map(gap_split, "lifeExp") 83 | map(lifeExpectancy, max) 84 | 85 | map(gap_split, "lifeExp") %>% map(max) 86 | 87 | 88 | # exercise 2-3 ------------------------------------------------------------ 89 | 90 | 91 | population <- map(gap_split, "pop") 92 | map(population, min) 93 | 94 | map(gap_split, "gdpPercap") %>% map(var) 95 | 96 | 97 | ans <- map(population, 1) 98 | which.min(ans) 99 | ans[[109]] 100 | 101 | 102 | 103 | # Additional arguments ---------------------------------------------------- 104 | 105 | 106 | map(lifeExpectancy, quantile, probs = c(0.05, 0.95)) 107 | 108 | maxLife <- function(data){ 109 | max(data$lifeExp) 110 | } 111 | 112 | map(gap_split, maxLife) 113 | 114 | x <- gap_split[[1]] 115 | 116 | max(x$lifeExp) 117 | 118 | map(gap_split, ~max(.x$lifeExp)) 119 | 120 | 121 | # exercise 2-5 ------------------------------------------------------------ 122 | 123 | 124 | min_pop_year <- function(data){ 125 | data %>% 126 | filter(pop == min(pop)) %>% 127 | magrittr::extract2("year") 128 | } 129 | 130 | ans <- map(gap_split, min_pop_year) 131 | 132 | ans <- map(gap_split, ~magrittr::extract2(filter(., pop == min(pop)), "year")) 133 | 134 | map(gap_split, ~{filter(., pop == min(pop)) %>% 135 | magrittr::extract2("year")}) 136 | 137 | which.max(ans) 138 | gap_split[[41]] 139 | 140 | 141 | 142 | map_dbl(gap_split, ~max(.x$lifeExp)) 143 | 144 | 145 | 146 | #map_lgl(gap_split, ~max(.x$lifeExp)) 147 | 148 | 149 | # exercise 2-7 ------------------------------------------------------------ 150 | 151 | map_dbl(gap_split, ~mean(.x$lifeExp)) 152 | 
153 | 154 | pluck(gap_split, "United Kingdom") 155 | # gap_split$`United Kingdom` 156 | # gap_split[['United Kingdom']] 157 | 158 | pluck(gap_split, "United Kingdom", "lifeExp") 159 | 160 | is.europe <- function(data){ 161 | unique(data$continent) == "Europe" 162 | } 163 | 164 | europe <- keep(gap_split, is.europe) 165 | 166 | not_europe <- discard(gap_split, is.europe) 167 | 168 | uk <- pluck(gap_split, "United Kingdom") 169 | 170 | gap_split_updated <- prepend(gap_split, values = list(UK = uk)) 171 | 172 | invert <- transpose(gap_split) 173 | names(invert) 174 | 175 | # exercise 3-4 ------------------------------------------------------------ 176 | 177 | is.maxLife <- function(data){ 178 | data$lifeExp[data$year == max(data$year)] == max(data$lifeExp) 179 | } 180 | 181 | life_mostRec <- function(data){ 182 | data$lifeExp[data$year == max(data$year)] 183 | } 184 | 185 | max_lifeExp <- function(data){ 186 | max(data$lifeExp) 187 | } 188 | 189 | is.maxLife <- function(data){ 190 | life_mostRec(data) == max_lifeExp(data) 191 | } 192 | 193 | 194 | gap_maxLife <- discard(gap_split, is.maxLife) 195 | 196 | 197 | map(gap_maxLife, max_lifeExp) 198 | map(gap_maxLife, life_mostRec) 199 | 200 | 201 | 202 | # The wider map family ---------------------------------------------------- 203 | 204 | 205 | means <- rep(0:5, each = 2) 206 | sds <- rep(c(1, 2), times = 6) 207 | 208 | means <- set_names(means, nm = LETTERS[1:12]) 209 | 210 | normData <- map2_df(means, sds, rnorm, n = 100) 211 | 212 | normData %>% gather(key = Simulation, value = Value) %>% 213 | qplot(Value, data = ., geom = "density", group = Simulation) 214 | 215 | 216 | map2_df(means, sds, ~rnorm(mean = .x, sd = .y, n = 100)) 217 | 218 | n <- sample(c(5, 10, 100), 12, replace = TRUE) 219 | 220 | pmap(list(means, sds, n), ~rnorm(n = ..3, mean = ..1, sd = ..2)) 221 | 222 | 223 | 224 | # Side effects ------------------------------------------------------------ 225 | 226 | plotLifeExpectancy <- function(data){ 227 | 
country <- unique(data$country) 228 | p <- qplot(x = year, y = lifeExp, data = data, 229 | main = country, geom = "line") 230 | print(p) 231 | } 232 | 233 | pdf("lifeExpectancyPlots.pdf") 234 | walk(gap_split, plotLifeExpectancy) 235 | dev.off() 236 | 237 | 238 | 239 | # exercise 4-4 ------------------------------------------------------------ 240 | 241 | max_life_print <- function(lifeExp, name){ 242 | cat("The maximum life expectancy for", name, "was", max(lifeExp), "\n") 243 | } 244 | 245 | max_life_print(x$lifeExp, "Afghanistan") 246 | 247 | lifeExp_list <- map(gap_split, "lifeExp") 248 | country_names <- names(gap_split) 249 | 250 | walk2(lifeExp_list, country_names, max_life_print) 251 | 252 | 253 | plotLifeExpectancy <- function(data, country){ 254 | p <- qplot(x = year, y = lifeExp, data = data, 255 | main = country, geom = "line") 256 | print(p) 257 | } 258 | 259 | pdf("lifeExpectancyPlotsWithCountry.pdf") 260 | iwalk(gap_split, plotLifeExpectancy) 261 | dev.off() 262 | 263 | 264 | # exercise 4-5 ------------------------------------------------------------ 265 | 266 | map(gap_split, "lifeExp") %>% iwalk(max_life_print) 267 | 268 | 269 | 270 | # Nested Data ------------------------------------------------------------- 271 | 272 | # How to create split list dataframes 273 | iris %>% split(iris$Species) 274 | 275 | gap_nested %>% 276 | filter(country == "United Kingdom") %>% 277 | select(data) %>% 278 | unnest(data) # name the list-column explicitly 279 | 280 | 281 | map(gap_nested$data, "lifeExp") 282 | 283 | 284 | gap_nested %>% 285 | mutate(maxLife = map_dbl(data, ~max(.$lifeExp))) 286 | 287 | 288 | # exercise 5-4 ------------------------------------------------------------ 289 | 290 | gap_nested %>% 291 | mutate(minPop = map_dbl(data, ~min(.$pop)), 292 | varGDP = map_dbl(data, ~var(.$gdpPercap))) 293 | 294 | 295 | gap_nested %>% 296 | mutate(pop1952 = map_int(data, ~magrittr::extract2(filter(., year == 1952), "pop"))) 297 | 298 | 299 | gap_model <- gap_nested %>% 300 | mutate(model = 
map(data, ~lm(lifeExp ~ year, data = .x))) 301 | 302 | 303 | gap_model %>% 304 | transmute(country, fit = map(model, glance)) %>% 305 | unnest(fit) # name the list-column explicitly 306 | 307 | 308 | gap_fit <- gap_model %>% 309 | mutate(residuals = map2(data, model, add_residuals)) %>% 310 | unnest(residuals) 311 | 312 | ggplot(data = gap_fit, aes(x = year, y = resid)) + 313 | geom_line(alpha = 0.5, aes(group = country)) + facet_wrap(~continent) 314 | 315 | 316 | iris %>% 317 | group_by(Species) %>% 318 | nest() 319 | 320 | 321 | # exercise 5-7 ------------------------------------------------------------ 322 | 323 | gap_simple %>% 324 | group_by(continent) %>% 325 | nest() %>% 326 | mutate(model = map(data, ~lm(lifeExp ~ pop, data = .x)), 327 | metrics = map(model, glance)) 328 | --------------------------------------------------------------------------------