├── NHL_goals ├── NHL_goals_dataviz.Rproj ├── tidytuesday_hockey.gif └── NHL_cumulative_goals.R ├── measles_vaccination ├── measles_vaccination_figure.png ├── measles_vaccination_dataviz.Rproj └── measles_vaccination.R ├── README.md ├── LICENSE └── tdf_tidytuesday.R /NHL_goals/NHL_goals_dataviz.Rproj: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /NHL_goals/tidytuesday_hockey.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carriebennette/TidyTuesdayProjects/HEAD/NHL_goals/tidytuesday_hockey.gif -------------------------------------------------------------------------------- /measles_vaccination/measles_vaccination_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carriebennette/TidyTuesdayProjects/HEAD/measles_vaccination/measles_vaccination_figure.png -------------------------------------------------------------------------------- /measles_vaccination/measles_vaccination_dataviz.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TidyTuesdayProjects 2 | Code and figures for my #TidyTuesday projects 3 | 4 | Week 10: 5 | ![Hockey_goals](https://github.com/carriebennette/TidyTuesdayProjects/blob/master/NHL_goals/tidytuesday_hockey.gif) 6 | Week 9: 7 | ![MMR_rates](https://github.com/carriebennette/TidyTuesdayProjects/blob/master/measles_vaccination/measles_vaccination_figure.png) 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Carrie Bennette 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /measles_vaccination/measles_vaccination.R: -------------------------------------------------------------------------------- 1 | 2 | library(binom) 3 | library(ggplot2) 4 | library(tidyr) 5 | library(showtext) 6 | 7 | # Get the raw data (https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-02-25/readme.md) 8 | measles_raw <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv') 9 | 10 | measles <- measles_raw %>% 11 | filter(enroll > 0 ) %>% # a suprising number of schools have zero enrollment 12 | # get rid of the -1s (these are missing) 13 | mutate(mmr = case_when(mmr == -1 ~ NA_real_, 14 | TRUE ~ mmr)) %>% 15 | # Apparently RI, PA, and Arkansas don't have county data; use city instead 16 | # potentially come back and refine using lat/lon 17 | mutate(geo_group = case_when(state %in% c("Rhode Island", 18 | "Arkansas", 19 | "Pennsylvania") ~ city, 20 | TRUE ~ county)) %>% 21 | filter(!is.na(geo_group)) %>% # note: FL is dropped (not county or city data..) 22 | # we could (probably?) assume that MMR rates ~ overall rates (e.g., for states like Iowa that only report overall rates) 23 | mutate(mmr_adj = case_when(is.na(mmr) & overall != -1 ~ overall, 24 | TRUE ~ mmr)) %>% 25 | # there are duplicates! 26 | distinct(name, city, state, county, geo_group, enroll, mmr, mmr_adj, overall, type, xrel, xmed, xper) %>% 27 | 28 | # school with largest enrollment is 2x bigger than second largest (6k vs 3k) 29 | # school is "West Valley School Prek-6" in Montana...googling suggests enroll is actually closer to n=400 30 | # https://www.publicschoolreview.com/west-valley-school-profile 31 | # drop school for now; if this was more than just a fun data viz exercise I'd definitely dig deeper into data collection 32 | filter(name != "West Valley School Prek-6") %>% 33 | group_by(state) %>% 34 | # get state averages (for color scale) 35 | mutate(state_avg = weighted.mean(mmr, w = enroll, na.rm = T)) %>% 36 | ungroup() %>% 37 | group_by(geo_group, state) %>% 38 | summarise(mmr_rate = weighted.mean(mmr, w = enroll, na.rm = T), 39 | enroll = sum(enroll, na.rm = T), 40 | state_avg = mean(state_avg, na.rm = T)) %>% 41 | mutate(enroll_group = case_when(enroll < 500 ~ 0.1, 42 | enroll >= 500 & enroll < 1000 ~ 0.15, 43 | enroll >= 1000 & enroll < 2500 ~ 0.2, 44 | enroll >= 2500 & enroll < 5000 ~ 0.25, 45 | enroll >= 5000 & enroll < 10000 ~ 0.3, 46 | enroll >= 10000 & enroll < 20000 ~ 0.4, 47 | enroll >= 20000 & enroll < 50000 ~ 0.5, 48 | enroll >= 50000 & enroll < 60000 ~ 0.6, 49 | enroll >= 60000 & enroll < 70000 ~ 0.7, 50 | enroll >= 70000 & enroll < 80000 ~ 0.8, 51 | enroll >= 80000 & enroll < 90000 ~ 0.9, 52 | enroll >= 100000 ~ 1, 53 | TRUE ~ 1)) %>% 54 | filter(enroll < 100000 & enroll > 250) %>% 55 | ungroup() 56 | 57 | 58 | # so I learned today that you can download fancy fonts and use them within R 59 | # good timing as I no longer have Illustrator until I get a new laptop... 60 | 61 | # load the graphics device engine driver that will show the fancy plots 62 | dev.off() 63 | quartz() 64 | 65 | fig <- ggplot(measles, aes(enroll, mmr_rate, color = state_avg, alpha = enroll_group)) + 66 | geom_point(size = 3.5) + 67 | theme_minimal() + 68 | theme(legend.position = "bottom", 69 | axis.title.y = element_text(angle = 0, vjust = 1), 70 | axis.text = element_text(size = 11, family = "roboto"), 71 | axis.title = element_text(size = 13, family = "roboto"), 72 | plot.title = element_text(size = 17, face = "bold", family = "roboto"), 73 | plot.caption = element_text(color = "grey70")) + 74 | guides(alpha = FALSE) + 75 | scale_y_continuous("\n", 76 | labels = function(x) paste0(sprintf("%.0f", x),"%")) + 77 | scale_x_continuous("\nStudent Enrollment\n", 78 | labels = scales::comma) + 79 | scale_color_gradient(low = "red", 80 | high = "blue", 81 | limits=c(88, 96), 82 | oob = scales::squish, 83 | guide = guide_colorbar(direction = "horizontal", 84 | title.position = "top", 85 | title = "State average immunization rate", 86 | barwidth = 15, 87 | barheight = 0.25, 88 | ticks = FALSE, 89 | title.hjust = 0.5)) + 90 | # annotations! call out Little Rock, AR 91 | annotate(geom = "curve", x = 28848, y = 68, xend = 20548, yend = 77, 92 | curvature = -.23, color = "grey60", arrow = arrow(length = unit(1, "mm"))) + 93 | annotate(geom = "text", x = 29400, y = 68, size = 3, 94 | label = "Little Rock, AR had a notably low rate \ngiven it's relatively large enrollment", 95 | family = "lato", 96 | hjust = "left", 97 | lineheight = 0.9, 98 | color = "grey50") + 99 | # call out Brinkley, AR 100 | annotate(geom = "curve", x = 12000, y = 50, xend = 505, yend = 39.75, 101 | curvature = .23, color = "grey60", arrow = arrow(length = unit(1, "mm"))) + 102 | annotate(geom = "text", x = 12500, y = 50, size = 3, 103 | label = "When based on a smaller number of students, seemingly large differences are often \ndue to chance. Estimates from less populous counties are shown with lower transparency \nto de-emphasize such variability. Despite having fewer than 500 students, \nBrinkley, AR still manages to stand out for it's exceptionally low rate.", 104 | family = "lato", 105 | hjust = "left", 106 | lineheight = 0.9, 107 | color = "grey50") + 108 | # call out dashed line 109 | geom_hline(yintercept = 95, linetype = "dashed", size = 0.25, color = "grey40") + 110 | annotate(geom = "curve", x = 90000, y = 88, xend = 90000, yend = 94, 111 | curvature = .23, color = "grey60", arrow = arrow(length = unit(1, "mm"))) + 112 | annotate(geom = "text", x = 89500, y = 88, size = 3, 113 | # source: https://www.who.int/immunization/sage/meetings/2017/october/2._target_immunity_levels_FUNK.pdf 114 | # source: https://illinoisreview.typepad.com/.a/6a00d834515c5469e201b8d0ddb9ce970c-pi 115 | label = "According to the CDC, 95 percent of a population needs to be \nvaccinated to stop the spread of measles and preserve herd \nimmunity. Many counties appear to fall below this threshold.", 116 | hjust = "right", 117 | lineheight = 0.9, 118 | family = "lato", 119 | color = "grey50") + 120 | # title 121 | ggtitle("Measles, mumps & rubella immunization rates at schools across 1244 counties/cities in the US") + 122 | labs(subtitle="According to data collected by The Wall Street Journal", 123 | caption = "Visualization by @carriebennete \nTwo counties with more than 100k students (Cook County, IL & \nLos Angeles County, CA) are not shown; both had rates above 95%.\nData are aggregated by county (or city when county data is missing)") + 124 | theme(panel.grid.major = element_line(size = 0.25), 125 | panel.grid.minor = element_blank(), 126 | panel.border = element_blank(), 127 | panel.background = element_blank()) 128 | fig 129 | 130 | quartz.save("test", type = "png", device = dev.cur(), dpi = 800) 131 | 132 | ggsave("measles_vaccination","MMR_rates.png", dpi = 300) 133 | -------------------------------------------------------------------------------- /NHL_goals/NHL_cumulative_goals.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(gganimate) 3 | library(ggplot2) 4 | library(lubridate) 5 | library(zoo) 6 | 7 | # get the data (https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-03-03/readme.md) 8 | game_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/game_goals.csv') 9 | top_250 <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/top_250.csv') 10 | season_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/season_goals.csv') 11 | 12 | #---------- CLEAN RAW DATA ------ # 13 | goals_by_date <- game_goals %>% 14 | select(player, date, age, team, goals, rank) %>% 15 | # fix age (prob better way to do this) 16 | mutate(year = as.numeric(substr(age, 1, 2)), 17 | days = as.numeric(substr(age, 4, 6))/365.25, 18 | age = year + days) %>% 19 | # want the cumulative number of goals by time/age 20 | group_by(player) %>% 21 | mutate(cum_goals = order_by(date, cumsum(goals))) %>% 22 | # now join with players data (for active/retired status) <-- didn't use in the end 23 | ungroup() %>% 24 | left_join(top_250 %>% select(player, active, yr_start), by = "player") 25 | 26 | # above creates graph that is not terrible...but there are some annoying seasonal trends (jumps between seasons) that I want to remove 27 | # so I will remove all data points that don't show a 1+ increase in cumulative scoring 28 | # this solution took me a while to figure out (and is likely suboptimally implemented)! 29 | 30 | #---------- PREP FOR VISUALIZATION ------ # 31 | goals_cumulative <- goals_by_date %>% 32 | # going to group across quarter (month wasn't enough given offseason length) 33 | mutate(yearqtr = as.yearqtr(date)) %>% 34 | group_by(player, yearqtr) %>% 35 | mutate(max_cum_goals = max(cum_goals)) %>% 36 | group_by(player) %>% 37 | arrange(desc(date)) %>% 38 | # how much did cumulative goals increase since prior time point? 39 | mutate(step = max_cum_goals - lead(max_cum_goals)) %>% 40 | # only keep if there was an increase (to avoid having lots of "steps" in graph) 41 | # OR observation was last (want to keep long tails) 42 | filter(step >= 1 | row_number()==n()) %>% 43 | # need to add in a row so that everyone starts with zero (graph looks funky otherwise) 44 | group_by(player) %>% 45 | do(add_row(., .before = 0)) %>% 46 | # now fill in the missing data for the row just created 47 | ungroup() %>% 48 | # need player (but must ungroup first) 49 | mutate(player = case_when(is.na(player) ~ lead(player, 1), TRUE ~ player)) %>% 50 | group_by(player) %>% 51 | mutate(age = case_when(is.na(age) ~ min(age, na.rm = T) - 0.25, TRUE ~ age), 52 | rank = case_when(is.na(rank) ~ min(rank, na.rm = T), TRUE ~ rank), 53 | max_cum_goals = case_when(is.na(max_cum_goals) ~ 0, TRUE ~ max_cum_goals), 54 | is_700 = case_when(any(max_cum_goals >= 700) ~ TRUE, TRUE ~ FALSE)) %>% 55 | ungroup() %>% 56 | # added in another "blank" player so that we can pause animation 57 | # hacky solution to make gganimate work the way I want... 58 | do(add_row(., .before = 0)) %>% 59 | mutate(player = case_when(is.na(player) ~ "blank", TRUE ~ player), 60 | age = case_when(player == "blank" ~ 20, TRUE ~ age), 61 | max_cum_goals = case_when(player == "blank" ~ 0, TRUE ~ max_cum_goals)) %>% 62 | # need to create a 2nd player variable so we can loop through SOME players in animation 63 | mutate(player_highlight = case_when(player %in% c("Alex Ovechkin", 64 | "Wayne Gretzky", 65 | "Teemu Selanne", 66 | "Mark Messier", 67 | "Brendan Shanahan", 68 | "blank") ~ player, 69 | TRUE ~ NA_character_), 70 | # make this a factor with set levels so we can control order they will be shown 71 | player_fct = factor(player_highlight, levels = c("Alex Ovechkin", 72 | "Wayne Gretzky", 73 | "Teemu Selanne", 74 | "blank", 75 | "Mark Messier", 76 | "Brendan Shanahan"))) %>% 77 | # and lastly add text (from WaPo) that will accompany animation 78 | mutate(text = case_when(player_highlight == "Alex Ovechkin" ~ "Ovechkin is the second-youngest player to score 700 goals.", 79 | player_highlight == "Wayne Gretzky" ~ "Gretzky reached this milestone at age 29. While Gretzky's scoring declined \nin the last decade of his career, he had 57 goals in his final 3 seasons.", 80 | player_highlight == "Teemu Selanne" ~ "To break Gretzky's record, Ovechkin could look to Teemu Selanne for \ninspiration: Selanne score 232 goals in his final 9 seasons.", 81 | player_highlight == "blank" ~ "But since 1980, only three players 35 or older have scored 40+ \ngoals in a season...", 82 | player_highlight == "Mark Messier" ~ "Mark Messier did first with 47 goals at age 35 in 1995-96.", 83 | player_highlight == "Brendan Shanahan" ~ "And Brendan Shanahan did with 40 goals at age 37 in 2005-06. \nDaniel Alfredsson (not shown) also scored 40 goals at age 35 in 2007-08", 84 | TRUE ~ "")) 85 | 86 | 87 | #---------- CREATE THE ANIMATED VIZ ------ # 88 | p = ggplot(goals_cumulative, aes(age, max_cum_goals)) + 89 | geom_hline(yintercept = 700, linetype = "dashed", size = 0.2, color = "grey40") + 90 | # lots of fiddling 91 | theme(legend.position = "none", 92 | axis.text = element_text(size = 10), 93 | axis.title = element_text(size = 10), 94 | axis.ticks = element_line(color = "grey60", size = 0.2), 95 | plot.title = element_text(size = 18, 96 | family="Times", 97 | face = "bold", 98 | hjust = 0.5), 99 | plot.caption = element_text(color = "grey70"), 100 | panel.grid.major.x = element_blank(), 101 | panel.grid.major.y = element_line(color = "grey90", size = 0.2), 102 | panel.grid.minor = element_blank(), 103 | panel.border = element_blank(), 104 | panel.background = element_blank()) + 105 | geom_text(aes(x = 20, label = text), y = 1000, hjust = "left", lineheight = 0.9, color = "grey30", check_overlap = TRUE) + 106 | ggtitle("Can he keep up the pace?") + 107 | labs(caption = "Visualization by @carriebennete") + 108 | geom_line(aes(y = max_cum_goals, group = player_fct, color = factor(player_fct)), size = 1.25) + 109 | scale_color_manual(values=c("#b00000", "#0d1094", "#de6d16", "#ffffff" , "#4f288f", "#076b01")) + 110 | # expand() makes graph "fill up" available space (ie no weird gaps) 111 | scale_y_continuous("\n", 112 | breaks = c(0, 300, 600, 900), 113 | limits=c(0, 1100), 114 | expand = c(0, 0)) + 115 | scale_x_continuous("Age", 116 | breaks = c(20, 25, 30, 35, 40, 45), 117 | expand = c(0, 0)) + 118 | # make transition pause on each player 119 | transition_states(player_fct, 1, 10, wrap = FALSE) + 120 | # include gray lines for players just shown 121 | shadow_mark(past = TRUE, exclude_layer = 2, color = "gray70", size = 0.5, alpha = 0.5) + 122 | exit_recolor(color = "gray80") + 123 | enter_recolor(color = "gray90") 124 | 125 | # control outpt ( so that it's slow enough to read text) 126 | animate(p, nframes = 300, fps = 10, width = 1000, height = 700, res = 150) 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /tdf_tidytuesday.R: -------------------------------------------------------------------------------- 1 | 2 | library(tidyverse) 3 | library(tdf) 4 | library(ggmap) 5 | library(ggalt) 6 | library(maps) 7 | library(geosphere) 8 | library(lubridate) 9 | library(plyr) 10 | 11 | # Get the Data 12 | ### hmm, the location data that I wanted (per looking at data dictionary online and then sketching ideas) is not on github... 13 | ### internet to the rescue: https://www.kaggle.com/jaminliu/tour-de-france-historical-tour-2018/version/4#unique_stage_geocode.csv 14 | stages <- read_csv("stages_TDF.csv", encoding = "UTF-8") 15 | 16 | ######################## 17 | ### GEOLOCATE CITIES ### 18 | ######################## 19 | 20 | # get unique list of locations 21 | unique_locations <- stages %>% 22 | select(Origin, Destination) %>% 23 | pivot_longer(c(Origin, Destination), names_to = "type", values_to = "city") %>% 24 | distinct(city) %>% 25 | mutate(lat = NA, lon = NA, address = NA) %>% 26 | # fix a couple cities that google can't find; if this was bigger I'd set up a prospective way to catch these 27 | mutate(city = case_when(city == "Pla d'Adet" ~ "Saint-Lary", 28 | city == "Merlin-Plage" ~ "Saint-Hilaire-de-Riez", 29 | city == "Montjuïc circuit" ~ "Barcelona", 30 | TRUE ~ city)) 31 | 32 | # use google API to get lat/lon for each city 33 | for(i in 1:nrow(unique_locations)) { 34 | 35 | # ping google for latitutde and longitude 36 | result <- geocode(unique_locations$city[i], output = "latlona") 37 | 38 | # store results 39 | unique_locations$lon[i] <- as.numeric(result[1]) 40 | unique_locations$lat[i] <- as.numeric(result[2]) 41 | unique_locations$address[i] <- as.character(result[3]) 42 | 43 | } 44 | 45 | # Few mistakes need to be fixed manually 46 | unique_locations$lat[unique_locations$city == "Joinville"] <- 48.444 47 | unique_locations$lon[unique_locations$city == "Joinville"] <- 5.139 48 | unique_locations$address[unique_locations$city == "Joinville"] <- "Joinville, France" 49 | 50 | unique_locations$lat[unique_locations$city == "Redon"] <- 47.655901 51 | unique_locations$lon[unique_locations$city == "Redon"] <- -2.080030 52 | unique_locations$address[unique_locations$city == "Redon"] <- "Redon, France" 53 | 54 | # save file locally so we don't have to keep pinging Google 55 | #write_csv(unique_locations, "tdf.csv") 56 | unique_locations <- read_csv("tdf.csv") 57 | 58 | # merge lat/lon data back on stages dataset & remove time trials (usually start/end in same city) 59 | stages_geo <- stages %>% 60 | mutate(Origin = case_when(Origin == "Pla d'Adet" ~ "Saint-Lary", 61 | Origin == "Merlin-Plage" | Destination == "Merlin-Plage" ~ "Saint-Hilaire-de-Riez", 62 | Origin == "Montjuïc circuit" | Destination == "Montjuïc circuit" ~ "Barcelona", 63 | TRUE ~ Origin)) %>% 64 | left_join(unique_locations %>% mutate(Origin = city), by = "Origin") %>% 65 | mutate(orig_lat = lat, 66 | orig_lon = lon) %>% 67 | select(-lat, -lon) %>% 68 | left_join(unique_locations %>% mutate(Destination = city), by = "Destination") %>% 69 | mutate(dest_lat = lat, 70 | dest_lon = lon) %>% 71 | mutate(dest_lat = ifelse(Date == "7/2/1975" , 46.69778, dest_lat), 72 | dest_lon = ifelse(Date == "7/2/1975" , -1.9445290, dest_lon)) %>% 73 | mutate(dest_lat = ifelse(Date == "6/30/1981" , 43.29510, dest_lat), 74 | dest_lon = ifelse(Date == "6/30/1981" , -0.370797, dest_lon)) %>% 75 | # Destination of Felsburg, Germany (weird: it's listed in France on map: https://en.wikipedia.org/wiki/1970_Tour_de_France) 76 | # must be a change in names; only Felsburg now is deep in Germany (so much so it definitely stood out on viz) 77 | mutate(dest_lat = ifelse(Date == "7/4/1970", 49.31346, dest_lat), 78 | dest_lon = ifelse(Date == "7/4/1970", 6.7522865, dest_lon)) %>% 79 | mutate(dest_lat = ifelse(Date == "7/6/1994", 50.81204, dest_lat), 80 | dest_lon = ifelse(Date == "7/6/1994", -1.0885444, dest_lon)) %>% 81 | filter(!is.na(dest_lat) & !(Date == "7/27/1939" & Origin == "Bonneval")) %>% # weird mountain time trial 82 | group_by(Origin) %>% 83 | dplyr::mutate(count = n()) %>% 84 | ungroup() %>% 85 | mutate(year = year(as.Date(Date, "%m/%d/%Y")), 86 | date = as.Date(Date, "%m/%d/%Y")) %>% 87 | arrange(year, date) %>% 88 | mutate(order = row_number()) %>% 89 | group_by(year) %>% 90 | mutate(psuedo_stage = row_number()) # some stages are 1a, 1b etc (back when they did 2x per day) 91 | 92 | 93 | ####################### 94 | ### CREATE BASE MAP ### 95 | ####################### 96 | 97 | # use "world" base map from map_data() limited to european countries (will also bound later below) 98 | base_map <- map_data("world", region = c("Portugal", "Spain", "France", "Belgium", "Netherlands", "UK", "Italy", "Denmark", "Poland", "Andorra", 99 | "Czech Republic", "Austria", "Switzerland", "Luxembourg", "Germany", "Ireland", "Northern Ireland")) 100 | 101 | # set colors 102 | base_map <- c(geom_polygon(aes(long, lat, group = group), 103 | size = 0.1, 104 | color= "#00001C", 105 | fill = "#252241", alpha = 0.8, data = base_map)) 106 | 107 | 108 | # test out what things look like with static plot...not too shabby! 109 | plot <- ggplot() + 110 | theme(panel.background = 111 | element_rect(fill='#00001C'), 112 | panel.grid.major = element_blank(), 113 | panel.grid.minor = element_blank()) + 114 | base_map + 115 | coord_cartesian(xlim = c(-10, 12), ylim = c(36, 58)) + 116 | theme(axis.line=element_blank(), 117 | axis.text.x=element_blank(), 118 | axis.text.y=element_blank(), 119 | axis.ticks=element_blank(), 120 | axis.title.x=element_blank(), 121 | axis.title.y=element_blank()) + 122 | geom_segment(aes(x = orig_lon, xend = dest_lon, y = orig_lat, yend = dest_lat, size = count), 123 | alpha = 0.1, 124 | color = "#FEFEF2", 125 | data = stages_geo %>% distinct(orig_lon, orig_lat, dest_lat, dest_lon, count)) + 126 | geom_segment(aes(x = orig_lon, xend = dest_lon, y = orig_lat, yend = dest_lat), 127 | alpha = 0.15, 128 | size = 0.05, 129 | color = "#FBFF29", 130 | data = stages_geo) 131 | 132 | ################################### 133 | ### CREATE GREAT CIRCLES (ARCS) ### 134 | ################################### 135 | # this blog was a huge help: https://medium.com/@mueller.johannes.j/use-r-and-gganimate-to-make-an-animated-map-of-european-students-and-their-year-abroad-517ad75dca06 136 | 137 | # great circles don't add the nice curved arcs in this case (geographic span is too small) 138 | # went down a deep dive learning about bezier curves as an alternative, but ended up thinking straight lines looked better 139 | 140 | # need to fortify the data for use with ggplot2 141 | fortify.SpatialLinesDataFrame <- function(model, data, ...){ 142 | ldply(model@lines, fortify) 143 | } 144 | 145 | # calculate routes for each row 146 | routes <- gcIntermediate(stages_geo[,c('orig_lon', 'orig_lat')], 147 | stages_geo[,c('dest_lon', 'dest_lat')], 148 | n = 18, 149 | breakAtDateLine = F, 150 | addStartEnd = T, 151 | sp = TRUE) 152 | # fortify to dataframe 153 | fortifiedroutes <- fortify.SpatialLinesDataFrame(routes) 154 | 155 | # merge to form great circles 156 | routes_count <- data.frame('id' = 1:nrow(stages_geo), 157 | 'Stage' = stages_geo$Stage, 158 | 'Year' = stages_geo$year, 159 | 'stage_order' = stages_geo$psuedo_stage) 160 | 161 | greatcircles <- merge(fortifiedroutes, routes_count, all.x=T, by='id') 162 | 163 | ############################ 164 | ### CREATE THE ANIMATION ### 165 | ############################ 166 | 167 | # want the routes to be revealed in same order as they were ridden 168 | # each route has 20 points (associated with each great arc) 169 | add_delay <- 0 170 | for(i in 1:max(greatcircles$stage_order)){ 171 | greatcircles$order[greatcircles$stage_order==i] <- 172 | greatcircles$order[greatcircles$stage_order==i] + add_delay 173 | add_delay = add_delay + 20 174 | } 175 | 176 | # want to show each year on it's own (ish); easiest way to do this seems to be outside of gganimate... 177 | # anyone knows of a better way to implement, please let me know! 178 | for (i in c(1903:1914, 1919:1939, 1947:2017)){ 179 | 180 | single <- greatcircles %>% 181 | filter(Year == i) 182 | 183 | plot <- ggplot() + 184 | theme(panel.background = 185 | element_rect(fill='#000021'), 186 | panel.grid.major = element_blank(), 187 | panel.grid.minor = element_blank()) + 188 | base_map + 189 | # add a couple of the big cities for reference 190 | annotate(geom="point", x=2.3522, y=48.8566, size = 0.75, 191 | color="#00001c") + 192 | annotate(geom="point", x=-1.5536, y=47.2184, size = 0.75, 193 | color="#00001c") + 194 | annotate(geom="point", x=4.8357, y=45.7640, size = 0.75, 195 | color="#00001c") + 196 | annotate(geom="point", x=1.4442, y=43.6047, size = 0.75, 197 | color="#00001c") + 198 | annotate(geom="text", x=2.3522, y=48.5566, label="Paris", size = 3, 199 | color="#00001c") + 200 | annotate(geom="text", x=-0.8536, y=47.2184, label="Nantes", size = 3, 201 | color="#00001c") + 202 | annotate(geom="text", x=4.8357, y=45.4640, label="Lyon", size = 3, 203 | color="#00001c") + 204 | annotate(geom="text", x=1.4442, y=43.9047, label="Toulouse", size = 3, 205 | color="#00001c") + 206 | coord_cartesian(xlim = c(-10, 12), ylim = c(36, 58)) + 207 | theme(axis.line=element_blank(), 208 | axis.text.x=element_blank(), 209 | axis.text.y=element_blank(), 210 | axis.ticks=element_blank(), 211 | axis.title.x=element_blank(), 212 | axis.title.y=element_blank()) + 213 | #trying to add a "glow" effect; think it's kinda lost 214 | geom_line(aes(long, lat, group = id), alpha = 0.25, 215 | size = 0.75, color = "#FDFFAF", data = single) + 216 | geom_line(aes(long, lat, group = id), alpha = 0.5, 217 | size = 0.5, color = "#FDFFAF", data = single) + 218 | annotate(geom="text", x=-10, y=58, label = "Around and around (and around and around) they go!", 219 | hjust = 0, color="#FDFFAF", alpha = 0.75, size = 12) + 220 | annotate(geom="text", x=-10, y=57, label = paste0("Visualizing the ", i, " stages of the Tour de France"), 221 | hjust = 0, color="#FDFFAF", alpha = 0.75, size = 8) + 222 | annotate(geom="text", x=5.9, y=35.5, label="Visualization: @carriebennette | Data: alastairrushworth/tdf & kaggle/jaminliu", 223 | size = 5, color="#FDFFAF", alpha = 0.5) 224 | 225 | anim <- plot + 226 | transition_reveal(single$order) 227 | 228 | animate(anim, duration = 1, fps = 10, width = 900, height = 900, renderer = gifski_renderer()) 229 | anim_save(paste0("tdf", i, ".gif")) 230 | 231 | } 232 | 233 | 234 | # fun facts: 235 | # 1913: first time the route was anticlockwise 236 | # 1926: first time that the race started outside Paris 237 | # 1960s: routes start to be discontinuous... 238 | 239 | # 1994 needs to have UK?? 240 | --------------------------------------------------------------------------------