├── NHL_goals
    ├── NHL_goals_dataviz.Rproj
    ├── tidytuesday_hockey.gif
    └── NHL_cumulative_goals.R
├── measles_vaccination
    ├── measles_vaccination_figure.png
    ├── measles_vaccination_dataviz.Rproj
    └── measles_vaccination.R
├── README.md
├── LICENSE
└── tdf_tidytuesday.R


/NHL_goals/NHL_goals_dataviz.Rproj:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/NHL_goals/tidytuesday_hockey.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carriebennette/TidyTuesdayProjects/HEAD/NHL_goals/tidytuesday_hockey.gif


--------------------------------------------------------------------------------
/measles_vaccination/measles_vaccination_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carriebennette/TidyTuesdayProjects/HEAD/measles_vaccination/measles_vaccination_figure.png


--------------------------------------------------------------------------------
/measles_vaccination/measles_vaccination_dataviz.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TidyTuesdayProjects
2 | Code and figures for my #TidyTuesday projects
3 | 
4 | Week 10: 
5 | ![Hockey_goals](https://github.com/carriebennette/TidyTuesdayProjects/blob/master/NHL_goals/tidytuesday_hockey.gif)
6 | Week 9: 
7 | ![MMR_rates](https://github.com/carriebennette/TidyTuesdayProjects/blob/master/measles_vaccination/measles_vaccination_figure.png)
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Carrie Bennette
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/measles_vaccination/measles_vaccination.R:
--------------------------------------------------------------------------------
  1 | 
  2 | library(binom)
  3 | library(ggplot2)
  4 | library(tidyr)
  5 | library(showtext)
  6 | 
  7 | # Get the raw data (https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-02-25/readme.md)
  8 | measles_raw <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv')
  9 | 
 10 | measles <- measles_raw %>%
 11 |   filter(enroll > 0 ) %>%  # a suprising number of schools have zero enrollment
 12 |   # get rid of the -1s (these are missing) 
 13 |   mutate(mmr = case_when(mmr == -1 ~ NA_real_,
 14 |                          TRUE ~ mmr)) %>%
 15 |   # Apparently RI, PA, and Arkansas don't have county data; use city instead
 16 |   # potentially come back and refine using lat/lon
 17 |   mutate(geo_group = case_when(state %in% c("Rhode Island", 
 18 |                                             "Arkansas", 
 19 |                                             "Pennsylvania") ~ city, 
 20 |                                TRUE ~ county)) %>%
 21 |   filter(!is.na(geo_group)) %>% # note: FL is dropped (not county or city data..)
 22 |   # we could (probably?) assume that MMR rates ~ overall rates (e.g., for states like Iowa that only report overall rates)
 23 |   mutate(mmr_adj = case_when(is.na(mmr) & overall != -1 ~ overall,
 24 |                              TRUE ~ mmr)) %>% 
 25 |   # there are duplicates!
 26 |   distinct(name, city, state, county, geo_group, enroll, mmr, mmr_adj, overall, type, xrel, xmed, xper) %>%
 27 | 
 28 |   # school with largest enrollment is 2x bigger than second largest (6k vs 3k)
 29 |   # school is "West Valley School Prek-6" in Montana...googling suggests enroll is actually closer to n=400
 30 |   # https://www.publicschoolreview.com/west-valley-school-profile
 31 |   # drop school for now; if this was more than just a fun data viz exercise I'd definitely dig deeper into data collection
 32 |   filter(name != "West Valley School Prek-6") %>%
 33 |   group_by(state) %>%
 34 |   # get state averages (for color scale)
 35 |   mutate(state_avg = weighted.mean(mmr, w = enroll, na.rm = T)) %>% 
 36 |   ungroup() %>%
 37 |   group_by(geo_group, state) %>%
 38 |   summarise(mmr_rate = weighted.mean(mmr, w = enroll, na.rm = T),
 39 |             enroll = sum(enroll, na.rm = T),
 40 |             state_avg = mean(state_avg, na.rm = T)) %>%
 41 |   mutate(enroll_group = case_when(enroll < 500 ~ 0.1,
 42 |                           enroll >= 500 & enroll < 1000 ~ 0.15,
 43 |                           enroll >= 1000 & enroll < 2500 ~ 0.2,
 44 |                           enroll >= 2500 & enroll < 5000 ~ 0.25,
 45 |                           enroll >= 5000 & enroll < 10000 ~ 0.3,
 46 |                           enroll >= 10000 & enroll < 20000 ~ 0.4,
 47 |                           enroll >= 20000 & enroll < 50000 ~ 0.5,
 48 |                           enroll >= 50000 & enroll < 60000 ~ 0.6,
 49 |                           enroll >= 60000 & enroll < 70000 ~ 0.7,
 50 |                           enroll >= 70000 & enroll < 80000 ~ 0.8,
 51 |                           enroll >= 80000 & enroll < 90000 ~ 0.9,
 52 |                           enroll >= 100000 ~ 1,
 53 |                           TRUE ~ 1)) %>%
 54 |   filter(enroll < 100000 & enroll > 250) %>%
 55 |   ungroup()
 56 |  
 57 | 
 58 | # so I learned today that you can download fancy fonts and use them within R 
 59 | # good timing as I no longer have Illustrator until I get a new laptop...
 60 | 
 61 | # load the graphics device engine driver that will show the fancy plots
 62 | dev.off()
 63 | quartz()
 64 | 
 65 | fig <- ggplot(measles, aes(enroll, mmr_rate, color = state_avg, alpha = enroll_group)) + 
 66 |   geom_point(size = 3.5) +
 67 |   theme_minimal() +
 68 |   theme(legend.position = "bottom", 
 69 |         axis.title.y = element_text(angle = 0, vjust = 1),
 70 |         axis.text = element_text(size = 11, family = "roboto"),
 71 |         axis.title = element_text(size = 13, family = "roboto"),
 72 |         plot.title = element_text(size = 17, face = "bold", family = "roboto"),
 73 |         plot.caption = element_text(color = "grey70")) +
 74 |   guides(alpha = FALSE) +
 75 |   scale_y_continuous("\n", 
 76 |                      labels = function(x) paste0(sprintf("%.0f", x),"%")) +
 77 |   scale_x_continuous("\nStudent Enrollment\n", 
 78 |                      labels = scales::comma) +
 79 |   scale_color_gradient(low = "red", 
 80 |                        high = "blue", 
 81 |                        limits=c(88, 96),
 82 |                        oob = scales::squish,
 83 |                        guide = guide_colorbar(direction = "horizontal",
 84 |                                               title.position = "top",
 85 |                                               title = "State average immunization rate",
 86 |                                               barwidth = 15,
 87 |                                               barheight = 0.25,
 88 |                                               ticks = FALSE,
 89 |                                               title.hjust = 0.5)) +
 90 |   # annotations! call out Little Rock, AR
 91 |   annotate(geom = "curve", x = 28848, y = 68, xend = 20548, yend = 77, 
 92 |            curvature = -.23, color = "grey60", arrow = arrow(length = unit(1, "mm"))) +
 93 |   annotate(geom = "text", x = 29400, y = 68, size = 3,
 94 |            label = "Little Rock, AR had a notably low rate \ngiven it's relatively large enrollment", 
 95 |            family = "lato",
 96 |            hjust = "left",
 97 |            lineheight = 0.9,
 98 |            color = "grey50") +
 99 |   # call out Brinkley, AR
100 |   annotate(geom = "curve", x = 12000, y = 50, xend = 505, yend = 39.75, 
101 |            curvature = .23, color = "grey60", arrow = arrow(length = unit(1, "mm"))) +
102 |   annotate(geom = "text", x = 12500, y = 50, size = 3,
103 |            label = "When based on a smaller number of students, seemingly large differences are often \ndue to chance. Estimates from less populous counties are shown with lower transparency \nto de-emphasize such variability. Despite having fewer than 500 students, \nBrinkley, AR still manages to stand out for it's exceptionally low rate.", 
104 |            family = "lato",
105 |            hjust = "left",
106 |            lineheight = 0.9,
107 |            color = "grey50") +
108 |   # call out dashed line
109 |   geom_hline(yintercept = 95, linetype = "dashed", size = 0.25, color = "grey40") +
110 |   annotate(geom = "curve", x = 90000, y = 88, xend = 90000, yend = 94, 
111 |            curvature = .23, color = "grey60", arrow = arrow(length = unit(1, "mm"))) +
112 |   annotate(geom = "text", x = 89500, y = 88, size = 3,
113 |            # source: https://www.who.int/immunization/sage/meetings/2017/october/2._target_immunity_levels_FUNK.pdf
114 |            # source: https://illinoisreview.typepad.com/.a/6a00d834515c5469e201b8d0ddb9ce970c-pi
115 |            label = "According to the CDC, 95 percent of a population needs to be \nvaccinated to stop the spread of measles and preserve herd \nimmunity. Many counties appear to fall below this threshold.", 
116 |            hjust = "right",
117 |            lineheight = 0.9,
118 |            family = "lato",
119 |            color = "grey50") +
120 |   # title
121 |   ggtitle("Measles, mumps & rubella immunization rates at schools across 1244 counties/cities in the US") +
122 |   labs(subtitle="According to data collected by The Wall Street Journal",
123 |        caption = "Visualization by @carriebennete \nTwo counties with more than 100k students (Cook County, IL & \nLos Angeles County, CA) are not shown; both had rates above 95%.\nData are aggregated by county (or city when county data is missing)") +
124 |   theme(panel.grid.major = element_line(size = 0.25),
125 |         panel.grid.minor = element_blank(),
126 |         panel.border = element_blank(),
127 |         panel.background = element_blank())
128 | fig
129 | 
130 | quartz.save("test", type = "png", device = dev.cur(), dpi = 800)
131 | 
132 | ggsave("measles_vaccination","MMR_rates.png", dpi = 300)
133 | 


--------------------------------------------------------------------------------
/NHL_goals/NHL_cumulative_goals.R:
--------------------------------------------------------------------------------
  1 | library(dplyr)
  2 | library(gganimate)
  3 | library(ggplot2)
  4 | library(lubridate)
  5 | library(zoo)
  6 | 
  7 | # get the data (https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-03-03/readme.md)
  8 | game_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/game_goals.csv')
  9 | top_250 <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/top_250.csv')
 10 | season_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/season_goals.csv')
 11 | 
 12 | #---------- CLEAN RAW DATA ------ #
 13 | goals_by_date <- game_goals %>%
 14 |   select(player, date, age, team, goals, rank) %>%
 15 |   # fix age (prob better way to do this)
 16 |   mutate(year = as.numeric(substr(age, 1, 2)),
 17 |          days = as.numeric(substr(age, 4, 6))/365.25,
 18 |          age = year + days) %>%
 19 |   # want the cumulative number of goals by time/age
 20 |   group_by(player) %>%
 21 |   mutate(cum_goals = order_by(date, cumsum(goals))) %>%
 22 |   # now join with players data (for active/retired status) <-- didn't use in the end
 23 |   ungroup() %>%
 24 |   left_join(top_250 %>% select(player, active, yr_start), by = "player") 
 25 | 
 26 | # above creates graph that is not terrible...but there are some annoying seasonal trends (jumps between seasons) that I want to remove
 27 | # so I will remove all data points that don't show a 1+ increase in cumulative scoring
 28 | # this solution took me a while to figure out (and is likely suboptimally implemented)!
 29 | 
 30 | #---------- PREP FOR VISUALIZATION ------ #
 31 | goals_cumulative <- goals_by_date %>%
 32 |   # going to group across quarter (month wasn't enough given offseason length)
 33 |   mutate(yearqtr = as.yearqtr(date)) %>%
 34 |   group_by(player, yearqtr) %>%
 35 |   mutate(max_cum_goals = max(cum_goals)) %>%
 36 |   group_by(player) %>%
 37 |   arrange(desc(date)) %>%
 38 |   # how much did cumulative goals increase since prior time point?
 39 |   mutate(step = max_cum_goals - lead(max_cum_goals)) %>%
 40 |   # only keep if there was an increase (to avoid having lots of "steps" in graph)
 41 |   # OR observation was last (want to keep long tails)
 42 |   filter(step >= 1 | row_number()==n()) %>%
 43 |   # need to add in a row so that everyone starts with zero (graph looks funky otherwise)
 44 |   group_by(player) %>%
 45 |   do(add_row(., .before = 0)) %>%
 46 |   # now fill in the missing data for the row just created
 47 |   ungroup() %>%
 48 |   # need player (but must ungroup first)
 49 |   mutate(player = case_when(is.na(player) ~ lead(player, 1), TRUE ~ player)) %>%
 50 |   group_by(player) %>% 
 51 |   mutate(age = case_when(is.na(age) ~ min(age, na.rm = T) - 0.25, TRUE ~ age),
 52 |          rank = case_when(is.na(rank) ~ min(rank, na.rm = T), TRUE ~ rank),
 53 |          max_cum_goals = case_when(is.na(max_cum_goals) ~ 0, TRUE ~ max_cum_goals),
 54 |          is_700 = case_when(any(max_cum_goals >= 700) ~ TRUE, TRUE ~ FALSE)) %>%
 55 |   ungroup() %>%
 56 |   # added in another "blank" player so that we can pause animation
 57 |   # hacky solution to make gganimate work the way I want...
 58 |   do(add_row(., .before = 0)) %>%
 59 |   mutate(player = case_when(is.na(player) ~ "blank", TRUE ~ player),
 60 |          age = case_when(player == "blank" ~ 20, TRUE ~ age),
 61 |          max_cum_goals = case_when(player == "blank" ~ 0, TRUE ~ max_cum_goals)) %>%
 62 |   # need to create a 2nd player variable so we can loop through SOME players in animation
 63 |   mutate(player_highlight = case_when(player %in% c("Alex Ovechkin", 
 64 |                                                     "Wayne Gretzky",
 65 |                                                     "Teemu Selanne", 
 66 |                                                     "Mark Messier", 
 67 |                                                     "Brendan Shanahan",
 68 |                                                     "blank") ~ player, 
 69 |                                       TRUE ~ NA_character_),
 70 |          # make this a factor with set levels so we can control order they will be shown
 71 |          player_fct = factor(player_highlight, levels = c("Alex Ovechkin", 
 72 |                                                           "Wayne Gretzky",
 73 |                                                           "Teemu Selanne",
 74 |                                                           "blank",
 75 |                                                           "Mark Messier", 
 76 |                                                           "Brendan Shanahan"))) %>%
 77 |   # and lastly add text (from WaPo) that will accompany animation 
 78 |   mutate(text = case_when(player_highlight == "Alex Ovechkin" ~ "Ovechkin is the second-youngest player to score 700 goals.",
 79 |                           player_highlight == "Wayne Gretzky" ~ "Gretzky reached this milestone at age 29. While Gretzky's scoring declined \nin the last decade of his career, he had 57 goals in his final 3 seasons.",
 80 |                           player_highlight == "Teemu Selanne" ~ "To break Gretzky's record, Ovechkin could look to Teemu Selanne for \ninspiration: Selanne score 232 goals in his final 9 seasons.",
 81 |                           player_highlight == "blank" ~ "But since 1980, only three players 35 or older have scored 40+ \ngoals in a season...",
 82 |                           player_highlight == "Mark Messier" ~ "Mark Messier did first with 47 goals at age 35 in 1995-96.",
 83 |                           player_highlight == "Brendan Shanahan" ~ "And Brendan Shanahan did with 40 goals at age 37 in 2005-06. \nDaniel Alfredsson (not shown) also scored 40 goals at age 35 in 2007-08",
 84 |                           TRUE ~ "")) 
 85 | 
 86 | 
 87 | #---------- CREATE THE ANIMATED VIZ ------ #
 88 | p = ggplot(goals_cumulative, aes(age, max_cum_goals)) +
 89 |   geom_hline(yintercept = 700, linetype = "dashed", size = 0.2, color = "grey40") +
 90 |   # lots of fiddling
 91 |   theme(legend.position = "none", 
 92 |         axis.text = element_text(size = 10),
 93 |         axis.title = element_text(size = 10),
 94 |         axis.ticks = element_line(color = "grey60", size = 0.2),
 95 |         plot.title = element_text(size = 18, 
 96 |                                   family="Times", 
 97 |                                   face = "bold", 
 98 |                                   hjust = 0.5),
 99 |         plot.caption = element_text(color = "grey70"),
100 |         panel.grid.major.x = element_blank(),
101 |         panel.grid.major.y = element_line(color = "grey90", size = 0.2),
102 |         panel.grid.minor = element_blank(),
103 |         panel.border = element_blank(),
104 |         panel.background = element_blank()) +
105 |   geom_text(aes(x = 20, label = text), y = 1000, hjust = "left", lineheight = 0.9, color = "grey30", check_overlap = TRUE) +
106 |   ggtitle("Can he keep up the pace?") +
107 |   labs(caption = "Visualization by @carriebennete") +
108 |   geom_line(aes(y = max_cum_goals, group = player_fct, color = factor(player_fct)), size = 1.25) +
109 |   scale_color_manual(values=c("#b00000", "#0d1094", "#de6d16", "#ffffff" , "#4f288f", "#076b01")) +
110 |   # expand() makes graph "fill up" available space (ie no weird gaps) 
111 |   scale_y_continuous("\n", 
112 |                      breaks = c(0, 300, 600, 900), 
113 |                      limits=c(0, 1100), 
114 |                      expand = c(0, 0)) +
115 |   scale_x_continuous("Age", 
116 |                      breaks = c(20, 25, 30, 35, 40, 45),
117 |                      expand = c(0, 0)) +
118 |   # make transition pause on each player
119 |   transition_states(player_fct, 1, 10, wrap = FALSE) +
120 |   # include gray lines for players just shown
121 |   shadow_mark(past = TRUE, exclude_layer = 2, color = "gray70", size = 0.5, alpha = 0.5) +
122 |   exit_recolor(color = "gray80") +
123 |   enter_recolor(color = "gray90")
124 | 
125 | # control outpt ( so that it's slow enough to read text)
126 | animate(p, nframes = 300, fps = 10, width = 1000, height = 700, res = 150)
127 | 
128 | 
129 | 
130 | 


--------------------------------------------------------------------------------
/tdf_tidytuesday.R:
--------------------------------------------------------------------------------
  1 | 
  2 | library(tidyverse)
  3 | library(tdf) 
  4 | library(ggmap)
  5 | library(ggalt)
  6 | library(maps)
  7 | library(geosphere)
  8 | library(lubridate)
  9 | library(plyr)
 10 | 
 11 | # Get the Data
 12 | ### hmm, the location data that I wanted (per looking at data dictionary online and then sketching ideas) is not on github...
 13 | ### internet to the rescue: https://www.kaggle.com/jaminliu/tour-de-france-historical-tour-2018/version/4#unique_stage_geocode.csv
 14 | stages <- read_csv("stages_TDF.csv", encoding = "UTF-8")
 15 | 
 16 | ########################
 17 | ### GEOLOCATE CITIES ###
 18 | ########################
 19 | 
 20 | # get unique list of locations
 21 | unique_locations <- stages %>%
 22 |   select(Origin, Destination) %>%
 23 |   pivot_longer(c(Origin, Destination), names_to = "type", values_to = "city") %>%
 24 |   distinct(city) %>%
 25 |   mutate(lat = NA, lon = NA, address = NA) %>%
 26 |   # fix a couple cities that google can't find; if this was bigger I'd set up a prospective way to catch these
 27 |   mutate(city = case_when(city == "Pla d'Adet" ~ "Saint-Lary", 
 28 |                           city == "Merlin-Plage" ~ "Saint-Hilaire-de-Riez", 
 29 |                           city == "Montjuïc circuit" ~ "Barcelona", 
 30 |                           TRUE ~ city))
 31 | 
 32 | # use google API to get lat/lon for each city
 33 | for(i in 1:nrow(unique_locations)) {
 34 |   
 35 |   # ping google for latitutde and longitude
 36 |   result <- geocode(unique_locations$city[i], output = "latlona")
 37 |   
 38 |   # store results
 39 |   unique_locations$lon[i] <- as.numeric(result[1])
 40 |   unique_locations$lat[i] <- as.numeric(result[2])
 41 |   unique_locations$address[i] <- as.character(result[3])
 42 |   
 43 | }
 44 | 
 45 | # Few mistakes need to be fixed manually
 46 | unique_locations$lat[unique_locations$city == "Joinville"] <- 48.444
 47 | unique_locations$lon[unique_locations$city == "Joinville"] <- 5.139
 48 | unique_locations$address[unique_locations$city == "Joinville"] <- "Joinville, France"
 49 | 
 50 | unique_locations$lat[unique_locations$city == "Redon"] <- 47.655901
 51 | unique_locations$lon[unique_locations$city == "Redon"] <- -2.080030
 52 | unique_locations$address[unique_locations$city == "Redon"] <- "Redon, France"
 53 | 
 54 | # save file locally so we don't have to keep pinging Google
 55 |   #write_csv(unique_locations, "tdf.csv")
 56 |   unique_locations <- read_csv("tdf.csv")
 57 | 
 58 | # merge lat/lon data back on stages dataset & remove time trials (usually start/end in same city)
 59 | stages_geo <- stages %>% 
 60 |   mutate(Origin = case_when(Origin == "Pla d'Adet" ~ "Saint-Lary", 
 61 |                           Origin == "Merlin-Plage" | Destination == "Merlin-Plage" ~ "Saint-Hilaire-de-Riez", 
 62 |                           Origin == "Montjuïc circuit" | Destination == "Montjuïc circuit" ~ "Barcelona", 
 63 |                           TRUE ~ Origin)) %>%
 64 |   left_join(unique_locations %>% mutate(Origin = city), by = "Origin") %>%
 65 |   mutate(orig_lat = lat, 
 66 |          orig_lon = lon) %>%
 67 |   select(-lat, -lon) %>%
 68 |   left_join(unique_locations %>% mutate(Destination = city), by = "Destination") %>%
 69 |   mutate(dest_lat = lat,
 70 |          dest_lon = lon) %>%
 71 |   mutate(dest_lat = ifelse(Date == "7/2/1975" , 46.69778, dest_lat),
 72 |          dest_lon = ifelse(Date == "7/2/1975" , -1.9445290, dest_lon)) %>%
 73 |   mutate(dest_lat = ifelse(Date == "6/30/1981" , 43.29510, dest_lat),
 74 |          dest_lon = ifelse(Date == "6/30/1981" , -0.370797, dest_lon)) %>%
 75 |   # Destination of Felsburg, Germany (weird: it's listed in France on map: https://en.wikipedia.org/wiki/1970_Tour_de_France)
 76 |   # must be a change in names; only Felsburg now is deep in Germany (so much so it definitely stood out on viz)
 77 |   mutate(dest_lat = ifelse(Date == "7/4/1970", 49.31346, dest_lat),
 78 |          dest_lon = ifelse(Date == "7/4/1970", 6.7522865, dest_lon)) %>%
 79 |   mutate(dest_lat = ifelse(Date == "7/6/1994", 50.81204, dest_lat),
 80 |          dest_lon = ifelse(Date == "7/6/1994", -1.0885444, dest_lon)) %>%
 81 |   filter(!is.na(dest_lat) & !(Date == "7/27/1939" & Origin == "Bonneval")) %>% # weird mountain time trial
 82 |   group_by(Origin) %>%
 83 |   dplyr::mutate(count = n()) %>%
 84 |   ungroup() %>%
 85 |   mutate(year = year(as.Date(Date, "%m/%d/%Y")),
 86 |          date = as.Date(Date, "%m/%d/%Y")) %>%
 87 |   arrange(year, date) %>%
 88 |   mutate(order = row_number()) %>%
 89 |   group_by(year) %>%
 90 |   mutate(psuedo_stage = row_number()) # some stages are 1a, 1b etc (back when they did 2x per day)
 91 |   
 92 | 
 93 | #######################
 94 | ### CREATE BASE MAP ###
 95 | #######################
 96 | 
 97 | # use "world" base map from map_data() limited to european countries (will also bound later below)
 98 | base_map <- map_data("world", region = c("Portugal", "Spain", "France", "Belgium", "Netherlands", "UK", "Italy", "Denmark", "Poland", "Andorra",
 99 |                                          "Czech Republic", "Austria", "Switzerland", "Luxembourg", "Germany", "Ireland", "Northern Ireland"))
100 | 
101 | # set colors
102 | base_map <- c(geom_polygon(aes(long, lat, group = group), 
103 |                      size = 0.1, 
104 |                      color= "#00001C",  
105 |                      fill = "#252241", alpha = 0.8, data = base_map))
106 | 
107 | 
108 | # test out what things look like with static plot...not too shabby!
109 | plot <- ggplot() + 
110 |   theme(panel.background =   
111 |         element_rect(fill='#00001C'),
112 |         panel.grid.major = element_blank(), 
113 |         panel.grid.minor = element_blank()) + 
114 |   base_map +
115 |   coord_cartesian(xlim = c(-10, 12), ylim = c(36, 58)) +
116 |   theme(axis.line=element_blank(),
117 |         axis.text.x=element_blank(),
118 |         axis.text.y=element_blank(),
119 |         axis.ticks=element_blank(),
120 |         axis.title.x=element_blank(),
121 |         axis.title.y=element_blank()) +
122 |   geom_segment(aes(x = orig_lon, xend = dest_lon, y = orig_lat, yend = dest_lat,  size = count), 
123 |                alpha = 0.1, 
124 |                color = "#FEFEF2",  
125 |                data = stages_geo %>% distinct(orig_lon, orig_lat, dest_lat, dest_lon, count)) +
126 |   geom_segment(aes(x = orig_lon, xend = dest_lon, y = orig_lat, yend = dest_lat), 
127 |                alpha = 0.15,
128 |                size = 0.05, 
129 |                color = "#FBFF29",  
130 |                data = stages_geo) 
131 | 
132 | ###################################
133 | ### CREATE GREAT CIRCLES (ARCS) ###
134 | ###################################
135 | # this blog was a huge help: https://medium.com/@mueller.johannes.j/use-r-and-gganimate-to-make-an-animated-map-of-european-students-and-their-year-abroad-517ad75dca06
136 | 
137 | # great circles don't add the nice curved arcs in this case (geographic span is too small)
138 | # went down a deep dive learning about bezier curves as an alternative, but ended up thinking straight lines looked better
139 | 
140 | # need to fortify the data for use with ggplot2
141 | fortify.SpatialLinesDataFrame <- function(model, data, ...){
142 |   ldply(model@lines, fortify)
143 | }
144 | 
145 | # calculate routes for each row
146 | routes <- gcIntermediate(stages_geo[,c('orig_lon', 'orig_lat')], 
147 |                          stages_geo[,c('dest_lon', 'dest_lat')], 
148 |                          n = 18, 
149 |                          breakAtDateLine = F, 
150 |                          addStartEnd = T, 
151 |                          sp = TRUE)
152 | # fortify to dataframe
153 | fortifiedroutes <- fortify.SpatialLinesDataFrame(routes)
154 | 
155 | # merge to form great circles
156 | routes_count <- data.frame('id' = 1:nrow(stages_geo), 
157 |                            'Stage' = stages_geo$Stage, 
158 |                            'Year' = stages_geo$year,
159 |                            'stage_order' = stages_geo$psuedo_stage)
160 | 
161 | greatcircles <- merge(fortifiedroutes, routes_count, all.x=T, by='id')
162 | 
163 | ############################
164 | ### CREATE THE ANIMATION ###
165 | ############################
166 | 
167 | # want the routes to be revealed in same order as they were ridden
168 | # each route has 20 points (associated with each great arc)
169 | add_delay <- 0
170 | for(i in 1:max(greatcircles$stage_order)){
171 |   greatcircles$order[greatcircles$stage_order==i] <-
172 |     greatcircles$order[greatcircles$stage_order==i] + add_delay
173 |   add_delay = add_delay + 20
174 | }
175 | 
176 | # want to show each year on it's own (ish); easiest way to do this seems to be outside of gganimate...
177 | # anyone knows of a better way to implement, please let me know!
178 | for (i in c(1903:1914, 1919:1939, 1947:2017)){
179 |   
180 | single <- greatcircles %>%
181 |   filter(Year == i) 
182 | 
183 | plot <- ggplot() + 
184 |   theme(panel.background =   
185 |           element_rect(fill='#000021'),
186 |         panel.grid.major = element_blank(), 
187 |         panel.grid.minor = element_blank()) + 
188 |   base_map +
189 |   # add a couple of the big cities for reference
190 |   annotate(geom="point", x=2.3522, y=48.8566, size = 0.75,
191 |            color="#00001c") +
192 |   annotate(geom="point", x=-1.5536, y=47.2184, size = 0.75,
193 |            color="#00001c") +
194 |   annotate(geom="point", x=4.8357, y=45.7640, size = 0.75,
195 |            color="#00001c") +
196 |   annotate(geom="point", x=1.4442, y=43.6047, size = 0.75,
197 |            color="#00001c") +
198 |   annotate(geom="text", x=2.3522, y=48.5566, label="Paris", size = 3,
199 |            color="#00001c") +
200 |   annotate(geom="text", x=-0.8536, y=47.2184, label="Nantes",  size = 3,
201 |            color="#00001c") +
202 |   annotate(geom="text", x=4.8357, y=45.4640, label="Lyon", size = 3,
203 |            color="#00001c") +
204 |   annotate(geom="text", x=1.4442, y=43.9047, label="Toulouse", size = 3,
205 |            color="#00001c") +
206 |   coord_cartesian(xlim = c(-10, 12), ylim = c(36, 58)) +
207 |   theme(axis.line=element_blank(),
208 |         axis.text.x=element_blank(),
209 |         axis.text.y=element_blank(),
210 |         axis.ticks=element_blank(),
211 |         axis.title.x=element_blank(),
212 |         axis.title.y=element_blank()) +
213 |   #trying to add a "glow" effect; think it's kinda lost
214 |   geom_line(aes(long, lat, group = id), alpha = 0.25, 
215 |             size = 0.75, color = "#FDFFAF", data = single) +
216 |   geom_line(aes(long, lat, group = id), alpha = 0.5, 
217 |             size = 0.5, color = "#FDFFAF", data = single) +
218 |   annotate(geom="text", x=-10, y=58, label = "Around and around (and around and around) they go!", 
219 |            hjust = 0, color="#FDFFAF", alpha = 0.75, size = 12) +
220 |   annotate(geom="text", x=-10, y=57, label = paste0("Visualizing the ", i, " stages of the Tour de France"), 
221 |            hjust = 0, color="#FDFFAF", alpha = 0.75, size = 8) +
222 |   annotate(geom="text", x=5.9, y=35.5, label="Visualization: @carriebennette | Data: alastairrushworth/tdf & kaggle/jaminliu", 
223 |            size = 5, color="#FDFFAF", alpha = 0.5) 
224 | 
225 | anim <- plot + 
226 |   transition_reveal(single$order) 
227 | 
228 |   animate(anim, duration = 1, fps = 10, width = 900, height = 900, renderer = gifski_renderer())
229 |   anim_save(paste0("tdf", i, ".gif"))
230 | 
231 | }
232 | 
233 | 
234 | # fun facts:
235 | # 1913: first time the route was anticlockwise
236 | # 1926: first time that the race started outside Paris
237 | # 1960s: routes start to be discontinuous... 
238 | 
239 | # 1994 needs to have UK??
240 | 


--------------------------------------------------------------------------------