├── 2020 ├── 10_Hockey.R ├── 11_Tuition.R ├── 12_Office.R ├── 13_TBI.R ├── 14_Beer.R ├── 15_TourdeFrance.R ├── 16_HipHop.R ├── 17_GDPR.R ├── 18_Broadway.R ├── 19_AnimalCrossing.R ├── 19_AnimalCrossing_dataprep.R ├── 20_Volcanoes.R ├── 21_Volleyball.R ├── 22_Cocktails.R ├── 25_university_statements.R ├── 26_Caribou.R ├── 27_x-men.R ├── 28_coffee.R ├── 29_astronauts.R ├── 29_astronauts.png ├── 2_Transit.png ├── 2_transit.R ├── 30_RSPCA.R ├── 31_penguins.R ├── 32_EuropeanEnergy.R ├── 33_Airbender.R ├── 33_Airbender.png ├── 34_ExoticPlants.R ├── 36_Crops.R ├── 36_Crops.png ├── 37_Friends.R ├── 43_MoreBeer.R ├── 43_MoreBeer.png ├── 48_WATrails.R ├── 48_WATrails.png ├── 49_Shelters.R ├── 8_CO2Food.R ├── 9_Measles.R ├── AnimalCrossing.png ├── AreaPlot.png ├── Beer.png ├── Broadway.png ├── Caribou.png ├── Cocktails.png ├── Coffee1.png ├── Coffee2.png ├── First.png ├── GDPR.png ├── HipHop.png ├── Hockey.png ├── Measles.png ├── Measles.tiff ├── NewAvatar.png ├── Penguins.png ├── Plants.png ├── Plants2.png ├── Plants_cowplot.png ├── RSPCA.png ├── RSPCA2.png ├── Salary_Tuition.png ├── Second.png ├── TheOffice.png ├── TourdeFrance.png ├── Tuition.gif └── statements.csv ├── 2021 ├── 11_2021_bechdel.R ├── 17_2021_netflix.R ├── 51_2021_spice_girls.R ├── 5_2021_plastics.R ├── 5_2021_plastics.png ├── 9_2021_employment.R ├── BechdelMoons.png ├── CountryBechdel.png └── incomplete │ ├── 23_2021_survivor.R │ ├── 25_2021_DuBoisChallenge.R │ ├── 35_lemurs.R │ └── X_2021_gunviolence.R ├── .gitignore ├── 19_2022_bestsellers.R ├── HelperFunctions.R └── books.csv /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | *.RData 4 | *.rds 5 | *.png 6 | *.zip 7 | Other/ 8 | .Ruserdata 9 | Dairy.png 10 | ^incomplete/$ 11 | 12 | 2021/RaincloudPlots-master/ -------------------------------------------------------------------------------- /19_2022_bestsellers.R: 
-------------------------------------------------------------------------------- 1 | #19_bestsellers 2 | 3 | # packages ---------------------------------------------------------------- 4 | library(tidyverse) 5 | #devtools::install_github("BlakeRMills/MetBrewer") 6 | library(MetBrewer) 7 | library(ggrepel) 8 | library(hrbrthemes) 9 | 10 | 11 | 12 | # Read in the data manually 13 | nyt_titles_raw <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_titles.tsv') 14 | # Clean out the ! and ? from authors 15 | nyt_titles <- nyt_titles_raw %>% 16 | mutate(author = str_replace(author, pattern = "[[:punct:]] by ","")) 17 | 18 | nyt_full <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_full.tsv') 19 | 20 | # Goodreads books from here: https://www.kaggle.com/datasets/jealousleopard/goodreadsbooks?resource=download 21 | gr <- read.csv("books.csv") #This dataset only goes to 2020 so I think it actually matches well with the NYT list 22 | gr2 <- gr %>% 23 | mutate(title = toupper(title)) 24 | 25 | colnames(gr2) 26 | colnames(nyt_titles) 27 | 28 | # All the titles 29 | dat <- nyt_titles %>% 30 | left_join(distinct(gr2,title,.keep_all=T), by = c("title")) %>% # don't match with duplicated titles in goodreads 31 | mutate_at(c('average_rating','num_pages','text_reviews_count'),as.numeric) 32 | 33 | #Titles with both goodreads and NYT metrics 34 | dat_matches <- dat %>% 35 | filter(!is.na(bookID)) 36 | 37 | baseplot <- ggplot(dat_matches, 38 | aes(x=total_weeks, y=average_rating, 39 | color = language_code)) + 40 | geom_point(alpha = 0.7) + 41 | xlab("Weeks on NYT Bestseller list") + 42 | ylab("Average rating on Goodreads") + 43 | scale_color_met_d(name = 'Benedictus') + 44 | hrbrthemes::theme_ft_rc() 45 | 46 | manyweeks <- dat_matches %>% 47 | filter(total_weeks>120) %>% 48 | mutate(title = stringr::str_to_title(title)) 49 | 50 | highratings <- dat_matches 
%>% 51 | filter(average_rating>4.4)%>% 52 | mutate(title = stringr::str_to_title(title)) 53 | 54 | p1 <- baseplot + 55 | geom_point(data=manyweeks, 56 | aes(x=total_weeks, y=average_rating))+ 57 | ggrepel::geom_text_repel(data=manyweeks, 58 | aes(x=total_weeks, y=average_rating, 59 | label = title), size = 3, 60 | nudge_y = ifelse(manyweeks$title == "The Da Vinci Code", .2,-.2) 61 | ) + 62 | geom_point(data=highratings, 63 | aes(x=total_weeks, y=average_rating))+ 64 | ggrepel::geom_text_repel(data=highratings, 65 | aes(x=total_weeks, y=average_rating, 66 | label = title), size = 3, 67 | nudge_y = .1 68 | ) + 69 | guides(color = guide_legend(override.aes = aes(label = ""),title = "Language")) 70 | 71 | p1 72 | 73 | bestselling_authors <- nyt_titles %>% 74 | group_by(author) %>% 75 | count() %>% 76 | ungroup() %>% 77 | slice_max(n, n=10, with_ties = FALSE) 78 | 79 | # How many books did each of the bestselling authors have on the list each year? 80 | bsauthors_ts <- nyt_full %>% 81 | filter(author %in% bestselling_authors$author) %>% 82 | group_by(author,year) %>% 83 | distinct(title) %>% 84 | count() 85 | 86 | # If available, what were the average goodreads ratings of those books? 
87 | bestsellers_gr <- nyt_full %>% 88 | filter(author %in% bestselling_authors$author) %>% 89 | distinct(title) %>% 90 | left_join(distinct(gr2,title,.keep_all=T), by = 'title') %>% 91 | mutate(year = as.numeric(str_sub(publication_date, start= -4)), 92 | average_rating = as.numeric(average_rating)) %>% 93 | filter(!is.na(bookID)) %>% 94 | group_by(authors, year) %>% 95 | summarize(mean_rating = mean(average_rating,na.rm=TRUE)) %>% 96 | ungroup() 97 | 98 | p2 <- bsauthors_ts %>% 99 | left_join(bestsellers_gr, by = c("author"="authors", "year")) %>% 100 | ggplot(aes(x=year,y=n)) + 101 | geom_line(colour = 'white') + 102 | geom_point(aes(x=year,y=n,size = mean_rating), 103 | alpha=0.5,colour = 'lavender') + 104 | xlab("Year") + 105 | ylab("Unique NYT bestselling books") + 106 | facet_wrap(~author,ncol = 2) + 107 | guides(size=guide_legend(title="Average Goodreads rating")) 108 | p2 109 | 110 | library(patchwork) 111 | 112 | booktheme <- hrbrthemes::theme_ft_rc(base_size = 8) + 113 | theme(panel.grid.major = element_blank(), 114 | panel.grid.minor = element_blank(), 115 | axis.title.x = element_text(size = 14), 116 | axis.title.y = element_text(size = 14), 117 | legend.position = 'bottom', 118 | strip.text.x = element_text(size = 10)) 119 | 120 | png(filename = "19_NYTBestsellers.png",width = 12,height = 8,units = 'in',res = 200) 121 | p1 + p2 + plot_annotation(title = "How does Goodreads treat the NYT bestsellers?", 122 | subtitle = "I've always been curious about how representative the Goodreads community is of broader readership.", 123 | caption = "Data: goodreadsbooks from Kaggle and Post45 Data") & booktheme 124 | dev.off() 125 | 126 | 127 | # Bonus investigation: more correlations! 
--------------------------------- 128 | 129 | baseplot2 <- dat_matches %>% 130 | ggplot(aes(x=total_weeks,y=ratings_count)) + geom_point() 131 | 132 | manyreviews <- filter(dat_matches,ratings_count>700000) %>% mutate(title = stringr::str_to_title(title)) 133 | 134 | p3 <- baseplot2 + 135 | geom_point(data=manyreviews, 136 | aes(x=total_weeks, y=ratings_count)) + 137 | ggrepel::geom_text_repel(data=manyreviews, 138 | aes(x=total_weeks,y=ratings_count, 139 | label = title), size = 3 140 | ) + 141 | xlab("Weeks on NYT bestsellers list") + 142 | ylab("Number of ratings on Goodreads") 143 | 144 | png(filename = "NumReviewsVsWeeks.png",width = 6,height = 5,units = 'in',res=200) 145 | p3 146 | dev.off() 147 | 148 | gr2 %>% ggplot(aes(x=as.numeric(ratings_count),y=as.numeric(average_rating))) +geom_point() 149 | -------------------------------------------------------------------------------- /2020/10_Hockey.R: -------------------------------------------------------------------------------- 1 | # 3 March 2020 2 | 3 | game_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/game_goals.csv') 4 | 5 | top_250 <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/top_250.csv') 6 | 7 | season_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/season_goals.csv') 8 | 9 | unique(game_goals$player) 10 | unique(game_goals$season) 11 | 12 | library(calecopal) 13 | library(tidyverse) 14 | library(lubridate) 15 | library(ggrepel) 16 | library(patchwork) 17 | 18 | # set strings as factors to false 19 | # options(stringsAsFactors = FALSE) 20 | 21 | 22 | # change date to date format and add month column 23 | game_goals <-game_goals %>% 24 | mutate(date = as.Date(date, format = "%yyyy-%mm-%dd")) %>% 25 | mutate(month = month(date)) 26 | 27 | tot_goals <- game_goals %>% 28 | group_by(player,season) %>% 29 | 
summarize(tgoals = sum(goals,na.rm = T), 30 | tassists = sum(assists,na.rm = T)) 31 | 32 | # nice labels 33 | MAP <- list(tgoals = "Total goals", 34 | tassists = "Total assists") 35 | 36 | totplot <- tot_goals %>% 37 | pivot_longer(cols = c('tgoals','tassists')) %>% 38 | mutate(name = recode(name,!!!MAP)) %>% 39 | filter(player %in% filter(top_250,raw_rank<10)$player) %>% 40 | ggplot(aes(x=season,y=value,colour=name)) + 41 | geom_line() + 42 | theme_minimal(base_size = 12) + 43 | theme(legend.position = 'bottom') + 44 | theme(axis.text.x = element_text(angle=45,hjust=1)) + 45 | scale_color_manual('',values = cal_palette("superbloom3")) + 46 | facet_wrap(~player) + 47 | ylab('') + 48 | xlab('Season') 49 | # ggtitle('Goals and assists of top-ranked players') 50 | 51 | sdat <- game_goals %>% 52 | mutate(goals_home = ifelse(location == 'Home',goals,0), 53 | goals_away = ifelse(location == 'Away',goals,0)) %>% 54 | group_by(player,season) %>% 55 | summarize(tot_home = sum(goals_home,na.rm=T), 56 | tot_away = sum(goals_away,na.rm=T)) 57 | 58 | scatter <- sdat %>% 59 | ggplot(aes(x=tot_home,y=tot_away,colour=season)) + 60 | geom_point() + 61 | scale_colour_continuous('Season') + 62 | theme_minimal(base_size = 12) + 63 | xlab('Goals scored at home') + 64 | ylab('Goals scored away') + 65 | geom_label_repel(data = filter(sdat,tot_home>50 | tot_away>40), 66 | aes(x=tot_home,y=tot_away,label=player), 67 | xlim = c(50,NA), 68 | arrow = arrow(type = "closed", 69 | ends = 'last', 70 | length = unit(0.02, "npc")), 71 | force = 20) + 72 | xlim(c(0,75)) 73 | 74 | png('Hockey.png',width = 10,height=6,units = 'in',res=150) 75 | totplot + scatter 76 | dev.off() 77 | -------------------------------------------------------------------------------- /2020/11_Tuition.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(ggplot2) 3 | library(gganimate) 4 | library(lubridate) 5 | 6 | tuition_cost <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/tuition_cost.csv') 7 | 8 | tuition_income <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/tuition__cost_income.csv') 9 | 10 | salary_potential <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/salary_potential.csv') 11 | 12 | historical_tuition <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/historical_tuition.csv') 13 | 14 | diversity_school <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/diversity_school.csv') 15 | 16 | historical_tuition2 <- historical_tuition %>% 17 | mutate(year1 = sub("\\-.*", "", year)) 18 | 19 | p <- historical_tuition2 %>% 20 | filter(!is.na(tuition_cost) & !is.na(year1)) %>% 21 | filter(complete.cases(.)) %>% 22 | mutate(year1 = as.integer(year1)) %>% 23 | ggplot(aes(x=type,y=tuition_cost)) + 24 | geom_col() + 25 | xlab("Institution type") + 26 | ylab("Tuition cost") + 27 | hrbrthemes::theme_ft_rc(base_size = 18) + 28 | labs(title = 'Tuition cost over time', 29 | subtitle = 'Year: {frame_time}') + 30 | transition_time(year1) + 31 | enter_fade() 32 | p 33 | 34 | animate(p , width = 400, height = 400) 35 | anim_save(here::here("tuitioncost.gif")) 36 | 37 | 38 | salary_tuition <- left_join(tuition_cost,salary_potential) 39 | 40 | st <- salary_tuition %>% 41 | filter(!is.na(early_career_pay) & 42 | type != 'For Profit') %>% 43 | ggplot(aes(in_state_total,early_career_pay,color = type)) + 44 | geom_point() + 45 | scale_colour_manual('',values = calecopal::cal_palette('chaparral2')) + 46 | xlab('Total in-state tuition') + 47 | xlim(c(10000,80000)) + 48 | ylim(c(10000,80000)) + 49 | ylab('Early-career pay') + 50 | hrbrthemes::theme_ft_rc(base_size = 18) + 51 | labs(title = 'In-state tuition vs. 
early-career pay', 52 | subtitle = 'Do graduates of more expensive instutitions earn higher salaries?', 53 | caption = 'Data: tuitiontracker.org') 54 | 55 | png(filename = 'Salary_Tuition.png',width = 6,height = 6,units = 'in',res = 120) 56 | st 57 | dev.off() 58 | -------------------------------------------------------------------------------- /2020/12_Office.R: -------------------------------------------------------------------------------- 1 | # Office 2 | # install.packages("schrute") 3 | library(schrute) 4 | library(tidyverse) 5 | 6 | 7 | # Get the Data 8 | # From TidyT 9 | office_ratings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-17/office_ratings.csv') 10 | glimpse(office_ratings) 11 | 12 | # From Schrute 13 | data(theoffice) 14 | 15 | #Jim lines 16 | # charlines <- theoffice %>% 17 | # filter(character == 'Jim') %>% 18 | # group_by(season,episode) %>% 19 | # summarize(nlines = length(text)) %>% 20 | # ungroup(season, episode) %>% 21 | # mutate(season = as.numeric(season), 22 | # episode = as.numeric(episode)) 23 | # 24 | # combo <- office_ratings %>% 25 | # group_by(season,episode) %>% 26 | # left_join(charlines,by=c('season','episode')) 27 | # combo %>% 28 | # ggplot(aes(x=nlines,y=imdb_rating)) +geom_point() 29 | # 30 | 31 | # All lines 32 | charlines <- theoffice %>% 33 | filter(character %in% c('Michael','Jim','Pam','Dwight','Angela','Kelly')) %>% #Just picked first 6 from IMDB 34 | filter(!is.na(character)) %>% 35 | group_by(character,season,episode) %>% 36 | summarize(nlines = length(text)) %>% 37 | ungroup(character,season, episode) %>% 38 | mutate(season = as.numeric(season), 39 | episode = as.numeric(episode)) 40 | 41 | combo <- office_ratings %>% 42 | group_by(season,episode) %>% 43 | left_join(charlines,by=c('season','episode')) 44 | 45 | po <- combo %>% 46 | na.omit(character) %>% 47 | ggplot(aes(nlines,imdb_rating,colour=character)) + 48 | geom_point() + 49 | facet_wrap(~character) 
+ 50 | stat_smooth(method = 'lm') + 51 | theme_classic(base_size = 12) + 52 | ggpomological::scale_colour_pomological('Character') + 53 | theme(strip.background = element_blank(), 54 | text = element_text(family="Courier")) + 55 | ylab('IMDB Rating') + 56 | xlab('Total number of lines') + 57 | labs(title = "IMDB ratings of The Office by character", 58 | caption = "First six characters listed on IMDB. 59 | Data: schrute package and data.world") 60 | 61 | png('TheOffice.png',width = 10,height = 5,units = 'in',res = 120) 62 | po 63 | dev.off() 64 | -------------------------------------------------------------------------------- /2020/13_TBI.R: -------------------------------------------------------------------------------- 1 | # TBI 2 | library(tidyverse) 3 | 4 | 5 | 6 | # Get data ---------------------------------------------------------------- 7 | tbi_age <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-24/tbi_age.csv') 8 | tbi_year <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-24/tbi_year.csv') 9 | tbi_military <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-24/tbi_military.csv') 10 | 11 | head(tbi_military) 12 | 13 | plotdat <- tbi_military %>% 14 | filter(component %in%c('Guard','Reserve')) %>% 15 | group_by(year,service,severity) %>% 16 | summarize(diag.total = sum(diagnosed)) 17 | 18 | colpal1 <- RColorBrewer::brewer.pal(n = 6,name = 'RdBu')[-6] 19 | colpal1[3] <- 'lightgrey' 20 | 21 | areaplot <- plotdat %>% 22 | ggplot(aes(x=year,y=diag.total,fill = severity)) + 23 | geom_area() + 24 | facet_wrap(~service,scales='free') + 25 | scale_fill_manual(values = rev(colpal1)) + 26 | theme(strip.background = element_blank()) + 27 | xlab('Year') + 28 | ylab('Total diagnoses of traumatic brain injury') + 29 | hrbrthemes::theme_ft_rc() + 30 | labs(title = 'Brain injuries among guard and reserve \n 
military service members', 31 | subtitle = '2006-2014', 32 | caption = 'Data: CDC') 33 | 34 | png('TBI.png',width = 8,height = 8,units = 'in',res = 120) 35 | areaplot 36 | dev.off() 37 | -------------------------------------------------------------------------------- /2020/14_Beer.R: -------------------------------------------------------------------------------- 1 | # Beer 2 | library(tidyverse) 3 | library(lubridate) 4 | library(patchwork) 5 | 6 | brewing_materials <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-31/brewing_materials.csv') 7 | 8 | dg = "#222222" 9 | pal <- calecopal::cal_palette(name = "chaparral2", n = 6)[c(5,6)] 10 | 11 | # Density plot ------------------------------------------------------------ 12 | 13 | s2 <- brewing_materials %>% 14 | group_by(year, type) %>% 15 | filter(material_type %in% c('Grain Products',"Non-Grain Products")) 16 | 17 | densplot <- s2 %>% 18 | rename("Material type"=material_type) %>% 19 | ggplot(aes(month_current/1e6, 20 | colour=`Material type`, 21 | fill=`Material type`)) + 22 | geom_density(lwd=1) + 23 | scale_colour_manual(values = pal) + 24 | scale_fill_manual(values = colorspace::lighten(pal,amount=0.5))+ 25 | hrbrthemes::theme_ft_rc(base_size=14) + 26 | theme(strip.background = element_blank(), 27 | strip.text = element_blank()) + 28 | ylab('Density') + 29 | xlab(bquote('Material used (millions of pounds)')) + 30 | facet_wrap(~`Material type`,ncol=1,scales='free_y') + 31 | theme(legend.position = 'bottom') + 32 | labs(title = 'Materials used for beer production', 33 | subtitle = '2008-2017') 34 | 35 | # Time series ------------------------------------------------------------- 36 | 37 | s <- brewing_materials %>% 38 | group_by(year, type) %>% 39 | summarize(totprod = sum(month_current,na.rm=T)) 40 | 41 | matchtable <- brewing_materials %>% 42 | distinct(material_type,type) 43 | 44 | s <- s %>% 45 | ungroup() %>% 46 | left_join(matchtable) %>% 47 | 
filter(material_type!='Total Used') %>% 48 | filter(type != 'Total Grain products') %>% 49 | filter(type != 'Total Non-Grain products') 50 | 51 | labeldata <- s %>% 52 | filter(year == min(year)) 53 | 54 | timeplot <- s %>% 55 | ggplot(aes(x=year,y=totprod/1e6, 56 | colour=material_type, 57 | group=type)) + 58 | geom_line(lwd=1) + 59 | scale_colour_manual(values = pal) + 60 | scale_x_continuous(breaks = unique(s$year)) + 61 | hrbrthemes::theme_ft_rc(base_size=14) + 62 | ggrepel::geom_label_repel(data = labeldata, 63 | aes(label = type), 64 | point.padding = 1, 65 | fill=dg) + 66 | ylab('Material used (millions of pounds)') + 67 | xlab('Year') + 68 | theme(legend.position = 'none') + 69 | labs(caption = 'Data: Alcohol and Tobacco Tax and Trade Bureau') 70 | 71 | 72 | png('Beer.png',width = 8,height = 10,units = 'in',res = 120) 73 | densplot/timeplot + 74 | plot_layout(heights = c(1,2)) 75 | dev.off() -------------------------------------------------------------------------------- /2020/15_TourdeFrance.R: -------------------------------------------------------------------------------- 1 | # Tour de France 2 | 3 | tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv') 4 | head(tdf_winners) 5 | 6 | library(tidyverse) 7 | library(lubridate) 8 | library(patchwork) 9 | 10 | # Names and life durations of TDF winners --------------------------------- 11 | 12 | bikenames <- tdf_winners %>% 13 | select(edition,winner_name,born,died,nickname,nationality,start_date) %>% 14 | mutate(winyear = lubridate::ymd(year(start_date), truncated = 2L)) 15 | unique(bikenames$birth_country) 16 | 17 | bikenames$winner_name <- reorder(bikenames$winner_name, 18 | rev(bikenames$born)) 19 | bikenames <- bikenames %>% 20 | mutate(lifedur = as.duration(ymd(born) %--% ymd(died))/3.154e+7) %>% #convert seconds to years at the end 21 | filter(!is.na(lifedur)) 22 | 23 | pal <- RColorBrewer::brewer.pal('Spectral',n=8) 
24 | 25 | longevity <- bikenames %>% 26 | ggplot() + 27 | geom_linerange(aes(ymin = born,ymax=died,x=winner_name, 28 | colour=as.numeric(lifedur)),lwd=1.1) + 29 | coord_flip() + 30 | xlab('Winner name') + 31 | ylab('Year') + 32 | geom_point(aes(x=winner_name,y=winyear,shape=19), 33 | size=2,colour='darkgrey') + 34 | scale_shape_identity('', 35 | labels = 'Won the \nTour de France', 36 | breaks=c(19), 37 | guide = 'legend') + 38 | scale_colour_gradient2('How long \nthey lived \n (years)', 39 | low = pal[1], high=pal[8], midpoint = 60) + 40 | labs(title='The lives of Tour de France winners', 41 | caption='Data: tdf package & Kaggle') + 42 | guides(colour = guide_legend(order = 1), 43 | shape = guide_legend(order = 2)) + 44 | hrbrthemes::theme_ft_rc() + 45 | theme(text=element_text(size=12, family='Helvetica')) 46 | 47 | 48 | 49 | # Wins by country --------------------------------------------------------- 50 | countrywins <- bikenames %>% 51 | group_by(nationality) %>% 52 | summarize(nwins = length(winner_name)) 53 | 54 | # This is still how I reorder factors, sorry tidyverse! 
55 | countrywins$nationality <- reorder(countrywins$nationality, 56 | countrywins$nwins) 57 | 58 | cwins <- countrywins %>% 59 | ggplot(aes(x=nationality,y=nwins)) + 60 | geom_col() + 61 | xlab('Winner nationality') + 62 | ylab('Number of wins') + 63 | coord_flip() + 64 | hrbrthemes::theme_ft_rc() + 65 | theme(text=element_text(size=16,family='Helvetica')) 66 | 67 | 68 | png('TourdeFrance.png',width = 8,height = 12,units='in',res=120) 69 | (longevity / cwins ) + plot_layout(heights = c(3,1)) 70 | dev.off() 71 | -------------------------------------------------------------------------------- /2020/16_HipHop.R: -------------------------------------------------------------------------------- 1 | #BBC hip-hop poll of critics 2 | polls <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv') 3 | rankings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/rankings.csv') 4 | 5 | library(tidyverse) 6 | library(patchwork) 7 | 8 | # Five tracks I rocked out to while writing some of this code (after getting off Zoom, lol) 9 | # Bahamadia - Uknowhowwedu 10 | # Da Brat & Notorious B.I.G. - Da B Side 11 | # Remy Ma - Conceited 12 | # MC LYTE - Cold Rock a Party 13 | # and a new one... my life... 
14 | # Megan Thee Stallion - Savage 15 | 16 | # Heat map showing 'golden years' 17 | rankheat <- polls %>% 18 | ggplot(aes(x=year,y=rank)) + 19 | stat_density_2d(aes(fill = ..density..), 20 | geom = "raster", contour = FALSE) + 21 | viridis::scale_fill_viridis('Density of tracks \n at that ranking',option = 'plasma') + 22 | hrbrthemes::theme_ft_rc() + 23 | xlab('Year') + 24 | ylab('Ranking') 25 | 26 | 27 | # Subset to female artists 28 | femrappers <- polls %>% 29 | filter(gender=='female') 30 | 31 | femfreq <- femrappers %>% 32 | group_by(year) %>% 33 | summarize(Nfem = length(title)) 34 | 35 | hg <- femfreq %>% 36 | ggplot(aes(x=year,y=Nfem)) + 37 | geom_col(colour='white') + 38 | hrbrthemes::theme_ft_rc() + 39 | xlab('Year') + 40 | ylab("Number of female hip-hop\n artists on critics' lists") + 41 | labs(subtitle = "Were female artists recognized during the 'golden years'?") 42 | 43 | criticsummary <- polls %>% 44 | group_by(critic_rols,gender) %>% 45 | count() %>% 46 | ungroup() %>% 47 | group_by(critic_rols) %>% 48 | mutate(percent = n/sum(n)) 49 | 50 | whichcritics <- criticsummary %>% 51 | ggplot(aes(x=critic_rols,y=percent,fill=gender,colour = gender)) + 52 | geom_col() + 53 | scale_fill_brewer('Artist gender',palette = 'BuPu') + 54 | scale_color_brewer('Artist gender',palette = 'BuPu') + 55 | xlab('Critic role') + 56 | ylab('Percent of top 5 list') + 57 | coord_flip() + 58 | hrbrthemes::theme_ft_rc() + 59 | theme(legend.position = 'bottom') + 60 | labs(title = 'Women in hip-hop top 5 lists', 61 | subtitle = 'Who put them on their top five, and when their tracks came out') + 62 | theme(plot.title = element_text(hjust = -.1), 63 | plot.subtitle = element_text(hjust = -.1)) 64 | 65 | 66 | png('HipHop.png',width = 13,height = 9,units = 'in',res = 120) 67 | whichcritics + 68 | ( ( hg/rankheat ) + 69 | plot_layout(heights = c(1,6)) ) + 70 | plot_layout(widths = c(2,1)) 71 | dev.off() 72 | 73 | 74 | 75 | # Extras 
------------------------------------------------------------------ 76 | 77 | # peryearsongs <- polls %>% 78 | # group_by(year) %>% 79 | # summarize(totsongs = length(title)) 80 | # 81 | # peryearsongs %>% 82 | # ggplot(aes(x=year,y=totsongs)) + 83 | # geom_line(colour='white') + 84 | # xlab('Year') + 85 | # ylab('Number of songs on BBC list') + 86 | # hrbrthemes::theme_ft_rc() 87 | -------------------------------------------------------------------------------- /2020/17_GDPR.R: -------------------------------------------------------------------------------- 1 | # Get the Data 2 | library(tidyverse) 3 | library(lubridate) 4 | library(patchwork) 5 | library(scales) 6 | 7 | gdpr_violations <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-21/gdpr_violations.tsv') 8 | gdpr_text <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-21/gdpr_text.tsv') 9 | 10 | gdpr2 <- gdpr_violations %>% 11 | mutate(newdate = mdy(date)) %>% 12 | filter(newdate > '2000-01-01') 13 | 14 | 15 | make_numer <- function(x){ 16 | y <- str_extract(x,pattern = "[[:digit:]]+[:space:]") %>% 17 | str_trim(side = 'both') 18 | return(y) 19 | } 20 | 21 | # sort out the data and clean up the articles being violated 22 | articles_violated <- gdpr2 %>% 23 | select(id,article_violated) %>% 24 | separate(article_violated,c('n1','n2','n3','n4','n5'), 25 | sep=paste("\\|"),fill='right') %>% 26 | mutate_at(c('n1','n2','n3','n4','n5'),make_numer) %>% #extract articles violated! 
27 | pivot_longer(-id) %>% 28 | drop_na(value) %>% 29 | select(-name) %>% 30 | rename('art_viol'=value) 31 | 32 | pdat <- articles_violated %>% 33 | left_join(gdpr2) %>% 34 | group_by(newdate,art_viol) %>% 35 | summarize(num_viols = n(),total_price = sum(price)) %>% 36 | arrange(newdate) %>% 37 | group_by(art_viol) %>% 38 | mutate(rolling_numviol = cumsum(num_viols)) %>% 39 | filter(length(num_viols)>3) 40 | 41 | ends <- pdat %>% 42 | group_by(art_viol) %>% 43 | top_n(1,newdate) %>% 44 | mutate(enddate = max(newdate)) %>% 45 | mutate(artlabel = paste(art_viol)) 46 | 47 | # set aesthetic stuff 48 | dg = "#222222" 49 | palitra.pal <- c('#96ffff','#52eeff','#17d8ff','#5cbcff','#5cbcff','#a696ff','#e064e6','#ff009f') 50 | nb.cols <- length(levels(factor(pdat$art_viol))) 51 | mycolors <- colorRampPalette(palitra.pal)(nb.cols) 52 | 53 | 54 | plot1 <- pdat %>% 55 | ggplot(aes(x=newdate,y=rolling_numviol,colour=art_viol)) + 56 | geom_line(lwd=1.2) + 57 | hrbrthemes::theme_ft_rc() + 58 | xlab('Date') + 59 | ylab('Cumulative number of violations') + 60 | scale_color_manual(values=mycolors) + 61 | scale_x_date(date_labels = paste0('%b',' 20','%y')) + 62 | ggrepel::geom_label_repel(data = ends, 63 | aes(x=enddate, 64 | y=rolling_numviol, 65 | label=artlabel), 66 | max.iter = 100, 67 | nudge_x = 25, 68 | fill = dg) + 69 | theme(legend.position = 'none', 70 | axis.title.x = element_text(size = rel(1.5)), 71 | axis.title.y = element_text(size = rel(1.5))) + 72 | labs(title = 'Violations of the General Data Protection Regulation (EU) ', 73 | subtitle = "Which articles of the GDPR had the most frequent violations?", 74 | caption = 'Data: GDPR, c/o Bob Rudis') 75 | 76 | p2 <- pdat %>% 77 | mutate(yr = year(newdate)) %>% 78 | group_by(yr,art_viol) %>% 79 | summarize(price = sum(total_price)) 80 | 81 | totals <- p2 %>% 82 | group_by(yr) %>% 83 | summarize(totp = sum(price)) %>% 84 | as.data.frame() 85 | 86 | plot2 <- p2 %>% 87 | ggplot(aes(x=yr,y=price,fill=art_viol)) + 88 | 
geom_col() + 89 | scale_fill_manual('Article violated',values=mycolors) + 90 | xlab('Year') + 91 | ylab('Total fines ( \u20AC )') + 92 | hrbrthemes::theme_ft_rc() + 93 | theme(axis.title.x = element_text(size = rel(1.5)), 94 | axis.title.y = element_text(size = rel(1.5))) + 95 | geom_text(data=totals, 96 | aes(x=yr,y=totp + 5e6, 97 | label=paste(format(round(totp,-3),big.mark = ','),' \u20AC')), 98 | inherit.aes = FALSE) + 99 | scale_y_continuous(labels = comma) 100 | 101 | png('GDPR.png',width = 14,height = 8,units = 'in',res = 110) 102 | plot1+plot2 + 103 | plot_layout(widths = c(2,1)) 104 | dev.off() 105 | -------------------------------------------------------------------------------- /2020/18_Broadway.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(lubridate) 3 | library(ggExtra) 4 | library(patchwork) 5 | #devtools::install_github("thebioengineer/tidytuesdayR") 6 | #devtools::install_github("seananderson/ggsidekick") 7 | 8 | tuesdata <- tidytuesdayR::tt_load(2020, week = 18) 9 | grosses <- tuesdata$grosses 10 | synopses <- tuesdata$synopses 11 | cpi <- tuesdata$cpi 12 | pre_1985_starts<- tuesdata$`pre-1985-starts` 13 | 14 | # Made a function to expand a palette to length(unique(x)): 15 | pal_lengthen <- function(x,shortpal){ 16 | ncolours <- length(unique(x)) 17 | newpal <- colorRampPalette(shortpal)(ncolours) 18 | return(newpal) 19 | } 20 | 21 | synopses %>% 22 | filter(str_detect(synopsis, "Sondheim")) 23 | # Already mad that there are no Sondheim classics in here... where did they go?? 
24 | 25 | grosses2 <- grosses %>% 26 | mutate(year = year(ymd(week_ending))) 27 | 28 | # Get densities for a contour plot 29 | dens <- MASS::kde2d(grosses$avg_ticket_price, 30 | grosses$pct_capacity, n = 50) 31 | densf <- data.frame(expand.grid(x = dens$x, y = dens$y), 32 | z = as.vector(dens$z)) 33 | 34 | # Palettes from https://carto.com/carto-colors/ + datapasta: 35 | mint <- c("#e4f1e1", "#b4d9cc", "#89c0b6", "#63a6a0", "#448c8a", "#287274", "#0d585f") 36 | mypal <- pal_lengthen(x = 1:9,shortpal = mint) 37 | 38 | maxes <- grosses2 %>% 39 | filter(pct_capacity == max(pct_capacity) | 40 | avg_ticket_price == max (avg_ticket_price)) 41 | 42 | 43 | # Contour plot with scatter ----------------------------------------------- 44 | p1 <- ggplot(densf, aes(x=x,y=y,z=z)) + 45 | geom_contour_filled(breaks=seq(min(densf$z), 46 | max(densf$z), 47 | length.out=10), 48 | aes(colour=after_stat(level))) + 49 | scale_fill_manual(values = mypal) + 50 | scale_colour_manual(values = mypal) + 51 | geom_point(data = sample_n(grosses2,size = 100), 52 | aes(x=avg_ticket_price,y=pct_capacity),inherit.aes = FALSE,alpha=0.5,size = 2) + 53 | geom_point(data = filter(grosses2,pct_capacity>1.2 | avg_ticket_price> 400), 54 | aes(x=avg_ticket_price,y=pct_capacity),inherit.aes = FALSE,alpha=0.5,size = 2) + 55 | geom_hline(yintercept = 1,lty=2,colour='darkgrey') + 56 | geom_curve(aes(x=70, y=1.2, xend = maxes$avg_ticket_price[1], 57 | yend=maxes$pct_capacity[1]*.99), alpha=0.5, 58 | inherit.aes=FALSE,arrow=arrow(length= unit(0.015, "npc")), 59 | curvature = -0.2, color='darkgrey') + #adding arrows to join text to points 60 | geom_curve(aes(x=400, y=.7, xend=maxes$avg_ticket_price[2], 61 | yend=maxes$pct_capacity[2]*1.01), alpha=0.5, 62 | inherit.aes=FALSE,arrow=arrow(length= unit(0.015, "npc")), 63 | curvature = -0.2, color='darkgrey') + 64 | geom_label(aes(x=70,y = 1.2),label = maxes$show[1]) + 65 | geom_label(aes(x=400,y = .7),label = maxes$show[2]) + 66 | scale_x_continuous(expand = c(0.001, 
0.001)) + 67 | scale_y_continuous(expand = c(0.001, 0.001)) + 68 | xlab('Average ticket price (USD)') + 69 | ylab('Percent capacity') + 70 | ggsidekick::theme_sleek(base_size = 14) + 71 | theme(legend.position = 'none') 72 | 73 | top_sellers <- grosses2 %>% 74 | group_by(show) %>% 75 | summarize(total_gross = sum(weekly_gross)) %>% 76 | top_n(4) 77 | 78 | mothcols <- PNWColors::pnw_palette("Moth",4,type = 'discrete') %>% as.vector() 79 | 80 | p2 <- grosses2 %>% 81 | filter(show %in% top_sellers$show & year<2020) %>% 82 | ggplot(aes(x=year,y=weekly_gross/1e6,colour=show,group=show)) + 83 | geom_point(alpha=0.7) + 84 | stat_summary(fun=mean, geom="line") + 85 | xlab('Year') + 86 | ylab('Weekly gross (x 1 million USD)') + 87 | ggsidekick::theme_sleek(base_size = 14) + 88 | theme(legend.position = 'bottom') + 89 | scale_colour_manual('',values=mothcols) 90 | 91 | p3 <- ggMarginal(p2, 92 | margins = 'y', 93 | groupFill = TRUE, 94 | colour='darkgrey', 95 | size=4) 96 | 97 | 98 | png(filename = 'Broadway.png',width = 12,height = 5,units = 'in',res=120) 99 | p1 + p3 100 | dev.off() 101 | -------------------------------------------------------------------------------- /2020/19_AnimalCrossing.R: -------------------------------------------------------------------------------- 1 | # ANIMAL CROSSING!!! 
library(tidyverse)
library(rfishbase)
library(calecopal)
library(patchwork)

# coding soundtrack: Edith Piaf

items <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-05/items.csv')
# load data from Polygon site - tibble pdat includes Location and Shadow.Size
load("PolygonData.RData")

# Look up real-life FishBase info for a common name: the number of species
# sharing that name, the modal price category, and the mean vulnerability.
# Returns a list(n = ..., price_mode = ..., vuln = ...).
getspinfo <- function(commonname){
  nm <- common_to_sci(paste(commonname))
  dat <- species(nm$Species, fields = c("Species", "PriceCateg", "Vulnerability"))
  nspecies <- nrow(dat)
  pricecat <- sort(table(dat$PriceCateg), decreasing = TRUE)
  x <- names(pricecat)[1]
  # Fall back to the second-most-common category when the mode is 'unknown'
  modeprice <- ifelse(x != 'unknown', x, names(pricecat[2]))
  meanvuln <- mean(dat$Vulnerability, na.rm = TRUE)
  list(n = nspecies, price_mode = modeprice, vuln = meanvuln)
}

# Let's be honest...
fishes <- items %>%
  filter(category == 'Fish')

# Preallocate one slot per fish instead of growing vectors in the loop
n_fish <- nrow(fishes)
RLprice <- character(n_fish)
RLvuln <- numeric(n_fish)
nspps <- numeric(n_fish)

# Didn't work with mutate() -- rfishbase lookups are done row-wise here
for (i in seq_len(n_fish)) {
  x <- getspinfo(fishes$name[i])
  if (length(x$price_mode) == 0) {
    RLprice[i] <- NA
    RLvuln[i] <- NA
    nspps[i] <- NA
  } else {
    RLprice[i] <- x$price_mode
    RLvuln[i] <- x$vuln
    nspps[i] <- x$n
  }
}

fishvuln <- fishes %>%
  add_column(RLprice = RLprice) %>%
  add_column(RLvuln = RLvuln) %>%
  add_column(nspps = nspps)

p1 <- fishvuln %>%
  ggplot(aes(x = RLvuln, y = sell_value, colour = RLprice)) +
  geom_point() +
  scale_color_manual('Price in real life', values = cal_palette("superbloom1")) +
  xlab('Mean vulnerability score in real life') +
  ylab('Sell value in Animal Crossing (bells)') +
  hrbrthemes::theme_ft_rc()

# Labels for extreme values
extremes1 <- fishvuln %>%
  filter(nspps > 500) %>%
  distinct(name, .keep_all = TRUE)
extremes2 <- fishvuln %>%
  filter(sell_value > 12500) %>%
  distinct(name, .keep_all = TRUE)

p2 <- fishvuln %>%
  ggplot(aes(x = sell_value, y = nspps)) +
  geom_point(colour = 'white') +
  xlim(c(0, 20000)) +
  ggrepel::geom_text_repel(data = extremes1,
                           aes(x = sell_value, y = nspps, label = name),
                           colour = 'white') +
  ggrepel::geom_text_repel(data = extremes2,
                           aes(x = sell_value, y = nspps, label = name),
                           colour = 'white',
                           nudge_x = 5000,
                           nudge_y = 500,
                           vjust = 0,
                           direction = 'y') +
  #scale_color_manual('Price in real life',values = cal_palette("superbloom1")) +
  xlab('Sell value in Animal Crossing (bells)') +
  ylab('Number of species in real life with this common name') +
  hrbrthemes::theme_ft_rc()
# NOTE(review): a stray `scatplot2` call followed here in the original; that
# object is never defined anywhere in this script (would error), so removed.

habs <- fishvuln %>%
  left_join(pdat, by = c("name" = "Fish")) %>%
  select(name, Location, sell_value) %>%
  distinct(name, .keep_all = TRUE)

p3 <- habs %>%
  group_by(Location) %>%
  filter(length(name) > 2) %>%
  ggplot(aes(x = sell_value, fill = Location)) +
  geom_density(colour = 'white') +
  scale_fill_manual(values = cal_palette("chaparral2")) +
  facet_wrap(~Location, nrow = 1) +
  xlab('Sell value (bells)') +
  ylab("Density") +
  coord_flip() +
  hrbrthemes::theme_ft_rc() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = 'none') +
  labs(title = 'How valuable are fish in Animal Crossing',
       subtitle = "...and how does their value compare to the same taxa in real life?",
       caption = "Data: VillagerDB, FishBase, and Polygon")

png(filename = 'AnimalCrossing.png', width = 12, height = 8, units = 'in', res = 120)
p3 / (p1 + p2 + plot_layout(widths = c(1, 2)))
dev.off()
# -------------------------------------------------------------------------
# /2020/19_AnimalCrossing_dataprep.R:
# -------------------------------------------------------------------------
# Polygon data (collected from polygon using {datapasta})
pdat <-
tibble::tribble( 3 | ~Critter.number, ~Fish, ~Location, ~Shadow.Size, ~Value, ~Time, ~Month.Hemisphere, 4 | 1L, "Bitterling", "River", "Smallest", 900, "All.day", "November-March.(Northern)./.May-September.(Southern)", 5 | 2L, "Pale Chub", "River", "Smallest", 160, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 6 | 3L, "Crucian Carp", "River", "Small", 160, "All day", "Year-round (Northern and Southern)", 7 | 4L, "Dace", "River", "Medium", 240, "4 p.m. - 9 a.m.", "Year-round (Northern and Southern)", 8 | 5L, "Carp", "Pond", "Large", 300, "All day", "Year-round (Northern and Southern)", 9 | 6L, "Koi", "Pond", "Large", 4000, "4 p.m. - 9 a.m.", "Year-round (Northern and Southern)", 10 | 7L, "Goldfish", "Pond", "Smallest", 1300, "All day", "Year-round (Northern and Southern)", 11 | 8L, "Pop-eyed Goldfish", "Pond", "Smallest", 1300, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 12 | 9L, "Ranchu Goldfish", "Pond", "Small", 4500, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 13 | 10L, "Killifish", "Pond", "Smallest", 300, "All day", "April-August (Northern) / October-February (Southern)", 14 | 11L, "Crawfish", "Pond", "Small", 200, "All day", "April-September (Northern) / October-March (Southern)", 15 | 12L, "Soft-shelled Turtle", "River", "Large", 3750, "4 p.m. - 9 a.m.", "August-September (Northern) / February-March (Southern)", 16 | 13L, "Snapping Turtle", "River", "X Large", 5000, "9 p.m. - 4 a.m.", "April-October (Northern) / October-April (Southern)", 17 | 14L, "Tadpole", "Pond", "Smallest", 100, "All day", "March-July (Northern) / September-January (Southern)", 18 | 15L, "Frog", "Pond", "Small", 120, "All day", "May-August (Northern) / November-Feburary (Southern)", 19 | 16L, "Freshwater Goby", "River", "Small", 400, "4 p.m. - 9 a.m.", "Year-round (Northern and Southern)", 20 | 17L, "Loach", "River", "Small", 400, "All day", "March-May (Northern) / September-November (Southern)", 21 | 18L, "Catfish", "Pond", "Large", 800, "4 p.m. 
- 9 a.m.", "May-October (Northern) / November-April (Southern)", 22 | 19L, "Giant Snakehead", "Pond", "X Large", 5500, "9 a.m. - 4 p.m.", "June-August (Northern) / December-February (Southern)", 23 | 20L, "Bluegill", "River", "Small", 180, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 24 | 21L, "Yellow Perch", "River", "Medium", 300, "All day", "October-March (Northern) / April-September (Southern)", 25 | 22L, "Black Bass", "River", "Large", 400, "All day", "Year-round (Northern and Southern)", 26 | 23L, "Tilapia", "River", "Medium", 800, "All day", "June-October (Northern) / December-April (Southern)", 27 | 24L, "Pike", "River", "X Large", 1800, "All day", "September-December (Northern) / March-June (Southern)", 28 | 25L, "Pond Smelt", "River", "Small", 500, "All day", "December-February (Northern) / June-August (Southern)", 29 | 26L, "Sweetfish", "River", "Medium", 900, "All day", "July-September (Northern) / January-March (Southern)", 30 | 27L, "Cherry Salmon", "River (Clifftop)", "Medium", 1000, "4 p.m. - 9 a.m.", "March-June, September-November (Northern) / March-May, September-December (Southern)", 31 | 28L, "Char", "River (Clifftop)", "Medium", 3800, "4 p.m. - 9 a.m.", "March-June, September-November (Northern) / March-May, September-December (Southern)", 32 | 29L, "Golden Trout", "River (Clifftop)", "Medium", 15000, "4 p.m. - 9 a.m.", "March-June, September-November (Northern) / March-May, September-December (Southern)", 33 | 30L, "Stringfish", "River (Clifftop)", "Largest", 15000, "4 p.m. - 9 a.m.", "December-March (Northern) / June-September (Southern)", 34 | 31L, "Salmon", "River (mouth)", "Large", 700, "All day", "September (Northern) / March (Southern)", 35 | 32L, "King Salmon", "River (mouth)", "Largest", 1800, "All day", "September (Northern) / March (Southern)", 36 | 33L, "Mitten Crab", "River", "Small", 2000, "4 p.m. - 9 a.m.", "September-November (Northern) / March-May (Southern)", 37 | 34L, "Guppy", "River", "Smallest", 1300, "9 a.m. 
- 4 p.m.", "April-November (Northern) / October-May (Southern)", 38 | 35L, "Nibble Fish", "River", "Small", 1500, "9 a.m. - 4 p.m.", "May-September (Northern) / November-March (Southern)", 39 | 36L, "Angelfish", "River", "Small", 3000, "4 p.m. - 9 a.m.", "May-October (Northern) / November-April (Southern)", 40 | 37L, "Betta", "River", "Small", 2500, "9 a.m. - 4 p.m.", "May-October (Northern) / November-April (Southern)", 41 | 38L, "Neon Tetra", "River", "Smallest", 500, "9 a.m. - 4 p.m.", "April-November (Northern) / October-May (Southern)", 42 | 39L, "Rainbowfish", "River", "Small", 800, "9 a.m. - 4 p.m.", "May-October (Northern) / November-April (Southern)", 43 | 40L, "Piranha", "River", "Small", 2500, "9 a.m. - 4 p.m., 9 p.m. - 4 a.m.", "June-September (Northern) / December-March (Southern)", 44 | 41L, "Arowana", "River", "Large", 10000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 45 | 42L, "Dorado", "River", "X Large", 15000, "4 a.m. - 9 p.m.", "June-September (Northern) / December-March (Southern)", 46 | 43L, "Gar", "Pond", "Largest", 6000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 47 | 44L, "Arapaima", "River", "Largest", 10000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 48 | 45L, "Saddled Bichir", "River", "Large", 4000, "9 p.m. 
- 4 a.m.", "June-September (Northern) / December-March (Southern)", 49 | 46L, "Sturgeon", "River (mouth)", "Largest", 10000, "All day", "September-March (Northern) / March-September (Southern)", 50 | 47L, "Sea Butterfly", "Sea", "Smallest", 1000, "All day", "December-March (Northern) / June-September (Southern)", 51 | 48L, "Sea Horse", "Sea", "Smallest", 1100, "All day", "April-November (Northern) / October-May (Southern)", 52 | 49L, "Clown Fish", "Sea", "Smallest", 650, "All day", "April-September (Northern) / October-March (Southern)", 53 | 50L, "Surgeonfish", "Sea", "Small", 1000, "All day", "April-September (Northern) / October-March (Southern)", 54 | 51L, "Butterfly Fish", "Sea", "Small", 1000, "All day", "April-September (Northern) / October-March (Southern)", 55 | 52L, "Napoleonfish", "Sea", "Largest", 10000, "4 a.m. - 9 p.m.", "July-August (Northern) / January-February (Southern)", 56 | 53L, "Zebra Turkeyfish", "Sea", "Medium", 500, "All day", "April-November (Northern) / October-May (Southern)", 57 | 54L, "Blowfish", "Sea", "Medium", 5000, "6 p.m. - 4 a.m.", "November-February (Northern) / May-August (Southern)", 58 | 55L, "Puffer Fish", "Sea", "Medium", 250, "All day", "July-September (Northern) / January-March (Southern)", 59 | 56L, "Anchovy", "Sea", "Small", 200, "4 a.m. 
- 9 p.m.", "Year-round (Northern and Southern)", 60 | 57L, "Horse Mackerel", "Sea", "Small", 150, "All day", "Year-round (Northern and Southern)", 61 | 58L, "Barred Knifejaw", "Sea", "Medium", 5000, "All day", "March-November (Northern) / September-May (Southern)", 62 | 59L, "Sea Bass", "Sea", "X Large", 400, "All day", "Year-round (Northern and Southern)", 63 | 60L, "Red Snapper", "Sea", "Large", 3000, "All day", "Year-round (Northern and Southern)", 64 | 61L, "Dab", "Sea", "Medium", 300, "All day", "October-April (Northern) / April-October (Southern)", 65 | 62L, "Olive Flounder", "Sea", "Large", 800, "All day", "Year-round (Northern and Southern)", 66 | 63L, "Squid", "Sea", "Medium", 500, "All day", "December-August (Northern) / June-February (Southern)", 67 | 64L, "Moray Eel", "Sea", "Narrow", 2000, "All day", "August-October (Northern) / February-April (Southern)", 68 | 65L, "Ribbon Eel", "Sea", "Narrow", 600, "All day", "June-October (Northern) / December-April (Southern)", 69 | 66L, "Tuna", "Pier", "Largest", 7000, "All day", "November-April (Northern) / May-October (Southern)", 70 | 67L, "Blue Marlin", "Pier", "Largest", 10000, "All day", "July-September, November-April (Northern) / January-March, May-November (Southern)", 71 | 68L, "Giant Trevally", "Pier", "Large", 4500, "All day", "May-October (Northern) / November-April (Southern)", 72 | 69L, "Mahi-mahi", "Pier", "Large", 6000, "All day", "May-October (Northern) / November-April (Southern)", 73 | 70L, "Ocean Sunfish", "Sea", "Largest (Fin)", 4000, "4 a.m. - 9 p.m.", "July-September (Northern) / January-March (Southern)", 74 | 71L, "Ray", "Sea", "X Large", 3000, "4 a.m. - 9 p.m.", "August-November (Northern) / February-May (Southern)", 75 | 72L, "Saw Shark", "Sea", "Largest (Fin)", 12000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 76 | 73L, "Hammerhead Shark", "Sea", "Largest (Fin)", 8000, "4 p.m. 
- 9 a.m.", "June-September (Northern) / December-March (Southern)",
                74L, "Great White Shark", "Sea", "Largest (Fin)", 15000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)",
                75L, "Whale Shark", "Sea", "Largest (Fin)", 13000, "All day", "June-September (Northern) / December-March (Southern)",
                76L, "Suckerfish", "Sea", "Large (Fin)", 1500, "All day", "June-September (Northern) / December-March (Southern)",
                77L, "Football Fish", "Sea", "Large", 2500, "4 p.m. - 9 a.m.", "November-March (Northern) / May-September (Southern)",
                78L, "Oarfish", "Sea", "Largest", 9000, "All day", "December-May (Northern) / June-November (Southern)",
                79L, "Barreleye", "Sea", "Small", 15000, "9 p.m. - 4 a.m.", "Year-round (Northern and Southern)",
                80L, "Coelacanth", "Sea (rainy days)", "Largest", 15000, "All day", "Year-round (Northern and Southern)"
  )

pdat <- as.data.frame(pdat)

save(pdat, file = 'PolygonData.RData')
# -------------------------------------------------------------------------
# /2020/20_Volcanoes.R:
# -------------------------------------------------------------------------
library(tidyverse)
library(patchwork)

# For the map:
library(sf)
library(mapview)

# Data
volcano <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/volcano.csv')
eruptions <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/eruptions.csv')
events <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/events.csv')
#tree_rings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/tree_rings.csv')
#sulfur <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/sulfur.csv')


# Color stuff -------------------------------------------------------------
pal <- c('#fcde9c','#faa476','#f0746e','#e34f6f','#dc3977','#b9257a','#7c1d6f')

# Interpolate a short palette out to one colour per unique value of x.
lengthen_pal <- function(x, shortpal){
  ncolours <- length(unique(x))
  colorRampPalette(shortpal)(ncolours)
}

newpal <- lengthen_pal(x = 1:10, shortpal = pal)


# Data stuff --------------------------------------------------------------
locs <- volcano %>%
  select(volcano_name, major_rock_1, latitude, longitude)

locations_sf <- st_as_sf(locs, coords = c("longitude", "latitude"), crs = 4326)
mapviewOptions(basemaps = "CartoDB.DarkMatter")
p <- mapview(locations_sf,
             zcol = "major_rock_1",
             alpha = 0,
             col.regions = newpal)

mapshot(p, file = 'volcanoplot2.pdf',
        remove_controls = c("homeButton", "layersControl"),
        res = 400,
        selfcontained = FALSE,
        zoom = 1)

# Eruption counts per volcano, joined back to volcano metadata
volsumm <- eruptions %>%
  group_by(volcano_name) %>%
  count() %>%
  left_join(volcano)

palitra <- c('#0099ff','#9d98f6','#d59de5','#f4a9d3','#ffbcca','#ffd2cd','#ffe8dd')

p1 <- volsumm %>%
  ggplot(aes(x = longitude, y = n, colour = evidence_category)) +
  geom_point() +
  scale_colour_manual('Evidence category', values = palitra[c(2, 3, 4, 5, 6)]) +
  # BUG FIX: the original used 47.6062, which is Seattle's *latitude*;
  # on this longitude axis Seattle sits at -122.3321.
  geom_vline(xintercept = -122.3321, colour = 'white', lty = 2) + # longitude of Seattle
  ylab('Number of eruptions') +
  xlab('Longitude') +
  hrbrthemes::theme_ft_rc(base_size = 11) +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5))) +
  # arrow joining the "Seattle" label to the (corrected) vertical line
  geom_curve(aes(x = -60, y = 200, xend = -122.3, yend = 225), colour = 'white',
             arrow = arrow(length = unit(0.03, "npc"))) +
  annotate('text', x = -60, y = 180, label = "Seattle", colour = 'white') +
  labs(title = "A landscape of fear!",
       subtitle = "Was my childhood fear of volcanic annihilation well-founded?")

p1

volareas <-
volcano %>%
  left_join(events) %>%
  # bin volcanoes into four longitude bands
  mutate(longbin = case_when(longitude < (-100) ~ "l1",
                             longitude > (-100) & longitude < 0 ~ "l2",
                             longitude > 0 & longitude < 100 ~ "l3",
                             longitude > 100 ~ "l4"))

p2 <- volareas %>%
  # FIX: labels now describe the actual bins -- l1 is longitude < -100
  # (the original label '< 100' dropped the minus sign) and l2 is -100 < x < 0
  mutate(longbin2 = recode(longbin, l1 = 'x < -100', l2 = '-100 < x < 0',
                           l3 = '0 < x < 100', l4 = 'x > 100')) %>%
  group_by(longbin2, event_type) %>%
  count() %>%
  filter(!is.na(event_type) & event_type != 'VEI (Explosivity Index)') %>%
  ggplot(aes(x = fct_reorder(event_type, n), y = n)) +
  geom_col(fill = 'lightgrey', colour = 'lightgrey') +
  ylab("Count") +
  xlab("Type of event") +
  facet_wrap(~longbin2, nrow = 1) +
  coord_flip() +
  hrbrthemes::theme_ft_rc(base_size = 10) +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5))) +
  labs(caption = 'Data: The Smithsonian Institution')

png('Volcano.png', width = 12, height = 12, units = 'in', res = 400)
p1 + p2 + plot_layout(ncol = 1, heights = c(1, 3))
dev.off()

# -------------------------------------------------------------------------
# /2020/21_Volleyball.R:
# -------------------------------------------------------------------------
# Beach volleyball
library(tidyverse)
library(ggchicklet)
library(hrbrthemes)
library(ggthemes)
library(patchwork)

vb_matches <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-19/vb_matches.csv', guess_max = 76000)

unique(vb_matches$country)
unique(vb_matches$w_p1_country)
unique(vb_matches$circuit)


# Winner player-1 move totals, long format: one row per tournament x move
pd <- vb_matches %>%
  select(circuit,
         tournament,
         country,
         year,
         w_p1_country,
         w_p1_tot_attacks,
         w_p1_tot_kills,
         w_p1_tot_aces,
         w_p1_tot_blocks,
         w_p1_tot_digs) %>%
  pivot_longer(cols = w_p1_tot_attacks:w_p1_tot_digs,
               names_to = c('winner', 'player', 'tot', 'move'),
               names_sep = "_",
               values_to = 'n') %>%
  group_by(circuit, tournament, country, year, w_p1_country, winner, tot, move) %>%
  summarize(N = sum(n)) %>%
  ungroup() %>%
  select(-winner, -tot)

p1 <- pd %>%
  filter(circuit == 'FIVB' & !is.na(N)) %>%
  mutate(w_p1_country = fct_drop(w_p1_country)) %>%
  group_by(w_p1_country, move) %>%
  summarize(sum_moves = sum(N, na.rm = TRUE)) %>%
  ggplot(aes(x = fct_reorder(w_p1_country, .x = sum_moves, .fun = sum),
             y = sum_moves, fill = move)) +
  xlab("Winning country (FIVB)") +
  ylab("Moves") +
  geom_chicklet() +
  coord_flip() +
  scale_fill_tableau("Miller Stone", name = NULL) +
  theme_ipsum_rc(grid = "X") +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)),
        plot.margin = margin(1, 1, 1, 1)) +
  labs(title = 'Moves by winning teams',
       subtitle = 'Total moves in all winning matches',
       caption = 'Data: Adam Vagnar')

p1


# Now maybe we can look at wins over time for each country
pd2 <- vb_matches %>%
  select(year, circuit, tournament, country, match_num, w_p1_country) %>%
  filter(circuit == 'FIVB') %>%
  group_by(w_p1_country, year) %>%
  count()


# NOTE(review): lengthen_pal() is defined in 20_Volcanoes.R (and
# HelperFunctions.R); this script relies on it already being in the workspace.
cpal <- lengthen_pal(x = 1:9, shortpal = tableau_color_pal(palette = "Summer")(8))

pd2a <- pd2 %>%
  group_by(w_p1_country) %>%
  filter(sum(n) > 2000) %>% # "big shot" countries have more than 2000 total wins
  ungroup() %>%
  mutate(bigshot = 1) %>%
  full_join(pd2) %>%
  replace_na(list(bigshot = 0))

p2 <- pd2a %>%
  mutate(w_p1_country = fct_drop(w_p1_country)) %>%
  ggplot(aes(x = year, y = n, colour = w_p1_country)) +
  geom_line(lwd = 1) +
  scale_colour_manual('Country',
                      values = cpal) +
  scale_x_continuous(limits = c(2001, 2019)) +
  xlab('Year') +
  ylab('Total matches won') +
  theme_ipsum_rc(grid = "X") +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)),
        legend.position = 'bottom',
        plot.margin = margin(1, 1, 1, 1)) +
  gghighlight::gghighlight(bigshot == 1, use_direct_label = FALSE) +
  facet_wrap(~w_p1_country) +
  labs(title = 'Wins over time',
       subtitle = 'For the nine "big shot" countries with over 2000 total wins',
       caption = 'Data: Adam Vagnar')


png(filename = 'Volleyball1.png', width = 10, height = 6, units = 'in', res = 200)
p1
dev.off()

png(filename = 'Volleyball2.png', width = 10, height = 6, units = 'in', res = 200)
p2
dev.off()


# -------------------------------------------------------------------------
# /2020/22_Cocktails.R:
# -------------------------------------------------------------------------
# Cocktails!

#devtools::install_github("gadenbuie/ggpomological")

library(tidyverse)
library(tidyr)
library(stringr)
library(vegan)
library(patchwork)
library(ggpomological)

# Look at cocktails with fruits in them, for fun!
cocktails <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-26/cocktails.csv')
boston_cocktails <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-26/boston_cocktails.csv')

# (removed an unused scratch vector `test` that was never referenced)

fruit_list <- tolower(c("Kiwi", "Banana", "Apricot", "Avocado", "Cocos", "Clementine", "Mandarine", "Orange", "Cranberry", "Cherry", "BLackcurrant", "Lime", "Lemon", "Peach", "Plum", "Raspberry", "Strawberry", "Pineapple", "Pomegranate", "Limon", "Apple"))

# Drinks that contain at least one fruit ingredient
fruitycocktails <- cocktails %>%
  mutate(ingredient = tolower(ingredient)) %>%
  filter(ingredient %in% fruit_list) %>%
  select(drink) %>%
  unique()

# Filter out fruity drinks
fdrinks <- cocktails %>%
  group_by(drink) %>%
  right_join(fruitycocktails)

# Tried to sort out amounts, too complicated for 2 hrs of coding... I fail!

p1 <- fdrinks %>%
  mutate(alcoholic = tolower(alcoholic)) %>%
  distinct(drink, category, alcoholic) %>% # subset to distinct drinks
  group_by(category, alcoholic) %>%
  count() %>%
  # NOTE: the original ordered by desc(-n), which is just ascending n
  ggplot(aes(x = fct_reorder(category, n), y = n, fill = alcoholic)) +
  geom_col() +
  xlab('Category') +
  ylab('Number of drinks') +
  coord_flip() +
  scale_fill_pomological('') +
  labs(caption = 'Data: Kaggle') +
  theme(legend.position = 'bottom') +
  theme_pomological("Homemade Apple", 16)



# Turn fruity drinks into community matrix --------------------------------
cm <- fdrinks %>%
  pivot_wider(id_cols = drink,
              names_from = ingredient,
              values_from = row_id,
              values_fn = list(row_id = length)) %>%
  # FIX: funs() is defunct in dplyr >= 1.0; across() is the replacement
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) # replace NA with 0

cmat <- as.matrix(sapply(cm[, -1], as.numeric))
# keep only ingredients that appear in more than 10 drinks...
subset_ingredients <- which(colSums(cmat) > 10)
cmat <- cmat[, subset_ingredients]
# ...and drinks that still contain at least one kept ingredient
subset_drinks <- which(rowSums(cmat) != 0)
cmat <- cmat[subset_drinks, ]
drink <- cm$drink[subset_drinks]

# NMDS ordination on log-transformed counts
mds.log <- log(cmat + 1)
sol <- metaMDS(mds.log)
vec.sp <- envfit(sol$points, mds.log, perm = 500)
vec.sp.df <- as.data.frame(vec.sp$vectors$arrows * sqrt(vec.sp$vectors$r))
vec.sp.df$species <- rownames(vec.sp.df)
vec.sp.df.tp <- vec.sp.df
ord <- sol$points %>%
  as.data.frame() %>%
  add_column(drink = drink) %>%
  left_join(fdrinks) %>%
  distinct(id_drink, .keep_all = TRUE)

p2 <- ord %>%
  filter(!is.na(drink)) %>%
  ggplot(aes(MDS1, MDS2, colour = category)) +
  geom_point(size = 4) +
  labs(title = 'The community of fruity drinks') +
  scale_colour_pomological("Category") +
  theme_pomological("Homemade Apple", 16)

png('Cocktails.png', width = 10, height = 10, units = 'in', res = 200)
p2 + p1 + plot_layout(ncol = 1, heights = c(3, 1))
dev.off()
# -------------------------------------------------------------------------
# /2020/25_university_statements.R:
# -------------------------------------------------------------------------
#25_university_statements
library(tidyverse)
library(snakecase)
library(hrbrthemes)
library(ggtext)
library(patchwork)

dat <- read.csv('statements.csv')
head(dat)

# Clean up the data a little
colnames(dat) <- to_any_case(colnames(dat), "snake")
colnames(dat) <- gsub("x_", "", colnames(dat))

dat <- dat %>%
  select(-conference,
         -strength_of_statement_mentioning_good_cops_1_7,
         -follow_up_link,
         -statement_link) %>%
  mutate(school_name = recode(school_name, "Louisiana State University and Agricultural & Mechanical College" = "Louisiana State University \n and Agricultural & Mechanical College"))

# Proportion of statements mentioning each key name/phrase
props2plot <- dat %>%
  select(-statement_text) %>%
  pivot_longer(cols = on_twitter:good_cops) %>%
select(-strength_of_statement, -follow_up_updated_statement) %>%
  mutate(val_bin = ifelse(value == "Yes", 1, 0)) %>%
  group_by(name) %>%
  # mean of a 0/1 indicator == proportion of statements mentioning the phrase
  summarize(prop.mentioned = mean(val_bin)) %>%
  ungroup() %>%
  mutate(labelnames = to_title_case(name)) %>%
  mutate(labelnames = recode(labelnames, "Murder Killing" = "Murder/Killing",
                             "Mentions Blackness Discrimination Against Black People" = "Mentions Blackness \n or discrimination against Black people")) %>%
  filter(name != "on_twitter") %>%
  filter(name != "good_cops")


p1 <- props2plot %>%
  ggplot(aes(x = fct_reorder(labelnames, prop.mentioned), y = prop.mentioned)) +
  geom_col(fill = 'white') +
  xlab("Name or Phrase") +
  ylab("Proportion of school statements mentioning name or phrase") +
  coord_flip() +
  hrbrthemes::theme_ft_rc() +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)))


# Word count of each school's statement, joined back to the full data
stdat <- dat %>%
  group_by(school_name) %>%
  summarize(statement_length = stringi::stri_count_words(statement_text)) %>%
  left_join(dat)

p2 <- stdat %>%
  filter(on_twitter %in% c("Yes", "No")) %>%
  filter(statement_length > 5) %>%
  ggplot(aes(x = statement_length, y = strength_of_statement,
             colour = on_twitter)) +
  geom_point(size = 2.3, alpha = 0.5) +
  scale_colour_manual("Statement on Twitter?",
                      values = c('white', '#9ecae1')) +
  xlab("Length of statement (words)") +
  ylab("Score of statement") +
  labs(caption = "Statement score is the number of \n key names/phrases in the university's primary statement. \nDataset: @amaan_c") +
  theme_ft_rc() +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)))


# One row per school: total mentions ("score"), plus a cumulative counter (cc)
# used to stack school names vertically within each score column.
vp <- dat %>%
  select(-statement_text) %>%
  pivot_longer(cols = on_twitter:good_cops) %>%
  select(-strength_of_statement, -follow_up_updated_statement) %>%
  mutate(val_bin = ifelse(value == "Yes", 1, 0)) %>%
  filter(name != 'good_cops') %>%
  filter(name != 'on_twitter') %>%
  group_by(school_name) %>%
  summarize(score = sum(val_bin)) %>%
  arrange(desc(score)) %>%
  group_by(score) %>%
  mutate(count1 = 1, cc = cumsum(count1))

mentions_good_cops <- filter(dat, good_cops == 'Yes') %>%
  select(school_name) %>%
  as.vector()

vp2 <- filter(vp, school_name %in% mentions_good_cops$school_name)

p3 <- vp %>% ggplot(aes(x = score, y = cc)) +
  annotate("text", x = vp$score, y = vp$cc,
           label = vp$school_name, size = 3,
           color = "lightgrey",
           family = "Arial Narrow") +
  annotate("text", x = vp2$score, y = vp2$cc,
           label = vp2$school_name, size = 3,
           color = "red",
           family = "Arial Narrow") +
  hrbrthemes::theme_ft_rc() +
  xlab("Score (Total name and phrase mentions)") +
  ylab("") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(title = "How thorough were university statements since George Floyd's murder?",
       subtitle = "Red universities also mentioned 'good cops' in their statement.",
       caption = " Data source: Public university statements, collected and scored by @amaan_c") +
  theme(plot.subtitle = element_markdown(lineheight = 1.1),
        axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)))

png("First.png", width = 14, height = 10, units = 'in', res = 120)
p3
dev.off()

png("Second.png", width = 10, height = 12, units = 'in', res = 120)
p1 + p2 + plot_layout(ncol = 1)
dev.off()
# Extra bits --------------------------------------------------------------
# p2 <- dat %>%
#   select(-statement_text) %>%
#   pivot_longer(cols=on_twitter:good_cops) %>%
#   select(-strength_of_statement,-follow_up_updated_statement) %>%
#   mutate(val_bin = ifelse(value=="Yes",1,0)) %>%
#   filter(name != 'good_cops') %>%
#   filter(name != 'on_twitter') %>%
#   group_by(school_name) %>%
#   summarize(score = sum(val_bin)) %>%
#   ggplot(aes(x=fct_reorder(school_name,score),y=score)) +
#   geom_col(fill='white') +
#   xlab("School name") +
#   ylab("Score (sum of mentions of each name/phrase") +
#   coord_flip() +
#   hrbrthemes::theme_ft_rc()
# -------------------------------------------------------------------------
# /2020/26_Caribou.R:
# -------------------------------------------------------------------------
# 26 - caribou
library(tidyverse)
library(patchwork)


individuals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-23/individuals.csv')
locations <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-23/locations.csv')

# Animals whose recorded death cause involves predation, with the predator
# pulled out of the free-text death_cause field
predated <- individuals %>%
  filter(stringr::str_detect(death_cause, 'predation')) %>%
  mutate(predator = ifelse(str_detect(death_cause, 'wolf'), 'wolf',
                           ifelse(str_detect(death_cause, 'bear'), 'bear',
                                  'unknown')))
loc_pred <- locations %>%
  filter(animal_id %in% predated$animal_id)
unique(loc_pred$animal_id)


# Tracks without map ------------------------------------------------------

tagged_day_summary <- locations %>%
  group_by(animal_id) %>%
  summarize(last_dttm = max(timestamp)) %>%
  right_join(locations) %>%
group_by(animal_id) %>% 26 | mutate(day_of_life = as.numeric((last_dttm - timestamp)/(60*60*24))) %>% 27 | summarize(days_tagged = max(day_of_life)) 28 | 29 | xx <- locations %>% 30 | group_by(animal_id) %>% 31 | summarize(last_dttm = max(timestamp)) %>% 32 | right_join(locations) %>% 33 | filter(timestamp == last_dttm) %>% 34 | left_join(tagged_day_summary) %>% 35 | mutate(killed_by_preds = ifelse(animal_id %in% predated$animal_id,"Killed by predators","Other cause of death or tag loss")) %>% 36 | rename(`Number of days tagged` = days_tagged) 37 | 38 | p1 <- xx %>% 39 | arrange(desc(killed_by_preds)) %>% 40 | ggplot(aes(x=longitude,y=latitude, 41 | size = `Number of days tagged`, 42 | colour = killed_by_preds)) + 43 | geom_point(alpha=0.7) + 44 | scale_color_manual("",values=c('#F072B6','white')) + 45 | guides(size=guide_legend(override.aes=list(colour="white"), 46 | title.position = "top", 47 | ncol = 1), 48 | colour = guide_legend(ncol = 1))+ 49 | xlab('Longitude') + 50 | ylab('Latitude') + 51 | hrbrthemes::theme_ft_rc() + 52 | theme(axis.title.x = element_text(size = rel(1.5)), 53 | axis.title.y = element_text(size = rel(1.5)), 54 | legend.position = 'bottom') + 55 | labs(title = 'The work of wolves', 56 | subtitle = "Tagged caribou eaten by wolves on unceded First Nations territory") 57 | 58 | p1 59 | 60 | 61 | pdat2 <- loc_pred %>% 62 | group_by(animal_id) %>% 63 | summarize(last_dttm = max(timestamp)) %>% 64 | right_join(loc_pred) %>% 65 | group_by(animal_id) %>% 66 | mutate(day_of_life = as.numeric((last_dttm - timestamp)/(60*60*24))) %>% 67 | mutate(prop_of_tagged_time = day_of_life/(max(day_of_life))) 68 | 69 | ends <- loc_pred %>% 70 | group_by(animal_id) %>% 71 | mutate(last_dttm = max(timestamp)) %>% 72 | distinct(animal_id, .keep_all=TRUE) 73 | 74 | p2 <- pdat2 %>% 75 | ggplot(aes(x = longitude, y = latitude, group = animal_id,colour = prop_of_tagged_time)) + 76 | geom_path(alpha=0.5,lwd=1.2) + 77 | scale_colour_gradient("Proximity to mortality \n 
(fraction of tagged time)", low = '#FFF886',high = '#F072B6') + 78 | geom_point(data = ends,aes(x=longitude,y=latitude),colour="white",size = 2.5) + 79 | xlab('Longitude') + 80 | ylab('Latitude') + 81 | hrbrthemes::theme_ft_rc() + 82 | theme(axis.title.x = element_text(size = rel(1.5)), 83 | axis.title.y = element_text(size = rel(1.5)), 84 | legend.position = 'bottom', 85 | legend.key.width=unit(1,"cm")) + 86 | labs(caption = "Data: B.C. Ministry of Environment & Climate Change") 87 | 88 | 89 | p2 90 | 91 | png("Caribou.png",width = 12,height = 9,units = 'in',res = 120) 92 | p1+p2 93 | dev.off() 94 | 95 | 96 | 97 | # A bunch of mapping stuff that didn't work ------------------------------- 98 | 99 | # Get map (thank you Nyssa!!!) 100 | # x <- getData(name = "GADM", country = "CAN",level = 1) 101 | # bc <- x [x$NAME_1 == "British Columbia",] 102 | # bc@data$id <- rownames(bc@data) 103 | # create a data.frame from our spatial object 104 | # bcdf <- fortify(bc, region = "id") 105 | # 106 | # p1 <- ggplot(bcdf, aes(x = long,y = lat)) + 107 | # geom_polygon(fill = "grey30", size = 0.1) + 108 | # geom_path(data = loc_pred, aes(x = longitude, y = latitude, group = animal_id,colour = animal_id)) + 109 | # theme_classic() + 110 | # theme(legend.position='none') 111 | # p1 112 | 113 | -------------------------------------------------------------------------------- /2020/27_x-men.R: -------------------------------------------------------------------------------- 1 | # 27_xmen 2 | remotes::install_github("malcolmbarrett/claremontrun") 3 | 4 | comic_bechdel <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/comic_bechdel.csv') 5 | 6 | character_visualization <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/character_visualization.csv') 7 | 8 | characters <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/characters.csv') 9 | 10 | xmen_bechdel <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/xmen_bechdel.csv') 11 | 12 | covers <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/covers.csv') 13 | 14 | issue_collaborators <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/issue_collaborators.csv') 15 | 16 | locations <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/locations.csv') 17 | -------------------------------------------------------------------------------- /2020/28_coffee.R: -------------------------------------------------------------------------------- 1 | # Coffee 2 | library(tidyverse) 3 | library(ggridges) 4 | library(GGally) 5 | library(calecopal) 6 | library(patchwork) 7 | 8 | coffee_ratings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') 9 | 10 | # Some cleanup 11 | coffee <- coffee_ratings %>% 12 | mutate(country_of_origin=fct_recode(country_of_origin,"Hawai‘i" = "United States (Hawaii)", 13 | "United Republic of Tanzania"="Tanzania, United Republic Of")) 14 | 15 | # Get the countries with more than 30 samples (for freq) 16 | dat1 <- coffee %>% 17 | group_by(country_of_origin) %>% 18 | count() %>% 19 | filter(n>30) %>% 20 | left_join(coffee) 21 | 22 | order_to <- dat1 %>% 23 | group_by(country_of_origin) %>% 24 | summarize(mean(acidity)) %>% 25 | arrange(desc(`mean(acidity)`)) %>% 26 | pull(country_of_origin) 27 | 28 | dat1$country_of_origin <- factor(dat1$country_of_origin,levels = order_to) # because fct_reorder() didn't work and I do not have the time. 
29 | 30 | coffeepal <- cal_palette(name = "desert", n = 12,type = 'continuous') 31 | coffeetheme <- hrbrthemes::theme_ipsum_rc() + 32 | theme(legend.position = 'none',axis.title.x = element_text(size = rel(1.5)), 33 | axis.title.y = element_text(size = rel(1.5)), 34 | plot.title = element_text(hjust = -2), 35 | plot.subtitle = element_text(hjust = -2)) 36 | 37 | p1 <- dat1 %>% 38 | ggplot(aes(x=acidity,y=country_of_origin,fill=country_of_origin)) + 39 | ggridges::geom_density_ridges(scale = 0.95) + 40 | scale_fill_manual(values = coffeepal) + 41 | xlim(c(5,9)) + 42 | coffeetheme + 43 | xlab('Acidity grade') + 44 | ylab('Country of origin') + 45 | labs(title = "Which country of origin has the least acidic beans?", 46 | subtitle = "Solving a mystery that Megsie and her dad have been pondering for years!", 47 | caption = "Data: James LeDoux @ Buzzfeed") 48 | 49 | p1 50 | 51 | coffee2 <- filter(coffee,acidity>0 & aftertaste>0 & body>0) %>% 52 | rename(Acidity = acidity, Aftertaste = aftertaste,Body = body) %>% 53 | as.data.frame() 54 | 55 | spcols <- cal_palette("sierra1")[c(1,4)] 56 | 57 | p2 <- ggpairs(coffee2, 58 | columns = c('Acidity','Aftertaste','Body'), 59 | mapping = ggplot2::aes(colour=species,fill=species)) + 60 | scale_colour_manual(values = spcols) + 61 | scale_fill_manual(values = spcols) + 62 | coffeetheme 63 | 64 | png('Coffee1.png',width = 9,height = 8,units = 'in',res = 200) 65 | p1 66 | dev.off() 67 | 68 | png('Coffee2.png',width = 8,height = 8,units = 'in',res = 200) 69 | p2 70 | dev.off() -------------------------------------------------------------------------------- /2020/29_astronauts.R: -------------------------------------------------------------------------------- 1 | # 29: Astronauts 2 | library(tidyverse) 3 | library(hrbrthemes) 4 | library(vegan) 5 | library(beyonce) 6 | library(patchwork) 7 | 8 | source('HelperFunctions.R') 9 | 10 | astronauts <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-14/astronauts.csv') 11 | 12 | # smidge of cleanup 13 | astronauts <- astronauts %>% 14 | mutate(occupation = tolower(occupation)) 15 | 16 | # set theme and colours 17 | spacetheme <- theme_ft_rc() + 18 | theme(panel.grid.major = element_blank(), 19 | panel.grid.minor = element_blank()) + 20 | theme(axis.title.x = element_text(size = rel(1.5)), 21 | axis.title.y = element_text(size = rel(1.5)), 22 | legend.position = 'bottom') 23 | palitra <- c('#FFFFDE','#FFE9B3','#FFCC94','#FFA98C','#FF809F','#FF5AC8','#FF52FF') 24 | pal <- rev(lengthen_pal(x = 1:8,shortpal = beyonce_palette(101))) 25 | gpal <- beyonce_palette(48)[c(4,6)] 26 | 27 | p1 <- astronauts %>% 28 | filter(eva_hrs_mission>0) %>% 29 | ggplot(aes(x=year_of_mission,y=eva_hrs_mission,colour=sex)) + 30 | geom_point(alpha=0.5) + 31 | geom_smooth(method = 'lm') + 32 | xlab('Year of mission') + 33 | ylab('Extravehicular hours') + 34 | scale_colour_manual('',values = gpal) + 35 | theme(legend.position = 'bottom') 36 | 37 | dtable <- astronauts %>% # diversity table 38 | count(mission_title,year_of_mission,nationality) %>% 39 | pivot_wider(names_from = nationality, 40 | values_from = n, 41 | values_fill = 0) 42 | 43 | divs <- data.frame(shannon = diversity(dtable[,-c(1,2)]), 44 | dtable[,c(1,2)]) 45 | 46 | missions <- astronauts %>% 47 | distinct(mission_title,year_of_mission) %>% 48 | full_join(divs) 49 | 50 | p2 <- missions %>% 51 | group_by(year_of_mission,shannon) %>% 52 | summarize(nmissions = length(mission_title)) %>% 53 | filter(shannon>0) %>% 54 | ggplot(aes(x = year_of_mission,y = shannon,size = nmissions)) + 55 | geom_point(colour = 'lightblue',alpha=0.5) + 56 | xlim(c(1960,2020)) + 57 | xlab("Year") + 58 | ylab("Diversity index of astronaut nationalities") + 59 | labs(size = "Number of missions") 60 | 61 | 62 | singlecountrymissions <- missions %>% 63 | filter(shannon == 0) %>% 64 | 
left_join(astronauts %>% select(nationality,mission_title)) %>% 65 | mutate(natcat = fct_lump(nationality,n = 6)) %>% 66 | group_by(year_of_mission,natcat) %>% 67 | summarise(n = n()) 68 | 69 | p3 <- singlecountrymissions %>% 70 | ggplot(aes(x=year_of_mission,y=n,fill=natcat,colour=natcat)) + 71 | geom_col() + 72 | xlim(c(1960,2020)) + 73 | scale_fill_manual('Country',values=pal) + 74 | scale_colour_manual('Country',values=pal) + 75 | xlab('Year') + 76 | ylab('Single-country missions') + 77 | theme_ft_rc() 78 | 79 | png("29_astronauts.png",width = 12,height = 10,units = 'in',res = 200) 80 | ((p2 / p3 + plot_layout(heights = c(2,1),ncol=1)) | p1 ) + 81 | plot_layout(ncol = 2) + 82 | plot_annotation(title = 'The diversity of space missions', 83 | subtitle = 'Collaborative missions between countries, and gender in astronaut extravehicular hours over time', 84 | caption = 'Data: Mariya Stavnichuk and Tatsuya Corlett') & spacetheme 85 | dev.off() 86 | -------------------------------------------------------------------------------- /2020/29_astronauts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/29_astronauts.png -------------------------------------------------------------------------------- /2020/2_Transit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/2_Transit.png -------------------------------------------------------------------------------- /2020/2_transit.R: -------------------------------------------------------------------------------- 1 | # 2_Transit 2 | 3 | 4 | # Soundtrack -------------------------------------------------------------- 5 | #Swedish procedural drama my folks are watching 6 | 7 | 8 | # Packages ---------------------------------------------------------------- 9 | # 
devtools::install_github("davidsjoberg/ggbump") 10 | # install.packages("countrycode") # for merging iso2 country codes 11 | 12 | library(tidyverse) 13 | library(countrycode) 14 | library(ggbump) 15 | library(gghighlight) 16 | 17 | # Theme ------------------------------------------------------------------- 18 | theme_transit <- hrbrthemes::theme_ft_rc() + 19 | theme( 20 | panel.grid.major = element_blank(), 21 | panel.grid.minor = element_blank(), 22 | axis.title.y = element_blank(), 23 | axis.text.y = element_blank(), 24 | axis.ticks.y = element_blank(), 25 | legend.position = "none" 26 | ) 27 | 28 | 29 | 30 | 31 | # Data -------------------------------------------------------------------- 32 | tuesdata <- tidytuesdayR::tt_load(2021, week = 2) 33 | transit_cost <- tuesdata$transit_cost 34 | 35 | tdat <- countrycode::codelist %>% 36 | select(country.name.en, iso2c, continent) %>% 37 | right_join(transit_cost, by = c("iso2c" = "country")) 38 | 39 | 40 | # Summarize data 41 | cont_lengths <- tdat %>% 42 | group_by(continent, year) %>% 43 | summarize(tot_length = sum(length, na.rm = T)) %>% 44 | drop_na() %>% 45 | group_by(year) %>% 46 | mutate(rank = dense_rank(desc(tot_length))) 47 | 48 | 49 | # ggbump plot of total track length --------------------------------------- 50 | 51 | p1 <- cont_lengths %>% 52 | filter(year < 2021) %>% 53 | ggplot(aes(year, rank, color = continent, group = continent)) + 54 | geom_bump(smooth = 10, size = 1.5, lineend = "round") + 55 | scale_y_reverse() + 56 | ghibli::scale_colour_ghibli_d(name = "MononokeMedium", direction = -1) + 57 | gghighlight(unhighlighted_params = list( 58 | size = 1, 59 | colour = alpha("grey", 0.2) 60 | )) + 61 | facet_wrap(~continent, ncol = 1) 62 | 63 | p1 +theme_transit 64 | 65 | 66 | # Density plot of number of stations per track/numbers of years -------- 67 | 68 | p2 <- tdat %>% 69 | filter(!is.na(start_year)) %>% 70 | filter(!is.na(end_year)) %>% 71 | filter(!is.na(continent)) %>% 72 | filter(end_year != "X") 
%>% 73 | mutate_at(c("start_year", "end_year"), .funs = as.numeric) %>% 74 | mutate(nyears = end_year - start_year) %>% 75 | ggplot(aes(x = stations / nyears, color = continent, fill = continent)) + 76 | # coord_flip() + 77 | geom_density() + 78 | xlab("Number of stations built per year") + 79 | ghibli::scale_colour_ghibli_d(name = "MononokeMedium", direction = -1) + 80 | scale_fill_manual(values = alpha(ghibli::ghibli_palette(name = "MononokeMedium", direction = -1), 81 | alpha = 0.5)) + 82 | facet_wrap(~continent, ncol = 1, scales = "free_y") 83 | 84 | p2 + theme_transit 85 | 86 | # Put them together ------------------------------------------------------- 87 | library(patchwork) 88 | 89 | png("2_Transit.png",width = 11, height = 9, units = 'in',res = 200) 90 | p1 + p2 + 91 | plot_annotation( 92 | title = "Transit", 93 | subtitle = "Which continents had the longest rail projects, and how long did they take?", 94 | caption = "Data: Transit Costs Project" 95 | ) & theme_transit 96 | dev.off() 97 | -------------------------------------------------------------------------------- /2020/30_RSPCA.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | library(hrbrthemes) 3 | library(tidyverse) 4 | library(waffle) 5 | library(ghibli) 6 | library(cowplot) #for insets 7 | 8 | # Get the data ------------------------------------------------------------ 9 | 10 | animal_outcomes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv') 11 | animal_complaints <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_complaints.csv') 12 | 13 | tuesdata <- tidytuesdayR::tt_load(2020, week = 30) 14 | brisbane_complaints <- tuesdata$brisbane_complaints 15 | 16 | # Set up themes ----------------------------------------------------------- 17 | 18 | waffletheme <- theme_ft_rc() + 19 | 
theme(legend.position = 'none', 20 | axis.title.x=element_blank(), 21 | axis.text.x=element_blank(), 22 | axis.ticks.x=element_blank(), 23 | axis.title.y=element_blank(), 24 | axis.text.y=element_blank(), 25 | axis.ticks.y=element_blank(), 26 | panel.border = element_blank(), 27 | panel.grid.major = element_blank(), 28 | panel.grid.minor = element_blank(), 29 | plot.margin = margin(0, 0, 0, 0, "cm")) 30 | maptheme <- theme_ft_rc() + 31 | theme(legend.position = 'none', 32 | axis.title.x=element_blank(), 33 | axis.text.x=element_blank(), 34 | axis.ticks.x=element_blank(), 35 | axis.title.y=element_blank(), 36 | axis.text.y=element_blank(), 37 | axis.ticks.y=element_blank()) 38 | petcols <- ghibli::ghibli_palette(name = "YesterdayMedium",n = 7,type = 'discrete',direction = -1) 39 | 40 | # Data for waffle plots 41 | wafdat <- animal_outcomes %>% 42 | filter(outcome %in% c("Rehomed","Reclaimed") & year == 2018) %>% 43 | select(-year,-outcome, - Total) %>% 44 | pivot_longer(-animal_type,names_to = "region") 45 | 46 | wd <- wafdat %>% group_by(region) 47 | region_list <- group_split(wd) 48 | 49 | plotfun <- function(x){ 50 | x %>% 51 | ggplot(aes(fill = animal_type,values = value/100)) + 52 | geom_waffle(n_rows = 4,size=.3,colour='white',make_proportional = T) + 53 | scale_fill_manual(values = petcols) + 54 | labs(subtitle= paste(x$region[1],"( n = ", sum(x$value),"animals )" )) + 55 | waffletheme + 56 | coord_equal() 57 | } 58 | 59 | act <- region_list[[1]] %>% 60 | mutate(region=fct_recode(region,"Australian Capital Territory"="ACT")) %>% 61 | plotfun() 62 | nsw <- region_list[[2]] %>% 63 | mutate(region=fct_recode(region,"New South Wales"="NSW")) %>% 64 | plotfun() 65 | nt <- region_list[[3]] %>% 66 | mutate(region=fct_recode(region,"Northern Territory"="NT")) %>% 67 | plotfun() 68 | qld <- region_list[[4]] %>% 69 | mutate(region=fct_recode(region,"Queensland"="QLD")) %>% 70 | plotfun() 71 | sa <- region_list[[5]] %>% 72 | mutate(region=fct_recode(region,"South 
Australia"="SA")) %>% 73 | plotfun() 74 | tas <- region_list[[6]] %>% 75 | mutate(region=fct_recode(region,"Tasmania"="TAS")) %>% 76 | plotfun() 77 | vic <- region_list[[7]] %>% 78 | mutate(region=fct_recode(region,"Victoria"="VIC")) %>% 79 | plotfun() 80 | wa <- region_list[[8]] %>% 81 | mutate(region=fct_recode(region,"Western Australia"="WA")) %>% 82 | plotfun() 83 | 84 | 85 | 86 | # Map --------------------------------------------------------------------- 87 | # Inset maps: https://www.r-bloggers.com/inset-maps-with-ggplot2/ 88 | library(cowplot) 89 | library(ozmaps) 90 | library(sf) 91 | 92 | oz_states <- ozmaps::ozmap_states 93 | 94 | ozmap <- ggplot(oz_states) + 95 | geom_sf(fill='darkgrey',colour = 'white') + 96 | coord_sf() + 97 | waffletheme + 98 | labs(title = "RSPCA Outcomes", 99 | subtitle = "Animals rehomed or reclaimed in 2018") 100 | 101 | # Get standalone legend to place in bigger plot 102 | xx <- ggplot(region_list[[8]], 103 | aes(region, fill = animal_type)) + 104 | geom_bar(colour='white') + 105 | scale_fill_manual("Animal type",values = petcols) + 106 | theme_ft_rc(base_size = 14) 107 | legend <- get_legend(xx) 108 | 109 | 110 | 111 | # Save it! 
---------------------------------------------------------------- 112 | 113 | png("RSPCA1_v2.png",width = 14,height = 10,units = 'in',res=200) 114 | 115 | ggdraw() + 116 | draw_plot(ozmap) + 117 | draw_plot(act, x = .71, y = .77, width = .3, height = .3,scale = 0.9) + 118 | draw_plot(nsw, x = .71, y = .47, width = .3, height = .3,scale = 0.9) + 119 | draw_plot(nt, x = .3, y = .77, width = .3, height = .3,scale = 0.9) + 120 | draw_plot(qld, x = .71, y = .65, width = .3, height = .3,scale = 0.9) + 121 | draw_plot(sa, x = .3, y = -.08, width = .3, height = .3,scale = 0.9) + #sa 122 | draw_plot(tas, x = .71, y = -.08, width = .3, height = .3,scale = 0.9) + #tas 123 | draw_plot(vic, x = .71, y = .1, width = .3, height = .3,scale = 0.9) + #vic 124 | draw_plot(wa, x = 0, y = .59, width = .3, height = .3,scale = 0.9) + #wa 125 | draw_plot(legend, x=-.07, y=-0.05,width = .3, height = .3,scale = 1.5) + 126 | theme(plot.background = element_rect(fill="#252A32", color = NA)) 127 | 128 | dev.off() 129 | 130 | 131 | # Basic time series plot -------------------------------------------------- 132 | 133 | regionsummary <- animal_outcomes %>% 134 | pivot_longer(cols = ACT:WA,names_to = "region") %>% 135 | group_by(year,animal_type,region,outcome) %>% 136 | summarize(n = sum(value)) %>% 137 | group_by(year,animal_type,region) 138 | 139 | p2 <- regionsummary %>% 140 | summarize(alloutcomes = sum(n)) %>% 141 | right_join(regionsummary) %>% 142 | mutate(propn = n/alloutcomes) %>% 143 | filter(outcome %in% c("Rehomed","Reclaimed")) %>% 144 | group_by(year,animal_type,region) %>% 145 | summarize(claim_home = sum(propn)) %>% 146 | ggplot(aes(year,claim_home,colour=animal_type)) + 147 | geom_line(lwd = .7) + 148 | facet_wrap(~region,ncol = 4) + 149 | scale_colour_manual("Animal type",values = petcols) + 150 | theme_ft_rc() + 151 | theme(axis.title.x = element_text(size = rel(1.2),colour='white'), 152 | axis.title.y = element_text(size = rel(1.2),colour='white'), 153 | axis.text.x = 
element_text(colour = 'white'), 154 | axis.text.y = element_text(colour = 'white'), 155 | legend.text = element_text(colour = 'white'), 156 | legend.title = element_text(colour = 'white'), 157 | strip.text = element_text(colour = 'white') 158 | ) + 159 | ylab("Proportion reclaimed or rehomed") + 160 | xlab("Year") + 161 | labs(caption = "Data: Australia RSPCA") 162 | 163 | p2 164 | 165 | png("RSPCA2.png",width = 12,height = 7,units = 'in',res=200) 166 | p2 167 | dev.off() 168 | -------------------------------------------------------------------------------- /2020/31_penguins.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(ghibli) 3 | library(cowplot) 4 | library(dplyr) 5 | library(readr) 6 | 7 | 8 | tuesdata <- tidytuesdayR::tt_load(2020, week = 31) 9 | penguins <- tuesdata$penguins 10 | penguins <- penguins %>% 11 | filter(!is.na(sex)) 12 | 13 | # Raincloud stuff --------------------------------------------------------- 14 | packages <- c("cowplot", "readr", "ggplot2" , 15 | "dplyr", "lavaan") 16 | 17 | 18 | # Palettes and theme ------------------------------------------------------ 19 | penpal <- ghibli_palette("SpiritedMedium",direction = -1) 20 | pentheme <- hrbrthemes::theme_ipsum_rc() + 21 | theme(axis.title.x = element_text(size = rel(1.5)), 22 | axis.title.y = element_text(size = rel(1.5)), 23 | panel.grid.major.x = element_blank(), 24 | panel.grid.minor.x = element_blank()) 25 | 26 | 27 | # Plot stuff -------------------------------------------------------------- 28 | 29 | p1 <- ggplot(penguins, aes(x = sex, y = body_mass_g, fill = species)) + 30 | geom_flat_violin(position = position_nudge(x = .1, y = 0), 31 | adjust = 1.5, trim = FALSE, alpha = .5, colour = NA) + 32 | geom_point(aes(x = sex, y = body_mass_g, colour = species), 33 | position = position_jitter(width = .05), size = 1, shape = 20) + 34 | geom_boxplot(aes(x = sex, y = body_mass_g, fill = species), 35 | outlier.shape = NA, 
alpha = .5, width = .1, colour = "black") + 36 | scale_colour_manual("Species",values = penpal) + 37 | scale_fill_manual("Species",values = penpal) + 38 | xlab("Sex") + 39 | ylab("Body mass (g)") + 40 | coord_flip() + 41 | facet_wrap(~island,scale = "free_x",ncol = 1) + 42 | pentheme + 43 | theme(legend.position = 'none') 44 | 45 | p1 46 | 47 | p2 <- penguins %>% 48 | ggplot(aes(x = bill_length_mm,y = bill_depth_mm,colour = species)) + 49 | geom_point() + 50 | scale_colour_manual("Species",values = penpal) + 51 | facet_grid(island~sex) + 52 | xlab("Bill length (mm)") + 53 | ylab("Bill depth (mm)") + 54 | pentheme + 55 | theme(panel.grid.major = element_blank(), 56 | panel.grid.minor = element_blank(), 57 | legend.position = 'none') 58 | 59 | p3 <- penguins %>% 60 | ggplot(aes(x = body_mass_g,y = flipper_length_mm,colour = species)) + 61 | geom_point() + 62 | scale_colour_manual("Species",values = penpal) + 63 | facet_grid(island~sex) + 64 | xlab("Body mass (g)") + 65 | ylab("Flipper length (mm)") + 66 | pentheme + 67 | theme(panel.grid.major = element_blank(), 68 | panel.grid.minor = element_blank(), 69 | legend.position = 'bottom') 70 | 71 | png('Penguins.png',width = 12,height = 12,units = 'in',res=200) 72 | p1 + (p2/p3 + plot_layout(heights = c(4,3))) + 73 | plot_annotation(title = 'Palmer penguins', 74 | subtitle = 'Body mass and sex across species and island', 75 | caption = "Data: Dr. 
Kristen Gorman (via the palmerpenguins package)") & theme(text = element_text('Roboto Condensed',size = 16)) 76 | dev.off() 77 | 78 | -------------------------------------------------------------------------------- /2020/32_EuropeanEnergy.R: -------------------------------------------------------------------------------- 1 | # European energy 2 | library(tidyverse) 3 | 4 | 5 | # Get data ---------------------------------------------------------------- 6 | tuesdata <- tidytuesdayR::tt_load('2020-08-04') 7 | energy_types <- tuesdata$energy_types 8 | country_totals <- tuesdata$country_totals 9 | unique(country_totals$type) 10 | 11 | 12 | # Map stuff --------------------------------------------------------------- 13 | world_map <- map_data("world") 14 | ggplot(world_map) + 15 | geom_polygon(data = world_map, aes(x = long, y = lat, group = group), 16 | fill = "grey30", colour = "grey45", size = 0.1) + 17 | theme_void() 18 | -------------------------------------------------------------------------------- /2020/33_Airbender.R: -------------------------------------------------------------------------------- 1 | 2 | library(ggstream) 3 | library(tidyverse) 4 | library(tidytext) 5 | library(beyonce) 6 | library(patchwork) 7 | library(ggpomological) 8 | 9 | # Get the data ------------------------------------------------------------ 10 | tuesdata <- tidytuesdayR::tt_load('2020-08-11') 11 | avatar <- tuesdata$avatar 12 | scenes <- tuesdata$scene_description 13 | 14 | 15 | # Palettes ---------------------------------------------------------------- 16 | pn_pal <- c('#FFFFFF','#0096FF') 17 | pn_pal2 <- c('darkgrey','lightblue') 18 | char_pal <- beyonce_palette(90,n = 11)[c(1,4,10,11)] 19 | 20 | theme_avatar <- theme_pomological_fancy() + 21 | theme(text = element_text(family = "Herculanum", size = 12), 22 | legend.position = 'bottom', 23 | legend.box="vertical", legend.margin=margin(), 24 | panel.grid.major = element_blank(), 25 | panel.grid.minor = element_blank(), 26 | 
panel.border = element_blank()) 27 | # Plot 1: sentiments in episodes ------------------------------------------ 28 | xx <- avatar %>% 29 | mutate(clean_char = str_replace_all(character_words, "[^a-zA-Z\\s]", " ")) %>% 30 | mutate(clean_char = str_trim(clean_char, side = "both")) 31 | 32 | data('stop_words') 33 | 34 | tidied_words <- xx %>% 35 | unnest_tokens(word, clean_char) %>% 36 | anti_join(stop_words, by = "word") #get just the 'active words' 37 | 38 | d1 <- tidied_words %>% 39 | inner_join(get_sentiments("bing"), by = "word") %>% 40 | group_by(book,chapter_num,sentiment) %>% 41 | summarize(n = length(sentiment)) %>% 42 | pivot_wider(names_from = sentiment,values_from = n) %>% 43 | mutate(overall = positive - negative) %>% 44 | mutate(pn = ifelse(overall>0, "Positive", "Negative")) 45 | d1$book <- factor(d1$book,levels = c('Water','Earth','Fire')) 46 | 47 | # Does Appa's presence mean more positive words? 48 | appafreq <- avatar %>% 49 | mutate(n_appa = str_count(full_text,"Appa")) %>% 50 | group_by(book, chapter_num) %>% 51 | summarize(n_appa = sum(n_appa,na.rm=T)) 52 | 53 | # Appa appearances 54 | d1a <- d1 %>% 55 | left_join(appafreq,by=c('book','chapter_num')) %>% 56 | mutate(fake_y = -50, fake_size = 160) 57 | d1a$book <- factor(d1a$book,levels = c('Water','Earth','Fire')) 58 | 59 | d1a %>% 60 | ggplot(aes(x=chapter_num,y=fake_y,size=n_appa)) + 61 | geom_point(colour = 'white') + 62 | geom_point(data = d1a, 63 | aes(x=chapter_num,y=fake_y,size = fake_size), 64 | shape =1,colour = 'white') 65 | 66 | p1 <- d1 %>% 67 | ggplot(aes(x=chapter_num,y=overall)) + 68 | geom_hline(yintercept = 0, colour = 'darkgrey') + 69 | geom_segment(aes(x = chapter_num, xend=chapter_num, y=0, yend=overall,color = pn),lwd=4) + 70 | scale_colour_manual('',values = pn_pal2) + 71 | geom_point(data = d1a, 72 | aes(x=chapter_num,y=fake_y), 73 | shape = 1,size = 5, colour = 'darkgrey') + 74 | geom_point(data = d1a, 75 | aes(x = chapter_num,y = fake_y,size = n_appa), 76 | colour = 
'darkgrey') + 77 | scale_size(name = "Number of Appa mentions") + 78 | guides(size = "legend") + 79 | facet_wrap(~book,ncol = 1) + 80 | xlab("Chapter") + 81 | ylab("Overall Bing sentiment score") + 82 | theme_avatar + 83 | labs(title = "Avatar: The Last Airbender", 84 | subtitle = "Are sentiments better when Appa is present?", 85 | caption = "Data: {appa} package") 86 | 87 | png('NewAvatar.png',width = 7,height = 10,units = 'in',res=200) 88 | p1 89 | dev.off() 90 | 91 | 92 | # Plot 2: ggstream plot of character lines over time ---------------------- 93 | d2 <- avatar %>% 94 | mutate(character_lumped = fct_lump(character,n = 5,other_level = 'someone_else')) %>% 95 | group_by(book,chapter_num,character_lumped) %>% 96 | count() %>% 97 | filter(!character_lumped %in% c("Scene Description","someone_else")) 98 | d2$book <- factor(d2$book,levels = c('Water','Earth','Fire')) 99 | 100 | p2 <- d2 %>% 101 | ggplot(aes(x = chapter_num,y = n, 102 | fill = character_lumped)) + 103 | geom_stream(colour='white',size=0.1) + 104 | scale_fill_manual('',values = char_pal) + 105 | facet_wrap(~book,ncol = 1) + 106 | xlim(c(0,20)) + 107 | xlab('Chapter') + 108 | ylab('Number of lines') + 109 | hrbrthemes::theme_ft_rc() + 110 | theme(legend.position = 'bottom', 111 | axis.ticks.y = element_blank(), 112 | axis.text.y = element_blank(), 113 | panel.grid.major = element_blank(), 114 | panel.grid.minor = element_blank(), 115 | panel.border = element_blank(), 116 | panel.background = element_blank()) + 117 | labs(caption = "Data: {appa} package by Avery Robbins") 118 | p2 119 | 120 | 121 | # Put em together and add title etc --------------------------------------- 122 | 123 | png("33_Airbender.png",width = 11,height = 8,units = 'in',res = 200) 124 | p1 + p2 125 | dev.off() 126 | 127 | 128 | -------------------------------------------------------------------------------- /2020/33_Airbender.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/33_Airbender.png -------------------------------------------------------------------------------- /2020/34_ExoticPlants.R: -------------------------------------------------------------------------------- 1 | #34_ExoticPlants 2 | library(tidyverse) 3 | library(ggchicklet) 4 | 5 | source('HelperFunctions.R') 6 | 7 | tuesdata <- tidytuesdayR::tt_load(2020, week = 34) 8 | 9 | plants <- tuesdata$plants 10 | actions <- tuesdata$actions 11 | threats <- tuesdata$threats 12 | 13 | extrafont::loadfonts() 14 | 15 | 16 | # Palettes ---------------------------------------------------------------- 17 | dangerpal <- calecopal::cal_palette(name = 'superbloom3')[1:2] 18 | threatpal <- calecopal::cal_palette(name = 'superbloom2') %>% 19 | lengthen_pal(x = 1:12) 20 | bgcolor <- "#FFFFF8" 21 | countrycolor <- "#dfdacd" 22 | 23 | world_map <- map_data("world") 24 | 25 | plant_statuses <- plants %>% 26 | select(country, group, red_list_category) %>% 27 | group_by(country,red_list_category) %>% 28 | count() %>% 29 | right_join(world_map,by = c("country" = "region") ) %>% 30 | filter(!is.na(red_list_category)) 31 | 32 | 33 | p1 <- ggplot() + 34 | geom_polygon(data = world_map, aes(x = long, y = lat, group = group), 35 | fill = countrycolor, colour = "grey45", size = 0.1) + 36 | geom_polygon(data = plant_statuses, 37 | aes(x = long, y = lat, group = group,fill = log(n)), 38 | colour = "grey45", size = 0.1) + 39 | facet_wrap(~red_list_category, ncol = 2) + 40 | theme_void(base_family = 'Lato') + 41 | scale_fill_stepsn('Number of \nspecies \n(log-scale)',colours = dangerpal) + 42 | theme(#panel.border = element_rect(fill = bgcolor,colour = bgcolor), 43 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 44 | plot.background = element_rect(fill = bgcolor,colour = bgcolor), 45 | panel.grid = element_blank(), 46 | legend.position = 'right') 47 | p1 48 | 49 | 50 | # Chicklet plots 
---------------------------------------------------------- 51 | d2 <- threats %>% 52 | group_by(red_list_category,continent,threat_type) %>% 53 | summarize(nthreatened = sum(threatened,na.rm = TRUE)) 54 | 55 | p2 <- d2 %>% 56 | ggplot(aes(continent, nthreatened,fill = threat_type)) + 57 | geom_chicklet(width = 0.75,colour = bgcolor) + 58 | scale_fill_manual("Threat type",values = threatpal) + 59 | xlab("Continent") + 60 | ylab("Number of species") + 61 | theme(text = element_text(family = 'Lato'), 62 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 63 | plot.background = element_rect(fill = bgcolor,colour = bgcolor), 64 | #panel.border = element_rect(fill = bgcolor,colour = bgcolor), 65 | legend.background = element_rect(fill = bgcolor, colour = bgcolor), 66 | legend.key = element_rect(fill = bgcolor, color = NA), 67 | panel.grid = element_blank(), 68 | strip.background = element_blank(), 69 | legend.position = 'bottom') + 70 | facet_wrap(~red_list_category,ncol = 2,scales = "free_x") + 71 | coord_flip() 72 | p2 73 | 74 | 75 | library(patchwork) 76 | 77 | all <- p1 + p2 + 78 | plot_layout(heights = c(3,1)) + 79 | plot_annotation(title = 'Plants in danger', 80 | subtitle = 'Threats to plants that are extinct or extinct in the wild', 81 | caption = 'Data: Florent Laverne & Cédric Scherer') & 82 | theme(text = element_text('Lato',size = 14), 83 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 84 | plot.background = element_rect(fill = bgcolor,colour = bgcolor)) 85 | 86 | png('Plants.png',width = 12,height = 9,units = 'in',res = 200) 87 | all 88 | dev.off() 89 | 90 | 91 | 92 | # cowplot option (avoid weird projection issues from axis align wi -------- 93 | library(cowplot) 94 | p1 <- p1 + 95 | coord_map(xlim=c(-180,180)) + # fix projection (thanks Dan) 96 | theme(plot.margin = unit(c(0,0,0,6), "lines")) + # add whitespace to the left to sort of align 97 | labs(title = 'Plants in danger', 98 | subtitle = 'Threats to plants that are 
extinct or extinct in the wild') 99 | p2 <- p2 + 100 | labs(caption = 'Data: Florent Laverne & Cédric Scherer') 101 | 102 | png('Plants_cowplot.png',width = 12,height = 9,units = 'in',res = 200) 103 | plot_grid(p1,p2,ncol = 1) + 104 | theme(plot.background = element_rect(fill = bgcolor,colour = bgcolor)) 105 | dev.off() 106 | -------------------------------------------------------------------------------- /2020/36_Crops.R: -------------------------------------------------------------------------------- 1 | # 36: crop yields 2 | library(tidyverse) 3 | library(hrbrthemes) 4 | library(calecopal) 5 | 6 | tuesdata <- tidytuesdayR::tt_load(2020, week = 36) 7 | 8 | # Data 9 | key_crop_yields <- tuesdata$key_crop_yields 10 | fertilizer <- tuesdata$cereal_crop_yield_vs_fertilizer_application 11 | tractors <- tuesdata$cereal_yields_vs_tractor_inputs_in_agriculture 12 | land_use <- tuesdata$land_use_vs_yield_change_in_cereal_production 13 | arable_land <- tuesdata$arable_land_pin 14 | 15 | # Palettes 16 | source('HelperFunctions.R') 17 | croppal <- lengthen_pal(x = 1:11,shortpal = cal_palette('bigsur')) 18 | 19 | # give data nicer formatting ---------------------------------------------- 20 | clean_names <- function(x) word(x,1) %>% tolower() 21 | 22 | d1 <- key_crop_yields %>% 23 | rename_with(clean_names) 24 | 25 | 26 | # How have yields changed over time? 
-------------------------------------- 27 | 28 | d1a <- d1 %>% 29 | group_by(year) %>% 30 | summarize_at(vars(wheat:bananas),.funs = sum, na.rm = TRUE) %>% 31 | pivot_longer(names_to = "crop",cols = wheat:bananas) 32 | 33 | d1b <- d1a %>% 34 | group_by(year) %>% 35 | summarize(tot_prod = sum(value,na.rm = T)) %>% 36 | right_join(d1a) %>% 37 | mutate(prop = value/tot_prod) 38 | 39 | d1b %>% group_by(year) %>% summarize(sum(prop)) 40 | 41 | p1 <- d1a %>% 42 | ggplot(aes(x=year,y=value,fill = crop)) + 43 | geom_area(color = 'white') + 44 | scale_fill_manual('Crop', values = croppal) + 45 | xlab('Year') + 46 | ylab('Global yield (tonnes per hectare)') + 47 | theme_ipsum_rc() + 48 | labs(title = '', #blank labels to match alginment of plots 49 | subtitle = '', 50 | caption = 'Data: Our World in Data') 51 | 52 | 53 | # Which crops are dominant for each country? ------------------------------ 54 | # If you wanted to make a column with the max value from all yield cols 55 | # prod_cols <- d1 %>% select(wheat:cocoa) %>% names() 56 | # test <- d1 %>% 57 | # mutate(maxval = pmax(!!!rlang::syms(prod_cols),na.rm = TRUE)) 58 | 59 | d1x <- d1 %>% 60 | mutate(across(everything(), 61 | ~replace_na(.x, 0))) 62 | 63 | # Messy, but you know what? 
I don't care 64 | d1x$dom_crop <- colnames(d1x[,-(1:3)])[max.col(d1x[,-(1:3)],ties.method="first")] 65 | country_key <- c(`United States` = "USA", 66 | `Democratic Republic of Congo` = "Democratic Republic of the Congo", 67 | Congo = "Republic of Congo") 68 | d1y <- d1x %>% 69 | filter(year == 2018) %>% 70 | select(entity,dom_crop) %>% 71 | mutate(entity = recode(entity, 72 | !!!country_key)) 73 | 74 | croppal2 <- croppal[c(1:4,6,8)] # first four plus maize and potatoes 75 | bgcolor <- "white" 76 | 77 | crop_map <- map_data("world") %>% 78 | left_join(d1y,by = c('region' = 'entity')) 79 | 80 | p2 <- ggplot(crop_map) + 81 | geom_polygon(data = crop_map, aes(x = long, y = lat, 82 | group = group), 83 | fill = "grey30", 84 | size = 0.1) + 85 | geom_polygon(data = crop_map, aes(x = long, y = lat, 86 | group = group,fill = dom_crop), 87 | colour = "white", size = 0.1) + 88 | coord_map(xlim = c(-180,180)) + 89 | scale_fill_manual('Dominant crop (2018)', 90 | values = croppal2) + 91 | theme_ipsum_rc() + 92 | theme(axis.title.x = element_blank(), 93 | axis.title.y = element_blank(), 94 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 95 | plot.background = element_rect(fill = bgcolor,colour = bgcolor), 96 | panel.grid = element_blank(), 97 | panel.grid.major = element_blank(), 98 | axis.text.y = element_blank(), 99 | legend.position = 'none', 100 | plot.margin = unit(c(0,0,0,0), "lines")) + 101 | labs(title = 'Crop production', 102 | subtitle = 'Dominant stocks in each country and global yield') 103 | 104 | p2 105 | 106 | library(cowplot) 107 | png('36_Crops.png',width = 12,height = 6,units = 'in',res = 200) 108 | plot_grid(p2,p1,nrow = 1) + 109 | theme(plot.background = element_rect(fill = bgcolor,colour = bgcolor)) 110 | dev.off() -------------------------------------------------------------------------------- /2020/36_Crops.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/36_Crops.png -------------------------------------------------------------------------------- /2020/37_Friends.R: -------------------------------------------------------------------------------- 1 | #37_Friends 2 | 3 | library(tidyverse) 4 | 5 | tuesdata <- tidytuesdayR::tt_load('2020-09-08') 6 | 7 | friends <- tuesdata$friends 8 | friends_info <- tuesdata$friends_info 9 | friends_emotions <- tuesdata$friends_emotions 10 | 11 | -------------------------------------------------------------------------------- /2020/43_MoreBeer.R: -------------------------------------------------------------------------------- 1 | # Week 43 2 | 3 | library(tidyverse) 4 | library(ggbeeswarm) 5 | library(hrbrthemes) 6 | library(calecopal) 7 | library(patchwork) 8 | 9 | 10 | tuesdata <- tidytuesdayR::tt_load(2020, week = 43) 11 | states_regions <- data.frame("state" = state.abb, 12 | "region" = state.region) 13 | 14 | beer_awards <- tuesdata$beer_awards 15 | 16 | # Cleanup on aisle b 17 | beer_awards$state[which(beer_awards$state == 'Ak')] <- "AK" 18 | beer_awards$state[which(beer_awards$state == 'wa')] <- "WA" 19 | 20 | beer_counts <- beer_awards %>% 21 | group_by(medal, state, year) %>% 22 | count() %>% 23 | ungroup() %>% 24 | group_by(state, medal) %>% 25 | summarize(mean_nawards = mean(n,na.rm = T)) %>% 26 | left_join(states_regions) 27 | 28 | beer_counts$region[which(beer_counts$state == 'DC')] <- "South" 29 | 30 | 31 | 32 | # Plot 1: Beeswarm plot of medals by state -------------------------------- 33 | 34 | medal_pal <- cal_palette("chaparral1")[c(4,1,2)] 35 | 36 | maxlabs <- beer_counts %>% 37 | group_by(region) %>% 38 | filter(mean_nawards == max(mean_nawards) & !is.na(region)) 39 | 40 | p1 <- beer_counts %>% 41 | mutate(region = fct_relevel(region,"West","North Central","South","Northeast")) %>% 42 | ggplot(aes(x = region,y = mean_nawards,colour = medal)) + 43 | 
geom_quasirandom(method = "tukeyDense",size = 2) + 44 | xlab("Region") + 45 | ylab("Mean number of awards") + 46 | scale_colour_manual("Medal", values = medal_pal) + 47 | coord_flip() + 48 | geom_text_repel(data = maxlabs, 49 | aes(label = state), 50 | force = 10, nudge_x = 0.2,show.legend = FALSE) 51 | p1 52 | 53 | 54 | dat2 <- beer_awards %>% 55 | left_join(states_regions) 56 | 57 | # DC is a state now, and MD and VA are both "South" so...! 58 | dat2$region[which(dat2$state == "DC")] <- "South" 59 | 60 | dat3 <- dat2 %>% 61 | group_by(region,year,medal) %>% 62 | count() 63 | 64 | p2 <- dat3 %>% 65 | ggplot(aes(x=year,y=n,fill = medal)) + 66 | geom_area(lwd=0.3, colour = 'white') + 67 | facet_wrap(~region, scales = "free_y",ncol = 1) + 68 | scale_fill_manual("Medal",values = medal_pal) + 69 | theme(legend.position = 'none') + 70 | xlab("Year") + 71 | ylab("Number of states with awards") + 72 | guides(fill = FALSE) 73 | #+ 74 | # guides(colour = FALSE) 75 | 76 | beertheme <- theme_ipsum_rc(base_size = 12) + 77 | theme(panel.grid.major = element_blank(), 78 | panel.grid.minor = element_blank(), 79 | axis.title.x = element_text(size = rel(1.5)), 80 | axis.title.y = element_text(size = rel(1.5)), 81 | plot.margin=unit(c(3,3,3,3),"mm")) 82 | 83 | png("43_MoreBeer.png",width = 10,height = 8,units = "in",res = 200) 84 | p1 + p2 + 85 | plot_layout(ncol = 2, guides = 'collect') + 86 | plot_annotation(title = "Beer awards", 87 | subtitle = "Which regions of the U.S. 
cleaned up at the Great American Beer Festival?", 88 | caption = "Data: The Great American Beer Festival") & beertheme 89 | dev.off() 90 | -------------------------------------------------------------------------------- /2020/43_MoreBeer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/43_MoreBeer.png -------------------------------------------------------------------------------- /2020/48_WATrails.R: -------------------------------------------------------------------------------- 1 | #48_trails 2 | # By Megsie Siple 3 | library(tidyverse) 4 | library(patchwork) 5 | library(ggraph) 6 | 7 | # Palettes ---------------------------------------------------------------- 8 | # to make the colour palette, I cloned David L Miller's gist with the usfws palettes. Thank you Dave! 9 | # Gist URL: https://gist.github.com/dill/1729bbc9ad4f915942045f96a6cfbf9d 10 | source(here::here("USFWS-DLL","usfws_palette.R")) 11 | source(here::here("HelperFunctions.R")) 12 | p <- usfws_palette(n = 5,name = "dolly") 13 | trailpal <- lengthen_pal(x = 1:12, p) 14 | 15 | Emrld <- c("#d3f2a3", "#97e196", "#6cc08b", "#4c9b82", "#217a79", "#105965", "#074050") #from Carto colors 16 | FeatureColors <- c("#d39c83","#e597b9","#e4f1e1","#6cc08b",Emrld[4],"#d1afe8","#85c4c9") 17 | #fall, flowers, mtns, nodogs, oldgrowth, ridges, water 18 | 19 | hiketheme <- hrbrthemes::theme_ft_rc() + 20 | theme(text = element_text(colour = 'white'), 21 | title = element_text(colour = 'white'), 22 | axis.text = element_text(colour = 'white'), 23 | strip.text = element_text(colour = 'white'), 24 | plot.background = element_rect(fill = Emrld[7]), 25 | panel.border = element_blank(), 26 | panel.background = element_blank(), 27 | panel.grid = element_blank()) 28 | 29 | 30 | # Get data ---------------------------------------------------------------- 31 | tuesdata <- 
tidytuesdayR::tt_load('2020-11-24') 32 | hike_data <- tuesdata$hike_data 33 | 34 | # minor cleanup 35 | hike_data <- hike_data %>% 36 | mutate_at(c("gain","highpoint","rating"),.funs = as.numeric) %>% 37 | mutate(simple_location = sub("\\ --.*", "", location)) 38 | 39 | hdat <- hike_data %>% 40 | mutate(nfeatures = lengths(features)) %>% 41 | unnest(features) %>% 42 | mutate(pres = 1) %>% 43 | pivot_wider(names_from = features, 44 | values_from = pres, 45 | values_fill = 0,values_fn = length ) 46 | 47 | p1 <- hike_data %>% 48 | ggplot(aes(x = lengths(features), y = rating)) + 49 | annotate("rect", xmin = 7.5, xmax = 12.5, 50 | ymin = 3, ymax = 5.5, 51 | fill = 'white', colour = NA, alpha = 0.1) + 52 | geom_point(alpha = 0.5,colour = 'white') + 53 | geom_smooth(method = 'lm',colour = trailpal[11]) + 54 | ylim(c(0,6.2))+ 55 | annotate(geom = "text", x = 10, y = 6, 56 | family = "Roboto Condensed Light", 57 | label = "Megsie-preferred \nhiking zone", 58 | hjust = "center", 59 | colour = "white") + 60 | xlab("Number of features") + 61 | ylab("Hiker rating") + 62 | hiketheme 63 | 64 | p1 65 | 66 | d1 <- hdat %>% 67 | group_by(simple_location) %>% 68 | summarize(flowers = mean(`Wildflowers/Meadows`), 69 | mtnviews = mean(`Mountain views`), 70 | oldtrees = mean(`Old growth`)) 71 | 72 | p2 <- d1 %>% 73 | pivot_longer(cols = flowers:oldtrees) %>% 74 | mutate(name = recode(name, flowers = "Flowers", 75 | mtnviews = "Mountain views", 76 | oldtrees = "Old-growth forest")) %>% 77 | ggplot(aes(x=simple_location, y=value, fill = simple_location)) + 78 | geom_col() + 79 | facet_wrap(~name) + 80 | scale_fill_manual(values = trailpal[-1]) + 81 | scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) + 82 | xlab("") + 83 | ylab("") + 84 | coord_polar(clip = "off") + 85 | labs(title = 'Searching for the best hike in Washington State',subtitle = 'Features and ratings for hikes in Washington State') + 86 | hiketheme + 87 | theme(legend.position = 'none', 88 | axis.text.x = 
element_text(size = 9), 89 | axis.text.y = element_blank(), 90 | panel.spacing = unit(4, "lines")) 91 | 92 | 93 | 94 | 95 | 96 | # Test out a network ------------------------------------------------------ 97 | fdat <- hdat %>% 98 | select(`Dogs allowed on leash`:Summits) 99 | 100 | trythis <- fdat %>% #woe is me 101 | mutate(nodogs_flowers = ifelse(`Dogs not allowed`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 102 | mutate(nodogs_water = ifelse(`Dogs not allowed`==1 & `Waterfalls`==1,1,0)) %>% 103 | mutate(nodogs_mtns = ifelse(`Dogs not allowed`==1 & `Mountain views`==1,1,0)) %>% 104 | mutate(nodogs_oldgrowth = ifelse(`Dogs not allowed`==1 & `Old growth`==1,1,0)) %>% 105 | 106 | mutate(mtns_oldgrowth = ifelse(`Mountain views`==1 & `Old growth`==1,1,0)) %>% 107 | mutate(mtns_flowers = ifelse(`Mountain views`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 108 | mutate(mtns_water = ifelse(`Mountain views`==1 & `Waterfalls`==1,1,0)) %>% 109 | 110 | mutate(water_flowers = ifelse(`Waterfalls`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 111 | mutate(water_oldgrowth = ifelse(`Waterfalls`==1 & `Old growth`==1,1,0)) %>% 112 | 113 | mutate(fall_flowers = ifelse(`Fall foliage`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 114 | mutate(fall_water = ifelse(`Fall foliage`==1 & `Waterfalls`==1,1,0)) %>% 115 | mutate(fall_flowers = ifelse(`Fall foliage`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 116 | mutate(fall_oldgrowth = ifelse(`Fall foliage`==1 & `Old growth`==1,1,0)) %>% 117 | 118 | mutate(ridges_flowers = ifelse(`Ridges/passes`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 119 | mutate(ridges_water = ifelse(`Ridges/passes`==1 & `Waterfalls`==1,1,0)) %>% 120 | mutate(ridges_flowers = ifelse(`Ridges/passes`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 121 | mutate(ridges_oldgrowth = ifelse(`Ridges/passes`==1 & `Old growth`==1,1,0)) %>% 122 | 123 | select(nodogs_flowers:ridges_oldgrowth) %>% 124 | filter_all(any_vars(. 
!= 0)) %>% # remove columns that are all zeroes 125 | pivot_longer(cols = nodogs_flowers:ridges_oldgrowth) %>% 126 | group_by(name) %>% 127 | summarize(nhikes = sum(value)) %>% 128 | separate(col = name,into = c("from","to")) %>% 129 | uncount(nhikes) 130 | 131 | graph <- as_tbl_graph(trythis) %>% 132 | mutate(Popularity = centrality_degree(mode = 'in')) 133 | 134 | p3 <- ggraph(graph, layout = 'kk') + 135 | geom_edge_fan2(aes(alpha = stat(index)), 136 | colour = "white",alpha = 0.2, 137 | show.legend = FALSE) + 138 | geom_node_point(aes(color = name), size = 5) + 139 | scale_colour_manual("",values = FeatureColors) + 140 | labs(x = NULL, y = NULL, 141 | caption = "Data: Washington Trails Association & TidyX") + 142 | hiketheme + 143 | theme(axis.text.x = element_blank(), 144 | axis.text.y = element_blank()) 145 | 146 | 147 | 148 | # Save it ----------------------------------------------------------------- 149 | png('48_WATrails.png',width = 12,height = 11,units = 'in',res=200) 150 | p2 + (p1|p3) + plot_layout(ncol = 1) 151 | dev.off() 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /2020/48_WATrails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/48_WATrails.png -------------------------------------------------------------------------------- /2020/49_Shelters.R: -------------------------------------------------------------------------------- 1 | 2 | #49_TorontoShelters 3 | 4 | library(tidyverse) 5 | tuesdata <- tidytuesdayR::tt_load('2020-12-01') 6 | shelters <- tuesdata$shelters 7 | -------------------------------------------------------------------------------- /2020/8_CO2Food.R: -------------------------------------------------------------------------------- 1 | #Feb 18, 2020 2 | # Food consumption and CO2 3 | library(sf) 4 | library(mapedit) 5 | library(leaflet) 6 | 
library(tidyverse) 7 | library(rnaturalearth) 8 | library(rnaturalearthdata) 9 | library(stringr) 10 | 11 | food_consumption <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv') 12 | str(food_consumption) 13 | unique(food_consumption$country) 14 | unique(food_consumption$food_category) 15 | 16 | # emissions per pound are very slightly different! 17 | food_consumption %>% 18 | group_by(food_category) %>% 19 | mutate(emission_per = co2_emmission/consumption) %>% 20 | as.data.frame() 21 | 22 | nfc <- food_consumption %>% 23 | mutate(emiss_per_cons = co2_emmission/consumption) 24 | # But I'm not going to worry about it 25 | 26 | # Let's map it! 27 | world <- ne_countries(scale = "medium", returnclass = "sf") 28 | 29 | pe <- food_consumption %>% 30 | filter(food_category == "Milk - inc. cheese") %>% 31 | mutate(country.corr = str_replace(country,"USA","United States")) 32 | 33 | worldp <- left_join(world,pe,by=c('name'='country.corr')) 34 | 35 | dairy <- ggplot(data = worldp) + 36 | geom_sf(aes(fill=co2_emmission)) + 37 | scale_fill_viridis_c("Emissions \n (kg/person/yr)",option = 'inferno',direction = -1) + 38 | theme_minimal() + 39 | ggtitle(expression('Per capita '~CO[2]~' emissions from milk/cheese consumption')) 40 | 41 | png('Dairy.png',width=6,height=4,units = 'in',res=150) 42 | dairy 43 | dev.off() 44 | 45 | 46 | # Fancy dark background version ------------------------------------------- 47 | # To make the fancy dark ones that everyone makes on twitter 48 | library(extrafont) #not sure which of these is required 49 | library(extrafontdb) 50 | library(hrbrthemes) 51 | 52 | dairy2 <- dairy + theme_ft_rc() 53 | dairy2 54 | -------------------------------------------------------------------------------- /2020/9_Measles.R: -------------------------------------------------------------------------------- 1 | # Measles vaccines 2 | # Moody bc I listened to the awful Feb 25th democratic 
debate while coding. 3 | library(tidyverse) 4 | library(stringr) 5 | library(sf) 6 | library(mapedit) 7 | library(leaflet) 8 | library(tidyverse) 9 | library(rnaturalearth) 10 | library(rnaturalearthdata) 11 | 12 | measles <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv') 13 | 14 | measles.summ <- measles %>% 15 | group_by(state,year) %>% 16 | summarize(mean.mmr=mean(mmr)) %>% 17 | filter(mean.mmr != -1) %>% 18 | as.data.frame() 19 | 20 | states <- map_data("state") %>% 21 | mutate(State = str_to_title(region)) 22 | 23 | test <- measles.summ %>% # normally I would do something with date but I don't wanna 24 | select(state,mean.mmr) %>% 25 | right_join(states, by=c("state"="State")) 26 | 27 | mdf <- as.data.frame(measles) %>% 28 | mutate(lat = as.numeric(lat), 29 | lng = as.numeric(lng)) %>% 30 | filter(lng<(-70) & !is.na(lng) & !is.na(lat)) %>% #filter out a few funky points 31 | filter(!is.na(xper) & xper>25) #forget why I filtered for xper>25 32 | 33 | mapplot <- test %>% 34 | ggplot(aes(long,lat)) + 35 | xlab("") + 36 | ylab("") + 37 | geom_polygon(aes(group=state,fill=mean.mmr)) + 38 | scale_fill_viridis_c("") + 39 | hrbrthemes::theme_ft_rc() + 40 | theme(axis.ticks = element_blank(), 41 | panel.grid = element_blank(), 42 | axis.text = element_blank()) + 43 | ggtitle("Mean statewide MMR vaccination rate") 44 | mapplot 45 | 46 | # Map with white points 47 | personalvax <- mapplot + 48 | geom_point(data=mdf,aes(x=lng,y=lat,size=xper), 49 | colour='white', alpha=0.5) + 50 | scale_size_continuous("Percentage of students \n exempted for \n personal reasons (>25%)") + 51 | ggtitle("Students exempted from vaccines for personal reasons") 52 | 53 | 54 | bars <- measles %>% 55 | filter(mmr != -1 & overall != -1) %>% #take out NAs 56 | group_by(state) %>% 57 | summarize(mean.personal = mean(xper,na.rm=T)) %>% 58 | filter(!is.na(mean.personal)) %>% 59 | ggplot(aes(x=state,y=mean.personal)) + 60 | 
geom_col(fill='white') + 61 | ylab("Mean percentage exempted\n for personal reasons") + 62 | xlab("State") + 63 | hrbrthemes::theme_ft_rc() 64 | 65 | scatter <- measles %>% 66 | ggplot(aes(x=xmed,y=xper)) + 67 | geom_point(alpha=0.5,colour='white') + 68 | hrbrthemes::theme_ft_rc() + 69 | xlab("Exempted \nfor medical reasons (%)") + 70 | ylab("Exempted \nfor personal reasons (%)") + 71 | hrbrthemes::theme_ft_rc() 72 | 73 | png("TidyFeb25.png",width = 10,height=4,units = 'in',res = 150) 74 | p2 <- gridExtra::grid.arrange(bars,scatter,ncol=1) 75 | cowplot::plot_grid(mapplot,p2,rel_widths = c(2,1)) 76 | dev.off() 77 | -------------------------------------------------------------------------------- /2020/AnimalCrossing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/AnimalCrossing.png -------------------------------------------------------------------------------- /2020/AreaPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/AreaPlot.png -------------------------------------------------------------------------------- /2020/Beer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Beer.png -------------------------------------------------------------------------------- /2020/Broadway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Broadway.png -------------------------------------------------------------------------------- /2020/Caribou.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Caribou.png -------------------------------------------------------------------------------- /2020/Cocktails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Cocktails.png -------------------------------------------------------------------------------- /2020/Coffee1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Coffee1.png -------------------------------------------------------------------------------- /2020/Coffee2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Coffee2.png -------------------------------------------------------------------------------- /2020/First.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/First.png -------------------------------------------------------------------------------- /2020/GDPR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/GDPR.png -------------------------------------------------------------------------------- /2020/HipHop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/HipHop.png -------------------------------------------------------------------------------- /2020/Hockey.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Hockey.png -------------------------------------------------------------------------------- /2020/Measles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Measles.png -------------------------------------------------------------------------------- /2020/Measles.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Measles.tiff -------------------------------------------------------------------------------- /2020/NewAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/NewAvatar.png -------------------------------------------------------------------------------- /2020/Penguins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Penguins.png -------------------------------------------------------------------------------- /2020/Plants.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Plants.png -------------------------------------------------------------------------------- /2020/Plants2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Plants2.png 
-------------------------------------------------------------------------------- /2020/Plants_cowplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Plants_cowplot.png -------------------------------------------------------------------------------- /2020/RSPCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/RSPCA.png -------------------------------------------------------------------------------- /2020/RSPCA2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/RSPCA2.png -------------------------------------------------------------------------------- /2020/Salary_Tuition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Salary_Tuition.png -------------------------------------------------------------------------------- /2020/Second.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Second.png -------------------------------------------------------------------------------- /2020/TheOffice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/TheOffice.png -------------------------------------------------------------------------------- /2020/TourdeFrance.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/TourdeFrance.png -------------------------------------------------------------------------------- /2020/Tuition.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Tuition.gif -------------------------------------------------------------------------------- /2020/statements.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/statements.csv -------------------------------------------------------------------------------- /2021/11_2021_bechdel.R: -------------------------------------------------------------------------------- 1 | # 11_2021: Bechdel test 2 | source("HelperFunctions.R") 3 | 4 | tuesdata <- tidytuesdayR::tt_load(2021, week = 11) 5 | bechdel <- tuesdata$raw_bechdel 6 | movies <- tuesdata$movies 7 | 8 | library(tidyverse) 9 | 10 | 11 | # Do American movies pass the Bechdel test less often than others? 
# --------------------------------------------------------------------------
# Yearly counts of films by Bechdel outcome, split into US vs. non-US films.
# `isamerican` is TRUE when "USA" appears anywhere in the country field.
d1 <- movies %>%
  mutate(isamerican = str_detect(string = country, pattern = "USA")) %>%
  filter(!is.na(isamerican)) %>%
  count(year, clean_test, isamerican) %>%
  # Readable outcome labels, then ordered from worst to best
  mutate(
    clean_test = recode(clean_test,
      "dubious" = "Probably doesn't pass",
      "nowomen" = "Fewer than two women",
      "notalk" = "Women don't talk to each other",
      "men" = "Women only talk about men",
      "ok" = "Passes the Bechdel test"
    ),
    clean_test = factor(clean_test,
      levels = c(
        "Fewer than two women",
        "Women don't talk to each other",
        "Women only talk about men",
        "Probably doesn't pass",
        "Passes the Bechdel test"
      )
    )
  )

# Total films per year within each US / non-US group (denominator for shares)
counts <- d1 %>%
  group_by(year, isamerican) %>%
  summarize(totalmovies = sum(n)) %>%
  ungroup()

# Share of each Bechdel outcome within each year/group
props <- d1 %>%
  left_join(counts, by = c("year", "isamerican")) %>%
  mutate(prop_ctotal = n / totalmovies)

# Fail shades in reds/pinks, pass in blue
fpal <- c("#e63946", "#EA7F83", "#EEC5C0", "#a8dadc", "#457b9d")

# Stacked outcome proportions by year, faceted US vs. everyone else
p1 <- props %>%
  mutate(is_american = ifelse(isamerican,
    "American Films",
    "Everyone Else"
  )) %>%
  ggplot(aes(
    x = year, y = prop_ctotal,
    colour = clean_test, fill = clean_test
  )) +
  geom_col() +
  facet_wrap(~is_american) +
  scale_fill_manual(values = fpal) +
  scale_colour_manual(values = fpal) +
  ylab("Proportion of movies") +
  labs(
    title = "The Bechdel test in American films and globally",
    subtitle = "How do American films compare to films from other countries?"
  ) +
  hrbrthemes::theme_ipsum_rc() +
  theme(legend.position = "none")

# Raw counts with loess trends; free y since the US releases many more films
p2 <- d1 %>%
  mutate(is_american = ifelse(isamerican, "American Films",
    "Everyone Else"
  )) %>%
  ggplot(aes(x = year, y = n, color = clean_test)) +
  facet_wrap(~is_american, scales = "free_y") +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_colour_manual("Bechdel result", values = fpal) +
  scale_fill_manual(values = fpal) +
  ylab("Number of movies") +
  labs(caption = "Plot: Margaret Siple (@margaretsiple) - Data: Bechdeltest.com API") +
  hrbrthemes::theme_ipsum_rc() +
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

library(patchwork)

png(filename = "CountryBechdel.png", width = 8, height = 8, units = "in", res = 200)
p1 + p2 +
  plot_layout(ncol = 1, heights = c(1, 3), guides = "collect")
dev.off()

# Bechdel passes by genre and rating --------------------------------------
# Split the multi-valued country and genre fields into one column per entry
ct <- movies %>%
  select(year, title, clean_test, binary, country, genre, rated) %>%
  tidyr::separate(country, into = paste0("c", 1:5)) %>%
  tidyr::separate(genre, into = paste0("g", 1:3))

# All genre tokens observed across the three genre slots
genres <- ct %>%
  select(g1, g2, g3) %>%
  t() %>%
  c() %>%
  unique()
unique(movies$rated)

library(gggibbous)

# Pass/fail counts by rating x genre; values_fill = 0 replaces the old
# pivot_wider() + mutate_at(replace_na) pair (mutate_at is superseded)
d3 <- ct %>%
  select(title, binary, rated, g1, g2, g3) %>%
  filter(!is.na(g1)) %>%
  pivot_longer(cols = g1:g3, values_to = c("genre")) %>%
  select(-name) %>%
  group_by(rated, genre) %>%
  count(binary) %>%
  ungroup() %>%
  pivot_wider(names_from = binary, values_from = n, values_fill = 0) %>%
  mutate(total = FAIL + PASS) %>%
  # Drop separator fragments ("Fi" from "Sci-Fi") and sparse/odd ratings
  filter(!is.na(genre) & genre != "Fi" & !rated %in% c("N/A", "Not Rated", "Unrated", "X", "TV-14", "TV-PG")) %>%
  mutate(genre = recode(genre, "Sci" = "Sci-Fi"))
# Genre-level totals appended as an extra "rating" row per genre.
# na.rm = TRUE (was T; T/F are reassignable and discouraged)
totalrow <- d3 %>%
  select(-rated) %>%
  group_by(genre) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE), .names = "{.col}")) %>%
  mutate(rated = "Genre total")

d4 <- d3 %>%
  add_row(totalrow) %>%
  # Order genres by overall pass rate so the panel reads low-to-high
  mutate(genre = fct_reorder(genre, PASS / total)) %>%
  mutate(rated = fct_relevel(rated, "G", "PG", "PG-13", "R", "NC-17", "Genre total"))

mooncolor <- "grey"
moonfill <- "white"
highlightmoon <- "#457b9d"

# Cells where more than two-thirds of films pass -> highlighted in blue
d4b <- d4 %>%
  filter(PASS / total > 0.66)

p3 <- d4 %>%
  ggplot(aes(x = rated, y = genre)) +
  # Grey base moons: pass ratio on the right, fail ratio on the left
  geom_moon(aes(ratio = PASS / total),
    fill = mooncolor,
    colour = mooncolor
  ) +
  geom_moon(aes(ratio = FAIL / total),
    fill = moonfill,
    right = FALSE,
    colour = mooncolor
  ) +
  # Re-draw the highlighted (> 2/3 passing) cells in blue
  geom_moon(
    data = d4b, aes(ratio = PASS / total),
    fill = highlightmoon,
    colour = highlightmoon
  ) +
  geom_moon(
    data = d4b, aes(ratio = FAIL / total),
    fill = NA,
    right = FALSE,
    colour = highlightmoon
  ) +
  ylab("Genre") +
  xlab("Rating") +
  labs(
    title = "Across genres, fewer R-rated films pass the Bechdel",
    subtitle = "Blue crescents indicate where more than two-thirds of films pass the test.",
    caption = "Plot: Margaret Siple (@margaretsiple) - Data: Bechdeltest.com API"
  ) +
  hrbrthemes::theme_ipsum_rc() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(colour = highlightmoon, hjust = 0.5)
  )

png("BechdelMoons.png", width = 7, height = 10, units = "in", res = 200)
p3
dev.off()
-------------------------------------------------------------------------------- /2021/17_2021_netflix.R: --------------------------------------------------------------------------------
# 17_netflix
library(tidyverse)
library(tidygraph)
library(ggraph)

# soundtrack: Olivia Rodrigo - SOUR
rodrigopal <-
c("#8780C3", "#71BDC3", "#EBD4C8", "#BB2D67", "#EEC660") 8 | source("HelperFunctions.R") 9 | rodrigopal_10 <- lengthen_pal(1:10, rodrigopal) 10 | 11 | # Load the data 12 | tuesdata <- tidytuesdayR::tt_load(2021, week = 17) 13 | 14 | # Clean up data 15 | netflix <- tuesdata$netflix %>% 16 | mutate(add_year = as.numeric(str_sub(date_added, -5, -1))) %>% 17 | filter(!is.na(add_year)) %>% 18 | select(-cast, -date_added, -rating, -duration, -director, -cast, -description) %>% 19 | filter(across(country, ~ grepl("United States", .))) 20 | 21 | 22 | alldat <- netflix %>% 23 | separate(listed_in, into = c("g1", "g2", "g3"), fill = "right") %>% 24 | pivot_longer(cols = g1:g3, values_to = "genre") %>% 25 | filter(!is.na(genre)) %>% 26 | filter(add_year > 2012) 27 | 28 | alldat 29 | 30 | 31 | 32 | # Functions for bubble plot ----------------------------------------------- 33 | 34 | make_nodes <- function(alldat) { 35 | unit_size <- alldat %>% 36 | group_by(type, genre) %>% 37 | summarize(size = length(show_id)) %>% 38 | ungroup() %>% 39 | rename(name = genre) 40 | listed_size <- unit_size %>% 41 | select(-type) 42 | 43 | type_size <- unit_size %>% 44 | group_by(type) %>% 45 | summarize(size = sum(size)) %>% 46 | rename(name = type) 47 | 48 | year_size <- tibble( 49 | name = as.character(alldat$add_year[1]), 50 | size = sum(unit_size$size) 51 | ) 52 | 53 | nodes <- bind_rows(listed_size, type_size, year_size) 54 | return(nodes) 55 | } 56 | 57 | 58 | make_edges <- function(alldat) { 59 | x <- alldat %>% 60 | group_by(add_year, type, genre) %>% 61 | summarize(size = length(show_id)) %>% 62 | ungroup() %>% 63 | rename(name = genre) 64 | 65 | base <- tibble( 66 | from = as.character(unique(x$add_year)), 67 | to = unique(x$type) 68 | ) 69 | 70 | inner <- x %>% 71 | select(from = type, to = name) %>% 72 | distinct() 73 | 74 | edges <- bind_rows(base, inner) 75 | return(edges) 76 | } 77 | 78 | 79 | make_year_plot <- function(x, pal = rodrigopal) { 80 | nodes <- make_nodes(x) 81 | 
edges <- make_edges(x) 82 | mygraph <- tbl_graph(nodes = nodes, edges = edges) 83 | 84 | # Make the plot 85 | plot <- ggraph(mygraph, layout = "circlepack", weight = size) + 86 | geom_node_circle(aes(fill = depth), color = "grey14", size = 0.2) + 87 | labs(title = unique(x$add_year)) + 88 | coord_equal() + 89 | scale_fill_gradientn(colours = pal) + 90 | hrbrthemes::theme_ipsum(base_size = 10) + 91 | scale_x_continuous(expand = c(0, 0)) + 92 | scale_y_continuous(expand = c(0, 0)) + 93 | labs(x = NULL, y = NULL) + 94 | theme( 95 | legend.position = "none", 96 | panel.grid.major = element_blank(), 97 | panel.grid.minor = element_blank(), 98 | panel.background = element_blank(), 99 | axis.ticks = element_blank(), 100 | axis.text = element_blank(), 101 | axis.text.x = element_blank(), 102 | axis.text.y = element_blank(), 103 | plot.title = element_text(size = 9) 104 | ) 105 | plot 106 | } 107 | 108 | 109 | library(patchwork) 110 | 111 | # amazing shortcut for many mini figs 112 | list <- alldat %>% 113 | split(.$add_year) %>% 114 | map(make_year_plot) 115 | 116 | p1 <- wrap_plots(list, ncol = 3, nrow = 3) + 117 | plot_annotation(caption = "Data: Sara Stoudt / The Economist. 
Plot: @margaretsiple w/ code from @jakekaupp") & (hrbrthemes::theme_ipsum(base_size = 10) + theme(
  legend.position = "none",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  axis.text = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  plot.title = element_text(size = 12, hjust = 0)
))

p1

# Maybe just a nice time series fig now -------------------------------------
# Top 10 genre tokens, dropping fragments left over from separate()
topgenres <- alldat %>%
  group_by(genre) %>%
  count() %>%
  filter(!genre %in% c("Movies", "TV", "Stand", "Up", "Shows")) %>%
  ungroup() %>%
  slice_max(order_by = n, n = 10) %>%
  # get top 10 genres
  select(genre)

# Titles added per year/type/genre, accumulated over time.
# BUG FIX: the original took cumsum() over the whole ungrouped frame,
# mixing genres and types together; the running total must be computed
# per (type, genre) so each area in the plot accumulates its own series.
d2 <- alldat %>%
  right_join(topgenres, by = "genre") %>%
  group_by(add_year, type, genre) %>%
  summarize(n = length(unique(title))) %>%
  ungroup() %>%
  arrange(add_year) %>%
  group_by(type, genre) %>%
  mutate(cumu_n = cumsum(n)) %>%
  ungroup() %>%
  filter(add_year <= 2020) %>%
  rename(Genre = genre)

p2 <- d2 %>%
  ggplot(aes(x = add_year, y = cumu_n, fill = Genre)) +
  geom_area() +
  scale_fill_manual(values = rodrigopal_10) +
  facet_wrap(~type, ncol = 1) +
  labs(
    x = "Year added", y = "Cumulative number of listings added",
    title = "Netflix additions in the U.S.",
    subtitle = "Movies and TV shows added to Netflix since 2013.
\nYellow bubbles are genres, nested within type (TV show or movie), \nnested within year"
  ) +
  hrbrthemes::theme_ipsum(base_size = 10) +
  theme(legend.position = "bottom")


# Export figs -------------------------------------------------------------

png("NetflixA.png", width = 6, height = 10, units = "in", res = 200)
p2
dev.off()

png("NetflixB.png", width = 14, height = 10, units = "in", res = 200)
p1
dev.off()
-------------------------------------------------------------------------------- /2021/51_2021_spice_girls.R: --------------------------------------------------------------------------------
# Spice girls!

# Libraries ---------------------------------------------------------------
# remotes::install_github("hrbrmstr/ggchicklet")
library(tidyverse)
library(tidytext)
library(glue)
library(stringr)
library(ggchicklet)
library(GGally)
library(ggcorrplot)

# Color palettes and themes -----------------------------------------------
# Palettes from Spice Girls album covers, made on coolors.co
spice_pal <- c("#EF4035", "#FD7D34", "#9A351D", "#88D200", "#f9ea9a", "#cDB2CB", "#FFFFFF", "#BE525F", "#CDE009", "#A9973E") # 10 colours
spiceworld_pal <- c("#B0A7B4", "#FF2332", "#07047C", "#FF1085", "#B6E6D1", "#E8F650", "#00A8D1", "#FFB463") # 8 colours

scales::show_col(spice_pal)
scales::show_col(spiceworld_pal)

# Data --------------------------------------------------------------------
tuesdata <- tidytuesdayR::tt_load("2021-12-14")
studio_album_tracks <- tuesdata$studio_album_tracks
lyrics <- tuesdata$lyrics

# Sentiment by how many singers share a line; cap the singer count at 5.
# pmin() is the vectorised cap (replaces ifelse(nsingers > 5, 5, nsingers)).
tokens <- lyrics %>%
  mutate(nsingers = str_count(section_artist, boundary("word"))) %>%
  mutate(nsingers = pmin(nsingers, 5)) %>%
  unnest_tokens(output = "word", input = "line") %>%
  inner_join(get_sentiments("bing")) %>%
group_by(nsingers) %>% 32 | count(sentiment) %>% # count the # of positive & negative words 33 | spread(sentiment, n, fill = 0) %>% 34 | mutate( 35 | prop_negative = negative / (negative + positive), 36 | prop_positive = positive / (negative + positive), 37 | pos_ratio = positive / negative 38 | ) 39 | 40 | tokens2 <- lyrics %>% 41 | unnest_tokens(output = "word", input = "line") %>% 42 | inner_join(get_sentiments("bing")) %>% 43 | group_by(track_number, album_name, section_artist) %>% 44 | count(sentiment) %>% # count the # of positive & negative words 45 | spread(sentiment, n, fill = 0) %>% 46 | mutate( 47 | prop_negative = negative / (negative + positive), 48 | prop_positive = positive / (negative + positive), 49 | pos_ratio = positive / negative 50 | ) 51 | 52 | tokens %>% 53 | select(nsingers, pos_ratio) %>% 54 | pivot_longer(pos_ratio) %>% 55 | ggplot(aes(x = nsingers, y = value)) + 56 | geom_point() + 57 | geom_line() 58 | 59 | 60 | # Spread singers from section_artist into columns -------------------------------------- 61 | get_singer <- function(col_str, singer) { 62 | ifelse(grepl(pattern = singer, x = col_str), 1, 0) 63 | } 64 | 65 | singers <- c("Scary", "Posh", "Ginger", "Baby", "Sporty") 66 | # Divide singers for each lyric into their own column a la presence/absence 67 | lyrics_wide <- lyrics %>% 68 | mutate( 69 | Baby = get_singer(singer = "Baby", section_artist), 70 | Posh = get_singer(singer = "Posh", section_artist), 71 | Scary = get_singer(singer = "Scary", section_artist), 72 | Sporty = get_singer(singer = "Sporty", section_artist), 73 | Ginger = get_singer(singer = "Ginger", section_artist), 74 | All = get_singer(singer = "All", section_artist) 75 | ) 76 | # No idea how to do this better...sorry tidyverse! 
# Vectorised replacement for the original row-by-row for loop: when
# All == 1, force each singer column to 1. Columns are 0/1, so pmax()
# against All reproduces the loop exactly.
lyrics_wide <- lyrics_wide %>%
  mutate(across(c(Baby, Scary, Sporty, Posh, Ginger), ~ pmax(.x, All)))

# Per-singer sentiment counts by track.
# NOTE(review): get_sentiments("afinn") returns word/value columns, not a
# `sentiment` column, so count(sentiment) below looks like it would fail
# with the afinn lexicon -- confirm whether this run actually used "bing".
lyrics_wide2 <- lyrics_wide %>%
  select(-All) %>%
  unnest_tokens(output = "word", input = "line") %>%
  inner_join(get_sentiments("afinn")) %>%
  pivot_longer(cols = Baby:Ginger, values_to = "Singer") %>%
  filter(Singer == 1) %>%
  select(-Singer, -section_artist) %>%
  group_by(track_number, album_name, name) %>%
  count(sentiment) %>% # count the # of positive & negative words
  ungroup() %>%
  spread(sentiment, n, fill = 0)


# Geom_chicklet plot of sentiments ----------------------------------------

p1 <- lyrics_wide2 %>%
  mutate(negative = negative * -1) %>% # negatives plot below the axis
  rename(Singer = name) %>%
  ggplot(aes(x = track_number, y = positive, fill = Singer)) +
  geom_chicklet() +
  geom_chicklet(aes(x = track_number, y = negative, fill = Singer)) +
  scale_fill_manual(values = spiceworld_pal[3:7]) +
  geom_hline(yintercept = 0) +
  facet_wrap(~ factor(album_name, # order albums chronologically
    levels = c("Spice", "Spiceworld", "Forever")
  ), ncol = 3) +
  xlab("Track number") +
  ylab("Number of positive or negative words \n(AFINN sentiment score)") +
  hrbrthemes::theme_ipsum_rc() +
  labs(
    x = "Track number",
    title = "Spice Girls and Sentiments",
    subtitle = "Sentiment analysis of Spice Girls lyrics and correlations between track characteristics"
  )
p1


# Studio album tracks
# ------------------------------------------------------------------------

dat2 <- studio_album_tracks %>%
  select(
    danceability, energy, loudness, liveness, valence,
    album_name
  )

# Pairwise relationships among track audio features, coloured by album
p2 <- GGally::ggpairs(as.data.frame(dat2),
  columns = 1:5, aes(color = album_name, alpha = 0.8)
) +
  scale_color_manual(values = spice_pal[c(1, 8, 9)]) +
  scale_fill_manual(values = spice_pal[c(1, 8, 9)]) +
  hrbrthemes::theme_ipsum_rc()

x <- studio_album_tracks %>%
  select(speechiness:tempo)

# Correlation p-values among audio features
xx <- cor_pmat(x)

p3 <- ggcorrplot(xx,
  type = "upper",
  method = "circle",
  ggtheme = hrbrthemes::theme_ipsum_rc,
  colors = c("#6D9EC1", "white", "#07047C")
) +
  labs(caption = "Spice Girls dataset from @jacquietran")

# Save plots --------------------------------------------------------------
# patchwork is needed for the `+`/plot_layout() composition below but was
# never attached in this script
library(patchwork)

png("SG.png", width = 12, height = 7, units = "in", res = 150)
p1 + p3 + plot_layout(ncol = 2, widths = c(2, 1))
dev.off()
-------------------------------------------------------------------------------- /2021/5_2021_plastics.R: --------------------------------------------------------------------------------
library(tidyverse)


# Soundtrack --------------------------------------------------------------
# Cardi B - UP!
# Get the data ------------------------------------------------------------
tuesdata <- tidytuesdayR::tt_load(2021, week = 5)
plastics <- tuesdata$plastics


# Summarize ---------------------------------------------------------------
# Counts of each plastic type per company and year.
# across() replaces the superseded summarize_at().
totplastics <- plastics %>%
  group_by(parent_company, year) %>%
  summarize(across(empty:pvc, ~ sum(.x, na.rm = TRUE)))

# Lump everything outside the 7 biggest producers (by 2020 counts) into
# "Other"; unbranded/garbage company names are collapsed first.
top7 <- totplastics %>%
  filter(year == 2020) %>%
  pivot_longer(cols = empty:pvc, names_to = "type") %>%
  group_by(parent_company) %>%
  summarize(total = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(parent_company = case_when(
    parent_company %in% c("Unbranded", "null", "NULL", "#ERROR!") ~ "Unbranded_unknown",
    TRUE ~ parent_company
  )) %>%
  mutate(company_lumped = fct_lump(parent_company, n = 7, w = total)) %>%
  distinct(parent_company, company_lumped) %>%
  right_join(totplastics, by = "parent_company") %>%
  mutate(total = hdpe + ldpe + o + pet + pp + ps + pvc) # row total across plastic types

# Top 6 companies palettes, from design-seeds.com
companypal <- c(
  "#81C4CA",
  "#468D96",
  "#103128",
  "#E83D5F",
  "#FA6E90",
  "#FCB16D"
)

# Plot 1: coord_polar with proportion of plastic types from each company in each plastic category --------------------
p1 <- top7 %>%
  pivot_longer(cols = empty:pvc, names_to = "type") %>%
  mutate(type = fct_reorder(type, desc(value))) %>%
  filter(company_lumped != "Unbranded_unknown") %>%
  filter(company_lumped != "Other") %>%
  filter(!type %in% c("ps", "pvc", "empty")) %>%
  group_by(company_lumped, type) %>%
  summarize(totvalue = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(company_lumped) %>%
  mutate(prop_val = totvalue / sum(totvalue)) %>%
  ggplot(aes(fill = company_lumped, colour = company_lumped)) +
  geom_segment(aes(
    x = type, xend = type,
    y = 0.02, yend = prop_val + 0.02
57 | ), 58 | lwd = 2, lineend = "round" 59 | ) + 60 | geom_label(aes(x = type, y = 0, label = type), 61 | fill = "white", size = 2.5 62 | ) + 63 | ylim(c(0, 1.2)) + 64 | xlab("Type of plastic") + 65 | ylab("") + 66 | scale_colour_manual(values = companypal) + 67 | coord_polar(theta = "y") + 68 | facet_wrap(~company_lumped, ncol = 2) + 69 | hrbrthemes::theme_ipsum_rc() + 70 | theme( 71 | axis.text.x = element_blank(), 72 | axis.text.y = element_blank(), 73 | legend.position = "none" 74 | ) + 75 | labs(caption = "Data: Break Free From Plastic") 76 | 77 | p1 78 | 79 | # Heat map! --------------------------------------------------------------- 80 | top30 <- totplastics %>% 81 | filter(year == 2020) %>% 82 | pivot_longer(cols = empty:pvc, names_to = "type") %>% 83 | group_by(parent_company) %>% 84 | summarize(total = sum(value, na.rm = T)) %>% 85 | ungroup() %>% 86 | mutate(parent_company = case_when( 87 | parent_company %in% c("Unbranded", "null", "NULL", "#ERROR!") ~ "Unbranded_unknown", 88 | TRUE ~ parent_company 89 | )) %>% 90 | mutate(company_lumped = fct_lump(parent_company, n = 30, w = total)) %>% 91 | distinct(parent_company, company_lumped) %>% 92 | right_join(totplastics) %>% 93 | mutate(total = hdpe + ldpe + o + pet + pp + ps + pvc) # forgot how to sum across columns 94 | 95 | p2 <- top30 %>% 96 | group_by(company_lumped) %>% 97 | summarize_at(vars(empty:pvc), .funs = ~ sum(.x, na.rm = TRUE)) %>% 98 | filter(!is.na(company_lumped)) %>% 99 | mutate(row_sum = rowSums(select(., -1))) %>% 100 | mutate_at(-1, ~ . 
/ row_sum) %>% 101 | select(-row_sum) %>% 102 | pivot_longer(cols = empty:pvc) %>% 103 | ggplot(aes(x = name, y = company_lumped, colour = value)) + 104 | geom_point(size = 4.5) + 105 | scale_colour_distiller(palette = "YlGnBu") + 106 | scale_y_discrete(limits = rev) + 107 | xlab("Type of plastic") + 108 | ylab("") + 109 | hrbrthemes::theme_ipsum_rc() + 110 | theme( 111 | legend.position = "none", 112 | plot.title = element_text(hjust = 0.01), 113 | plot.subtitle = element_text(hjust = 0.2) 114 | ) + 115 | labs( 116 | title = "Which type of plastic waste do these top companies produce?", 117 | subtitle = "Proportions of each plastic type by company. Based on labeled products from an audit report by Break Free From Plastic" 118 | ) 119 | 120 | p2 121 | 122 | 123 | 124 | # Save -------------------------------------------------------------------- 125 | library(patchwork) 126 | 127 | png("5_2021_plastics.png", width = 14, height = 10, units = "in", res = 200) 128 | p2 + p1 + plot_layout(ncol = 2, widths = c(2, 3)) 129 | dev.off() 130 | -------------------------------------------------------------------------------- /2021/5_2021_plastics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2021/5_2021_plastics.png -------------------------------------------------------------------------------- /2021/9_2021_employment.R: -------------------------------------------------------------------------------- 1 | # Employment 2 | tuesdata <- tidytuesdayR::tt_load(2021, week = 9) 3 | employed <- tuesdata$employed 4 | earn <- tuesdata$earn 5 | demo_names <- c("Men","Women","White","Black or African American", "Asian") 6 | 7 | library(tidyverse) 8 | table(employed$major_occupation) 9 | table(employed$race_gender) 10 | 11 | employed %>% 12 | filter(race_gender!="TOTAL" & !industry %in% demo_names) %>% # 13 | group_by(industry, year, race_gender) %>% # 14 | 
summarize(employ_N = sum(employ_n)) %>% 15 | ungroup() %>% 16 | ggplot(aes(x = year, y = employ_N, color = industry, lty = race_gender)) + 17 | geom_line() + 18 | facet_wrap(~industry) 19 | -------------------------------------------------------------------------------- /2021/BechdelMoons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2021/BechdelMoons.png -------------------------------------------------------------------------------- /2021/CountryBechdel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2021/CountryBechdel.png -------------------------------------------------------------------------------- /2021/incomplete/23_2021_survivor.R: -------------------------------------------------------------------------------- 1 | # Week 23: SURVIVOR the TV show 2 | 3 | library(tidyverse) 4 | 5 | summary <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/summary.csv') 6 | challenges <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/challenges.csv') 7 | castaways <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/castaways.csv') 8 | viewers <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/viewers.csv') 9 | jury_votes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/jury_votes.csv') 10 | 11 | head(summary) 12 | 13 | summary2 <- summary %>% 14 | mutate(filming_duration = filming_ended - filming_started) 15 | # summary3 <- summary2 %>% 16 | # pivot_longer(cols = premiered:filming_ended, values_to = "date") 17 | # 
# summary3 %>%
#   ggplot(aes(x = season, y = date, colour = name)) +
#   geom_point()

# Gap between end of filming and the premiere date, by season
summary %>%
  ggplot(aes(x = season)) +
  geom_linerange(aes(ymin = filming_ended, ymax = premiered), lwd = 3) +
  coord_flip()

# Do strong premieres go with strong finales?
summary2 %>%
  ggplot(aes(x = viewers_premier, y = viewers_finale)) +
  geom_point()
-------------------------------------------------------------------------------- /2021/incomplete/25_2021_DuBoisChallenge.R: --------------------------------------------------------------------------------
library(tidyverse)

tuesdata <- tidytuesdayR::tt_load(2021, week = 25)
tweets <- tuesdata$tweets

# Base world map with all axes and gridlines stripped
world_map <- map_data("world")
map_plot <- ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "grey30", colour = "grey45", size = 0.1
  ) +
  xlab('') +
  ylab('') +
  coord_fixed() +
  hrbrthemes::theme_ft_rc(base_size = 14) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )

# Tweet locations sized by retweet count
map_plot +
  geom_point(
    data = tweets,
    aes(x = long, y = lat, size = retweet_count)
  )
-------------------------------------------------------------------------------- /2021/incomplete/35_lemurs.R: --------------------------------------------------------------------------------
# 35_lemurs
lemurs <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-24/lemur_data.csv')
View(lemurs)
unique(lemurs$taxon)
-------------------------------------------------------------------------------- /2021/incomplete/X_2021_gunviolence.R: --------------------------------------------------------------------------------
# 2021_gunviolence
# Gun violence dataset from Kaggle,
# contributed by James Ko
# https://www.kaggle.com/jameslko/gun-violence-data
gv <- read.csv("gun-violence-data_01-2013_03-2018.csv")
-------------------------------------------------------------------------------- /HelperFunctions.R: --------------------------------------------------------------------------------
# Helper functions + code tidbits

# for increasing the small little font size on the {hrbrthemes} plots:
# theme_ipsum_rc() +
#   theme(axis.title.x = element_text(size = rel(1.5)),
#         axis.title.y = element_text(size = rel(1.5)))
# This is a test to see if GH is working

# Interpolate a short palette out to one colour per unique value of `x`.
#   x        - vector whose unique values set how many colours are needed
#   shortpal - character vector of colours to interpolate between
# Returns a character vector of length(unique(x)) colours.
lengthen_pal <- function(x = 1:10, shortpal) {
  stopifnot(length(shortpal) >= 1) # colorRampPalette needs at least one colour
  ncolours <- length(unique(x))
  colorRampPalette(shortpal)(ncolours)
}


# PATCHWORK NOTE -----------------------------------------------------------
# if you use patchwork, use the & symbol to add a theme to the whole assemblage
# p1 + p2 + plot_annotation(title = 'Patchwork plots', subtitle = 'Reminder about adding a theme to the whole shebang') & theme(text = element_text('Roboto Condensed', size = 16))


# MAP NOTES ----------------------------------------------------------------
# world_map <- map_data("world")
# ggplot(world_map) +
#   geom_polygon(data = world_map, aes(x = long, y = lat, group = group),
#                fill = "grey30", colour = "grey45", size = 0.1)


# Print a palette in the graphics window -----------------------------------
# taken from stackoverflow
show_pal <- function(pal) {
  plot(seq_len(length(pal)), rep_len(1, length(pal)),
    col = pal, pch = 16, cex = 3, xaxt = "n", yaxt = "n", xlab = "", ylab = ""
  )
}