├── 2020 ├── 10_Hockey.R ├── 11_Tuition.R ├── 12_Office.R ├── 13_TBI.R ├── 14_Beer.R ├── 15_TourdeFrance.R ├── 16_HipHop.R ├── 17_GDPR.R ├── 18_Broadway.R ├── 19_AnimalCrossing.R ├── 19_AnimalCrossing_dataprep.R ├── 20_Volcanoes.R ├── 21_Volleyball.R ├── 22_Cocktails.R ├── 25_university_statements.R ├── 26_Caribou.R ├── 27_x-men.R ├── 28_coffee.R ├── 29_astronauts.R ├── 29_astronauts.png ├── 2_Transit.png ├── 2_transit.R ├── 30_RSPCA.R ├── 31_penguins.R ├── 32_EuropeanEnergy.R ├── 33_Airbender.R ├── 33_Airbender.png ├── 34_ExoticPlants.R ├── 36_Crops.R ├── 36_Crops.png ├── 37_Friends.R ├── 43_MoreBeer.R ├── 43_MoreBeer.png ├── 48_WATrails.R ├── 48_WATrails.png ├── 49_Shelters.R ├── 8_CO2Food.R ├── 9_Measles.R ├── AnimalCrossing.png ├── AreaPlot.png ├── Beer.png ├── Broadway.png ├── Caribou.png ├── Cocktails.png ├── Coffee1.png ├── Coffee2.png ├── First.png ├── GDPR.png ├── HipHop.png ├── Hockey.png ├── Measles.png ├── Measles.tiff ├── NewAvatar.png ├── Penguins.png ├── Plants.png ├── Plants2.png ├── Plants_cowplot.png ├── RSPCA.png ├── RSPCA2.png ├── Salary_Tuition.png ├── Second.png ├── TheOffice.png ├── TourdeFrance.png ├── Tuition.gif └── statements.csv ├── 2021 ├── 11_2021_bechdel.R ├── 17_2021_netflix.R ├── 51_2021_spice_girls.R ├── 5_2021_plastics.R ├── 5_2021_plastics.png ├── 9_2021_employment.R ├── BechdelMoons.png ├── CountryBechdel.png └── incomplete │ ├── 23_2021_survivor.R │ ├── 25_2021_DuBoisChallenge.R │ ├── 35_lemurs.R │ └── X_2021_gunviolence.R ├── .gitignore ├── 19_2022_bestsellers.R ├── HelperFunctions.R └── books.csv /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | *.RData 4 | *.rds 5 | *.png 6 | *.zip 7 | Other/ 8 | .Ruserdata 9 | Dairy.png 10 | ^incomplete/$ 11 | 12 | 2021/RaincloudPlots-master/ -------------------------------------------------------------------------------- /19_2022_bestsellers.R: 
-------------------------------------------------------------------------------- 1 | #19_bestsellers 2 | 3 | # packages ---------------------------------------------------------------- 4 | library(tidyverse) 5 | #devtools::install_github("BlakeRMills/MetBrewer") 6 | library(MetBrewer) 7 | library(ggrepel) 8 | library(hrbrthemes) 9 | 10 | 11 | 12 | # Read in the data manually 13 | nyt_titles_raw <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_titles.tsv') 14 | # Clean out the ! and ? from authors 15 | nyt_titles <- nyt_titles_raw %>% 16 | mutate(author = str_replace(author, pattern = "[[:punct:]] by ","")) 17 | 18 | nyt_full <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_full.tsv') 19 | 20 | # Goodreads books from here: https://www.kaggle.com/datasets/jealousleopard/goodreadsbooks?resource=download 21 | gr <- read.csv("books.csv") #This dataset only goes to 2020 so I think it actually matches well with the NYT list 22 | gr2 <- gr %>% 23 | mutate(title = toupper(title)) 24 | 25 | colnames(gr2) 26 | colnames(nyt_titles) 27 | 28 | # All the titles 29 | dat <- nyt_titles %>% 30 | left_join(distinct(gr2,title,.keep_all=T), by = c("title")) %>% # don't match with duplicated titles in goodreads 31 | mutate_at(c('average_rating','num_pages','text_reviews_count'),as.numeric) 32 | 33 | #Titles with both goodreads and NYT metrics 34 | dat_matches <- dat %>% 35 | filter(!is.na(bookID)) 36 | 37 | baseplot <- ggplot(dat_matches, 38 | aes(x=total_weeks, y=average_rating, 39 | color = language_code)) + 40 | geom_point(alpha = 0.7) + 41 | xlab("Weeks on NYT Bestseller list") + 42 | ylab("Average rating on Goodreads") + 43 | scale_color_met_d(name = 'Benedictus') + 44 | hrbrthemes::theme_ft_rc() 45 | 46 | manyweeks <- dat_matches %>% 47 | filter(total_weeks>120) %>% 48 | mutate(title = stringr::str_to_title(title)) 49 | 50 | highratings <- dat_matches 
%>% 51 | filter(average_rating>4.4)%>% 52 | mutate(title = stringr::str_to_title(title)) 53 | 54 | p1 <- baseplot + 55 | geom_point(data=manyweeks, 56 | aes(x=total_weeks, y=average_rating))+ 57 | ggrepel::geom_text_repel(data=manyweeks, 58 | aes(x=total_weeks, y=average_rating, 59 | label = title), size = 3, 60 | nudge_y = ifelse(manyweeks$title == "The Da Vinci Code", .2,-.2) 61 | ) + 62 | geom_point(data=highratings, 63 | aes(x=total_weeks, y=average_rating))+ 64 | ggrepel::geom_text_repel(data=highratings, 65 | aes(x=total_weeks, y=average_rating, 66 | label = title), size = 3, 67 | nudge_y = .1 68 | ) + 69 | guides(color = guide_legend(override.aes = aes(label = ""),title = "Language")) 70 | 71 | p1 72 | 73 | bestselling_authors <- nyt_titles %>% 74 | group_by(author) %>% 75 | count() %>% 76 | ungroup() %>% 77 | slice_max(n, n=10, with_ties = FALSE) 78 | 79 | # How many books did each of the bestselling authors have on the list each year? 80 | bsauthors_ts <- nyt_full %>% 81 | filter(author %in% bestselling_authors$author) %>% 82 | group_by(author,year) %>% 83 | distinct(title) %>% 84 | count() 85 | 86 | # If available, what were the average goodreads ratings of those books? 
87 | bestsellers_gr <- nyt_full %>% 88 | filter(author %in% bestselling_authors$author) %>% 89 | distinct(title) %>% 90 | left_join(distinct(gr2,title,.keep_all=T), by = 'title') %>% 91 | mutate(year = as.numeric(str_sub(publication_date, start= -4)), 92 | average_rating = as.numeric(average_rating)) %>% 93 | filter(!is.na(bookID)) %>% 94 | group_by(authors, year) %>% 95 | summarize(mean_rating = mean(average_rating,na.rm=TRUE)) %>% 96 | ungroup() 97 | 98 | p2 <- bsauthors_ts %>% 99 | left_join(bestsellers_gr, by = c("author"="authors", "year")) %>% 100 | ggplot(aes(x=year,y=n)) + 101 | geom_line(colour = 'white') + 102 | geom_point(aes(x=year,y=n,size = mean_rating), 103 | alpha=0.5,colour = 'lavender') + 104 | xlab("Year") + 105 | ylab("Unique NYT bestselling books") + 106 | facet_wrap(~author,ncol = 2) + 107 | guides(size=guide_legend(title="Average Goodreads rating")) 108 | p2 109 | 110 | library(patchwork) 111 | 112 | booktheme <- hrbrthemes::theme_ft_rc(base_size = 8) + 113 | theme(panel.grid.major = element_blank(), 114 | panel.grid.minor = element_blank(), 115 | axis.title.x = element_text(size = 14), 116 | axis.title.y = element_text(size = 14), 117 | legend.position = 'bottom', 118 | strip.text.x = element_text(size = 10)) 119 | 120 | png(filename = "19_NYTBestsellers.png",width = 12,height = 8,units = 'in',res = 200) 121 | p1 + p2 + plot_annotation(title = "How does Goodreads treat the NYT bestsellers?", 122 | subtitle = "I've always been curious about how representative the Goodreads community is of broader readership.", 123 | caption = "Data: goodreadsbooks from Kaggle and Post45 Data") & booktheme 124 | dev.off() 125 | 126 | 127 | # Bonus investigation: more correlations! 
--------------------------------- 128 | 129 | baseplot2 <- dat_matches %>% 130 | ggplot(aes(x=total_weeks,y=ratings_count)) + geom_point() 131 | 132 | manyreviews <- filter(dat_matches,ratings_count>700000) %>% mutate(title = stringr::str_to_title(title)) 133 | 134 | p3 <- baseplot2 + 135 | geom_point(data=manyreviews, 136 | aes(x=total_weeks, y=ratings_count)) + 137 | ggrepel::geom_text_repel(data=manyreviews, 138 | aes(x=total_weeks,y=ratings_count, 139 | label = title), size = 3 140 | ) + 141 | xlab("Weeks on NYT bestsellers list") + 142 | ylab("Number of ratings on Goodreads") 143 | 144 | png(filename = "NumReviewsVsWeeks.png",width = 6,height = 5,units = 'in',res=200) 145 | p3 146 | dev.off() 147 | 148 | gr2 %>% ggplot(aes(x=as.numeric(ratings_count),y=as.numeric(average_rating))) +geom_point() 149 | -------------------------------------------------------------------------------- /2020/10_Hockey.R: -------------------------------------------------------------------------------- 1 | # 3 March 2020 2 | 3 | game_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/game_goals.csv') 4 | 5 | top_250 <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/top_250.csv') 6 | 7 | season_goals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-03/season_goals.csv') 8 | 9 | unique(game_goals$player) 10 | unique(game_goals$season) 11 | 12 | library(calecopal) 13 | library(tidyverse) 14 | library(lubridate) 15 | library(ggrepel) 16 | library(patchwork) 17 | 18 | # set strings as factors to false 19 | # options(stringsAsFactors = FALSE) 20 | 21 | 22 | # change date to date format and add month column 23 | game_goals <-game_goals %>% 24 | mutate(date = as.Date(date, format = "%yyyy-%mm-%dd")) %>% 25 | mutate(month = month(date)) 26 | 27 | tot_goals <- game_goals %>% 28 | group_by(player,season) %>% 29 | 
summarize(tgoals = sum(goals,na.rm = T), 30 | tassists = sum(assists,na.rm = T)) 31 | 32 | # nice labels 33 | MAP <- list(tgoals = "Total goals", 34 | tassists = "Total assists") 35 | 36 | totplot <- tot_goals %>% 37 | pivot_longer(cols = c('tgoals','tassists')) %>% 38 | mutate(name = recode(name,!!!MAP)) %>% 39 | filter(player %in% filter(top_250,raw_rank<10)$player) %>% 40 | ggplot(aes(x=season,y=value,colour=name)) + 41 | geom_line() + 42 | theme_minimal(base_size = 12) + 43 | theme(legend.position = 'bottom') + 44 | theme(axis.text.x = element_text(angle=45,hjust=1)) + 45 | scale_color_manual('',values = cal_palette("superbloom3")) + 46 | facet_wrap(~player) + 47 | ylab('') + 48 | xlab('Season') 49 | # ggtitle('Goals and assists of top-ranked players') 50 | 51 | sdat <- game_goals %>% 52 | mutate(goals_home = ifelse(location == 'Home',goals,0), 53 | goals_away = ifelse(location == 'Away',goals,0)) %>% 54 | group_by(player,season) %>% 55 | summarize(tot_home = sum(goals_home,na.rm=T), 56 | tot_away = sum(goals_away,na.rm=T)) 57 | 58 | scatter <- sdat %>% 59 | ggplot(aes(x=tot_home,y=tot_away,colour=season)) + 60 | geom_point() + 61 | scale_colour_continuous('Season') + 62 | theme_minimal(base_size = 12) + 63 | xlab('Goals scored at home') + 64 | ylab('Goals scored away') + 65 | geom_label_repel(data = filter(sdat,tot_home>50 | tot_away>40), 66 | aes(x=tot_home,y=tot_away,label=player), 67 | xlim = c(50,NA), 68 | arrow = arrow(type = "closed", 69 | ends = 'last', 70 | length = unit(0.02, "npc")), 71 | force = 20) + 72 | xlim(c(0,75)) 73 | 74 | png('Hockey.png',width = 10,height=6,units = 'in',res=150) 75 | totplot + scatter 76 | dev.off() 77 | -------------------------------------------------------------------------------- /2020/11_Tuition.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(ggplot2) 3 | library(gganimate) 4 | library(lubridate) 5 | 6 | tuition_cost <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/tuition_cost.csv') 7 | 8 | tuition_income <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/tuition__cost_income.csv') 9 | 10 | salary_potential <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/salary_potential.csv') 11 | 12 | historical_tuition <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/historical_tuition.csv') 13 | 14 | diversity_school <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/diversity_school.csv') 15 | 16 | historical_tuition2 <- historical_tuition %>% 17 | mutate(year1 = sub("\\-.*", "", year)) 18 | 19 | p <- historical_tuition2 %>% 20 | filter(!is.na(tuition_cost) & !is.na(year1)) %>% 21 | filter(complete.cases(.)) %>% 22 | mutate(year1 = as.integer(year1)) %>% 23 | ggplot(aes(x=type,y=tuition_cost)) + 24 | geom_col() + 25 | xlab("Institution type") + 26 | ylab("Tuition cost") + 27 | hrbrthemes::theme_ft_rc(base_size = 18) + 28 | labs(title = 'Tuition cost over time', 29 | subtitle = 'Year: {frame_time}') + 30 | transition_time(year1) + 31 | enter_fade() 32 | p 33 | 34 | animate(p , width = 400, height = 400) 35 | anim_save(here::here("tuitioncost.gif")) 36 | 37 | 38 | salary_tuition <- left_join(tuition_cost,salary_potential) 39 | 40 | st <- salary_tuition %>% 41 | filter(!is.na(early_career_pay) & 42 | type != 'For Profit') %>% 43 | ggplot(aes(in_state_total,early_career_pay,color = type)) + 44 | geom_point() + 45 | scale_colour_manual('',values = calecopal::cal_palette('chaparral2')) + 46 | xlab('Total in-state tuition') + 47 | xlim(c(10000,80000)) + 48 | ylim(c(10000,80000)) + 49 | ylab('Early-career pay') + 50 | hrbrthemes::theme_ft_rc(base_size = 18) + 51 | labs(title = 'In-state tuition vs. 
early-career pay', 52 | subtitle = 'Do graduates of more expensive instutitions earn higher salaries?', 53 | caption = 'Data: tuitiontracker.org') 54 | 55 | png(filename = 'Salary_Tuition.png',width = 6,height = 6,units = 'in',res = 120) 56 | st 57 | dev.off() 58 | -------------------------------------------------------------------------------- /2020/12_Office.R: -------------------------------------------------------------------------------- 1 | # Office 2 | # install.packages("schrute") 3 | library(schrute) 4 | library(tidyverse) 5 | 6 | 7 | # Get the Data 8 | # From TidyT 9 | office_ratings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-17/office_ratings.csv') 10 | glimpse(office_ratings) 11 | 12 | # From Schrute 13 | data(theoffice) 14 | 15 | #Jim lines 16 | # charlines <- theoffice %>% 17 | # filter(character == 'Jim') %>% 18 | # group_by(season,episode) %>% 19 | # summarize(nlines = length(text)) %>% 20 | # ungroup(season, episode) %>% 21 | # mutate(season = as.numeric(season), 22 | # episode = as.numeric(episode)) 23 | # 24 | # combo <- office_ratings %>% 25 | # group_by(season,episode) %>% 26 | # left_join(charlines,by=c('season','episode')) 27 | # combo %>% 28 | # ggplot(aes(x=nlines,y=imdb_rating)) +geom_point() 29 | # 30 | 31 | # All lines 32 | charlines <- theoffice %>% 33 | filter(character %in% c('Michael','Jim','Pam','Dwight','Angela','Kelly')) %>% #Just picked first 6 from IMDB 34 | filter(!is.na(character)) %>% 35 | group_by(character,season,episode) %>% 36 | summarize(nlines = length(text)) %>% 37 | ungroup(character,season, episode) %>% 38 | mutate(season = as.numeric(season), 39 | episode = as.numeric(episode)) 40 | 41 | combo <- office_ratings %>% 42 | group_by(season,episode) %>% 43 | left_join(charlines,by=c('season','episode')) 44 | 45 | po <- combo %>% 46 | na.omit(character) %>% 47 | ggplot(aes(nlines,imdb_rating,colour=character)) + 48 | geom_point() + 49 | facet_wrap(~character) 
+ 50 | stat_smooth(method = 'lm') + 51 | theme_classic(base_size = 12) + 52 | ggpomological::scale_colour_pomological('Character') + 53 | theme(strip.background = element_blank(), 54 | text = element_text(family="Courier")) + 55 | ylab('IMDB Rating') + 56 | xlab('Total number of lines') + 57 | labs(title = "IMDB ratings of The Office by character", 58 | caption = "First six characters listed on IMDB. 59 | Data: schrute package and data.world") 60 | 61 | png('TheOffice.png',width = 10,height = 5,units = 'in',res = 120) 62 | po 63 | dev.off() 64 | -------------------------------------------------------------------------------- /2020/13_TBI.R: -------------------------------------------------------------------------------- 1 | # TBI 2 | library(tidyverse) 3 | 4 | 5 | 6 | # Get data ---------------------------------------------------------------- 7 | tbi_age <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-24/tbi_age.csv') 8 | tbi_year <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-24/tbi_year.csv') 9 | tbi_military <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-24/tbi_military.csv') 10 | 11 | head(tbi_military) 12 | 13 | plotdat <- tbi_military %>% 14 | filter(component %in%c('Guard','Reserve')) %>% 15 | group_by(year,service,severity) %>% 16 | summarize(diag.total = sum(diagnosed)) 17 | 18 | colpal1 <- RColorBrewer::brewer.pal(n = 6,name = 'RdBu')[-6] 19 | colpal1[3] <- 'lightgrey' 20 | 21 | areaplot <- plotdat %>% 22 | ggplot(aes(x=year,y=diag.total,fill = severity)) + 23 | geom_area() + 24 | facet_wrap(~service,scales='free') + 25 | scale_fill_manual(values = rev(colpal1)) + 26 | theme(strip.background = element_blank()) + 27 | xlab('Year') + 28 | ylab('Total diagnoses of traumatic brain injury') + 29 | hrbrthemes::theme_ft_rc() + 30 | labs(title = 'Brain injuries among guard and reserve \n 
military service members', 31 | subtitle = '2006-2014', 32 | caption = 'Data: CDC') 33 | 34 | png('TBI.png',width = 8,height = 8,units = 'in',res = 120) 35 | areaplot 36 | dev.off() 37 | -------------------------------------------------------------------------------- /2020/14_Beer.R: -------------------------------------------------------------------------------- 1 | # Beer 2 | library(tidyverse) 3 | library(lubridate) 4 | library(patchwork) 5 | 6 | brewing_materials <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-31/brewing_materials.csv') 7 | 8 | dg = "#222222" 9 | pal <- calecopal::cal_palette(name = "chaparral2", n = 6)[c(5,6)] 10 | 11 | # Density plot ------------------------------------------------------------ 12 | 13 | s2 <- brewing_materials %>% 14 | group_by(year, type) %>% 15 | filter(material_type %in% c('Grain Products',"Non-Grain Products")) 16 | 17 | densplot <- s2 %>% 18 | rename("Material type"=material_type) %>% 19 | ggplot(aes(month_current/1e6, 20 | colour=`Material type`, 21 | fill=`Material type`)) + 22 | geom_density(lwd=1) + 23 | scale_colour_manual(values = pal) + 24 | scale_fill_manual(values = colorspace::lighten(pal,amount=0.5))+ 25 | hrbrthemes::theme_ft_rc(base_size=14) + 26 | theme(strip.background = element_blank(), 27 | strip.text = element_blank()) + 28 | ylab('Density') + 29 | xlab(bquote('Material used (millions of pounds)')) + 30 | facet_wrap(~`Material type`,ncol=1,scales='free_y') + 31 | theme(legend.position = 'bottom') + 32 | labs(title = 'Materials used for beer production', 33 | subtitle = '2008-2017') 34 | 35 | # Time series ------------------------------------------------------------- 36 | 37 | s <- brewing_materials %>% 38 | group_by(year, type) %>% 39 | summarize(totprod = sum(month_current,na.rm=T)) 40 | 41 | matchtable <- brewing_materials %>% 42 | distinct(material_type,type) 43 | 44 | s <- s %>% 45 | ungroup() %>% 46 | left_join(matchtable) %>% 47 | 
filter(material_type!='Total Used') %>% 48 | filter(type != 'Total Grain products') %>% 49 | filter(type != 'Total Non-Grain products') 50 | 51 | labeldata <- s %>% 52 | filter(year == min(year)) 53 | 54 | timeplot <- s %>% 55 | ggplot(aes(x=year,y=totprod/1e6, 56 | colour=material_type, 57 | group=type)) + 58 | geom_line(lwd=1) + 59 | scale_colour_manual(values = pal) + 60 | scale_x_continuous(breaks = unique(s$year)) + 61 | hrbrthemes::theme_ft_rc(base_size=14) + 62 | ggrepel::geom_label_repel(data = labeldata, 63 | aes(label = type), 64 | point.padding = 1, 65 | fill=dg) + 66 | ylab('Material used (millions of pounds)') + 67 | xlab('Year') + 68 | theme(legend.position = 'none') + 69 | labs(caption = 'Data: Alcohol and Tobacco Tax and Trade Bureau') 70 | 71 | 72 | png('Beer.png',width = 8,height = 10,units = 'in',res = 120) 73 | densplot/timeplot + 74 | plot_layout(heights = c(1,2)) 75 | dev.off() -------------------------------------------------------------------------------- /2020/15_TourdeFrance.R: -------------------------------------------------------------------------------- 1 | # Tour de France 2 | 3 | tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv') 4 | head(tdf_winners) 5 | 6 | library(tidyverse) 7 | library(lubridate) 8 | library(patchwork) 9 | 10 | # Names and life durations of TDF winners --------------------------------- 11 | 12 | bikenames <- tdf_winners %>% 13 | select(edition,winner_name,born,died,nickname,nationality,start_date) %>% 14 | mutate(winyear = lubridate::ymd(year(start_date), truncated = 2L)) 15 | unique(bikenames$birth_country) 16 | 17 | bikenames$winner_name <- reorder(bikenames$winner_name, 18 | rev(bikenames$born)) 19 | bikenames <- bikenames %>% 20 | mutate(lifedur = as.duration(ymd(born) %--% ymd(died))/3.154e+7) %>% #convert seconds to years at the end 21 | filter(!is.na(lifedur)) 22 | 23 | pal <- RColorBrewer::brewer.pal('Spectral',n=8) 
24 | 25 | longevity <- bikenames %>% 26 | ggplot() + 27 | geom_linerange(aes(ymin = born,ymax=died,x=winner_name, 28 | colour=as.numeric(lifedur)),lwd=1.1) + 29 | coord_flip() + 30 | xlab('Winner name') + 31 | ylab('Year') + 32 | geom_point(aes(x=winner_name,y=winyear,shape=19), 33 | size=2,colour='darkgrey') + 34 | scale_shape_identity('', 35 | labels = 'Won the \nTour de France', 36 | breaks=c(19), 37 | guide = 'legend') + 38 | scale_colour_gradient2('How long \nthey lived \n (years)', 39 | low = pal[1], high=pal[8], midpoint = 60) + 40 | labs(title='The lives of Tour de France winners', 41 | caption='Data: tdf package & Kaggle') + 42 | guides(colour = guide_legend(order = 1), 43 | shape = guide_legend(order = 2)) + 44 | hrbrthemes::theme_ft_rc() + 45 | theme(text=element_text(size=12, family='Helvetica')) 46 | 47 | 48 | 49 | # Wins by country --------------------------------------------------------- 50 | countrywins <- bikenames %>% 51 | group_by(nationality) %>% 52 | summarize(nwins = length(winner_name)) 53 | 54 | # This is still how I reorder factors, sorry tidyverse! 
55 | countrywins$nationality <- reorder(countrywins$nationality, 56 | countrywins$nwins) 57 | 58 | cwins <- countrywins %>% 59 | ggplot(aes(x=nationality,y=nwins)) + 60 | geom_col() + 61 | xlab('Winner nationality') + 62 | ylab('Number of wins') + 63 | coord_flip() + 64 | hrbrthemes::theme_ft_rc() + 65 | theme(text=element_text(size=16,family='Helvetica')) 66 | 67 | 68 | png('TourdeFrance.png',width = 8,height = 12,units='in',res=120) 69 | (longevity / cwins ) + plot_layout(heights = c(3,1)) 70 | dev.off() 71 | -------------------------------------------------------------------------------- /2020/16_HipHop.R: -------------------------------------------------------------------------------- 1 | #BBC hip-hop poll of critics 2 | polls <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/polls.csv') 3 | rankings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-14/rankings.csv') 4 | 5 | library(tidyverse) 6 | library(patchwork) 7 | 8 | # Five tracks I rocked out to while writing some of this code (after getting off Zoom, lol) 9 | # Bahamadia - Uknowhowwedu 10 | # Da Brat & Notorious B.I.G. - Da B Side 11 | # Remy Ma - Conceited 12 | # MC LYTE - Cold Rock a Party 13 | # and a new one... my life... 
14 | # Megan Thee Stallion - Savage 15 | 16 | # Heat map showing 'golden years' 17 | rankheat <- polls %>% 18 | ggplot(aes(x=year,y=rank)) + 19 | stat_density_2d(aes(fill = ..density..), 20 | geom = "raster", contour = FALSE) + 21 | viridis::scale_fill_viridis('Density of tracks \n at that ranking',option = 'plasma') + 22 | hrbrthemes::theme_ft_rc() + 23 | xlab('Year') + 24 | ylab('Ranking') 25 | 26 | 27 | # Subset to female artists 28 | femrappers <- polls %>% 29 | filter(gender=='female') 30 | 31 | femfreq <- femrappers %>% 32 | group_by(year) %>% 33 | summarize(Nfem = length(title)) 34 | 35 | hg <- femfreq %>% 36 | ggplot(aes(x=year,y=Nfem)) + 37 | geom_col(colour='white') + 38 | hrbrthemes::theme_ft_rc() + 39 | xlab('Year') + 40 | ylab("Number of female hip-hop\n artists on critics' lists") + 41 | labs(subtitle = "Were female artists recognized during the 'golden years'?") 42 | 43 | criticsummary <- polls %>% 44 | group_by(critic_rols,gender) %>% 45 | count() %>% 46 | ungroup() %>% 47 | group_by(critic_rols) %>% 48 | mutate(percent = n/sum(n)) 49 | 50 | whichcritics <- criticsummary %>% 51 | ggplot(aes(x=critic_rols,y=percent,fill=gender,colour = gender)) + 52 | geom_col() + 53 | scale_fill_brewer('Artist gender',palette = 'BuPu') + 54 | scale_color_brewer('Artist gender',palette = 'BuPu') + 55 | xlab('Critic role') + 56 | ylab('Percent of top 5 list') + 57 | coord_flip() + 58 | hrbrthemes::theme_ft_rc() + 59 | theme(legend.position = 'bottom') + 60 | labs(title = 'Women in hip-hop top 5 lists', 61 | subtitle = 'Who put them on their top five, and when their tracks came out') + 62 | theme(plot.title = element_text(hjust = -.1), 63 | plot.subtitle = element_text(hjust = -.1)) 64 | 65 | 66 | png('HipHop.png',width = 13,height = 9,units = 'in',res = 120) 67 | whichcritics + 68 | ( ( hg/rankheat ) + 69 | plot_layout(heights = c(1,6)) ) + 70 | plot_layout(widths = c(2,1)) 71 | dev.off() 72 | 73 | 74 | 75 | # Extras 
------------------------------------------------------------------ 76 | 77 | # peryearsongs <- polls %>% 78 | # group_by(year) %>% 79 | # summarize(totsongs = length(title)) 80 | # 81 | # peryearsongs %>% 82 | # ggplot(aes(x=year,y=totsongs)) + 83 | # geom_line(colour='white') + 84 | # xlab('Year') + 85 | # ylab('Number of songs on BBC list') + 86 | # hrbrthemes::theme_ft_rc() 87 | -------------------------------------------------------------------------------- /2020/17_GDPR.R: -------------------------------------------------------------------------------- 1 | # Get the Data 2 | library(tidyverse) 3 | library(lubridate) 4 | library(patchwork) 5 | library(scales) 6 | 7 | gdpr_violations <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-21/gdpr_violations.tsv') 8 | gdpr_text <- readr::read_tsv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-21/gdpr_text.tsv') 9 | 10 | gdpr2 <- gdpr_violations %>% 11 | mutate(newdate = mdy(date)) %>% 12 | filter(newdate > '2000-01-01') 13 | 14 | 15 | make_numer <- function(x){ 16 | y <- str_extract(x,pattern = "[[:digit:]]+[:space:]") %>% 17 | str_trim(side = 'both') 18 | return(y) 19 | } 20 | 21 | # sort out the data and clean up the articles being violated 22 | articles_violated <- gdpr2 %>% 23 | select(id,article_violated) %>% 24 | separate(article_violated,c('n1','n2','n3','n4','n5'), 25 | sep=paste("\\|"),fill='right') %>% 26 | mutate_at(c('n1','n2','n3','n4','n5'),make_numer) %>% #extract articles violated! 
27 | pivot_longer(-id) %>% 28 | drop_na(value) %>% 29 | select(-name) %>% 30 | rename('art_viol'=value) 31 | 32 | pdat <- articles_violated %>% 33 | left_join(gdpr2) %>% 34 | group_by(newdate,art_viol) %>% 35 | summarize(num_viols = n(),total_price = sum(price)) %>% 36 | arrange(newdate) %>% 37 | group_by(art_viol) %>% 38 | mutate(rolling_numviol = cumsum(num_viols)) %>% 39 | filter(length(num_viols)>3) 40 | 41 | ends <- pdat %>% 42 | group_by(art_viol) %>% 43 | top_n(1,newdate) %>% 44 | mutate(enddate = max(newdate)) %>% 45 | mutate(artlabel = paste(art_viol)) 46 | 47 | # set aesthetic stuff 48 | dg = "#222222" 49 | palitra.pal <- c('#96ffff','#52eeff','#17d8ff','#5cbcff','#5cbcff','#a696ff','#e064e6','#ff009f') 50 | nb.cols <- length(levels(factor(pdat$art_viol))) 51 | mycolors <- colorRampPalette(palitra.pal)(nb.cols) 52 | 53 | 54 | plot1 <- pdat %>% 55 | ggplot(aes(x=newdate,y=rolling_numviol,colour=art_viol)) + 56 | geom_line(lwd=1.2) + 57 | hrbrthemes::theme_ft_rc() + 58 | xlab('Date') + 59 | ylab('Cumulative number of violations') + 60 | scale_color_manual(values=mycolors) + 61 | scale_x_date(date_labels = paste0('%b',' 20','%y')) + 62 | ggrepel::geom_label_repel(data = ends, 63 | aes(x=enddate, 64 | y=rolling_numviol, 65 | label=artlabel), 66 | max.iter = 100, 67 | nudge_x = 25, 68 | fill = dg) + 69 | theme(legend.position = 'none', 70 | axis.title.x = element_text(size = rel(1.5)), 71 | axis.title.y = element_text(size = rel(1.5))) + 72 | labs(title = 'Violations of the General Data Protection Regulation (EU) ', 73 | subtitle = "Which articles of the GDPR had the most frequent violations?", 74 | caption = 'Data: GDPR, c/o Bob Rudis') 75 | 76 | p2 <- pdat %>% 77 | mutate(yr = year(newdate)) %>% 78 | group_by(yr,art_viol) %>% 79 | summarize(price = sum(total_price)) 80 | 81 | totals <- p2 %>% 82 | group_by(yr) %>% 83 | summarize(totp = sum(price)) %>% 84 | as.data.frame() 85 | 86 | plot2 <- p2 %>% 87 | ggplot(aes(x=yr,y=price,fill=art_viol)) + 88 | 
geom_col() + 89 | scale_fill_manual('Article violated',values=mycolors) + 90 | xlab('Year') + 91 | ylab('Total fines ( \u20AC )') + 92 | hrbrthemes::theme_ft_rc() + 93 | theme(axis.title.x = element_text(size = rel(1.5)), 94 | axis.title.y = element_text(size = rel(1.5))) + 95 | geom_text(data=totals, 96 | aes(x=yr,y=totp + 5e6, 97 | label=paste(format(round(totp,-3),big.mark = ','),' \u20AC')), 98 | inherit.aes = FALSE) + 99 | scale_y_continuous(labels = comma) 100 | 101 | png('GDPR.png',width = 14,height = 8,units = 'in',res = 110) 102 | plot1+plot2 + 103 | plot_layout(widths = c(2,1)) 104 | dev.off() 105 | -------------------------------------------------------------------------------- /2020/18_Broadway.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(lubridate) 3 | library(ggExtra) 4 | library(patchwork) 5 | #devtools::install_github("thebioengineer/tidytuesdayR") 6 | #devtools::install_github("seananderson/ggsidekick") 7 | 8 | tuesdata <- tidytuesdayR::tt_load(2020, week = 18) 9 | grosses <- tuesdata$grosses 10 | synopses <- tuesdata$synopses 11 | cpi <- tuesdata$cpi 12 | pre_1985_starts<- tuesdata$`pre-1985-starts` 13 | 14 | # Made a function to expand a palette to length(unique(x)): 15 | pal_lengthen <- function(x,shortpal){ 16 | ncolours <- length(unique(x)) 17 | newpal <- colorRampPalette(shortpal)(ncolours) 18 | return(newpal) 19 | } 20 | 21 | synopses %>% 22 | filter(str_detect(synopsis, "Sondheim")) 23 | # Already mad that there are no Sondheim classics in here... where did they go?? 
24 | 25 | grosses2 <- grosses %>% 26 | mutate(year = year(ymd(week_ending))) 27 | 28 | # Get densities for a contour plot 29 | dens <- MASS::kde2d(grosses$avg_ticket_price, 30 | grosses$pct_capacity, n = 50) 31 | densf <- data.frame(expand.grid(x = dens$x, y = dens$y), 32 | z = as.vector(dens$z)) 33 | 34 | # Palettes from https://carto.com/carto-colors/ + datapasta: 35 | mint <- c("#e4f1e1", "#b4d9cc", "#89c0b6", "#63a6a0", "#448c8a", "#287274", "#0d585f") 36 | mypal <- pal_lengthen(x = 1:9,shortpal = mint) 37 | 38 | maxes <- grosses2 %>% 39 | filter(pct_capacity == max(pct_capacity) | 40 | avg_ticket_price == max (avg_ticket_price)) 41 | 42 | 43 | # Contour plot with scatter ----------------------------------------------- 44 | p1 <- ggplot(densf, aes(x=x,y=y,z=z)) + 45 | geom_contour_filled(breaks=seq(min(densf$z), 46 | max(densf$z), 47 | length.out=10), 48 | aes(colour=after_stat(level))) + 49 | scale_fill_manual(values = mypal) + 50 | scale_colour_manual(values = mypal) + 51 | geom_point(data = sample_n(grosses2,size = 100), 52 | aes(x=avg_ticket_price,y=pct_capacity),inherit.aes = FALSE,alpha=0.5,size = 2) + 53 | geom_point(data = filter(grosses2,pct_capacity>1.2 | avg_ticket_price> 400), 54 | aes(x=avg_ticket_price,y=pct_capacity),inherit.aes = FALSE,alpha=0.5,size = 2) + 55 | geom_hline(yintercept = 1,lty=2,colour='darkgrey') + 56 | geom_curve(aes(x=70, y=1.2, xend = maxes$avg_ticket_price[1], 57 | yend=maxes$pct_capacity[1]*.99), alpha=0.5, 58 | inherit.aes=FALSE,arrow=arrow(length= unit(0.015, "npc")), 59 | curvature = -0.2, color='darkgrey') + #adding arrows to join text to points 60 | geom_curve(aes(x=400, y=.7, xend=maxes$avg_ticket_price[2], 61 | yend=maxes$pct_capacity[2]*1.01), alpha=0.5, 62 | inherit.aes=FALSE,arrow=arrow(length= unit(0.015, "npc")), 63 | curvature = -0.2, color='darkgrey') + 64 | geom_label(aes(x=70,y = 1.2),label = maxes$show[1]) + 65 | geom_label(aes(x=400,y = .7),label = maxes$show[2]) + 66 | scale_x_continuous(expand = c(0.001, 
0.001)) + 67 | scale_y_continuous(expand = c(0.001, 0.001)) + 68 | xlab('Average ticket price (USD)') + 69 | ylab('Percent capacity') + 70 | ggsidekick::theme_sleek(base_size = 14) + 71 | theme(legend.position = 'none') 72 | 73 | top_sellers <- grosses2 %>% 74 | group_by(show) %>% 75 | summarize(total_gross = sum(weekly_gross)) %>% 76 | top_n(4) 77 | 78 | mothcols <- PNWColors::pnw_palette("Moth",4,type = 'discrete') %>% as.vector() 79 | 80 | p2 <- grosses2 %>% 81 | filter(show %in% top_sellers$show & year<2020) %>% 82 | ggplot(aes(x=year,y=weekly_gross/1e6,colour=show,group=show)) + 83 | geom_point(alpha=0.7) + 84 | stat_summary(fun=mean, geom="line") + 85 | xlab('Year') + 86 | ylab('Weekly gross (x 1 million USD)') + 87 | ggsidekick::theme_sleek(base_size = 14) + 88 | theme(legend.position = 'bottom') + 89 | scale_colour_manual('',values=mothcols) 90 | 91 | p3 <- ggMarginal(p2, 92 | margins = 'y', 93 | groupFill = TRUE, 94 | colour='darkgrey', 95 | size=4) 96 | 97 | 98 | png(filename = 'Broadway.png',width = 12,height = 5,units = 'in',res=120) 99 | p1 + p3 100 | dev.off() 101 | -------------------------------------------------------------------------------- /2020/19_AnimalCrossing.R: -------------------------------------------------------------------------------- 1 | # ANIMAL CROSSING!!! 
library(tidyverse)
library(rfishbase)
library(calecopal)
library(patchwork)

# coding soundtrack: Edith Piaf

items <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-05/items.csv')
# load data from Polygon site - tibble pdat includes Location and Shadow.Size
load("PolygonData.RData")

# Look up real-life FishBase info for a common name: the number of species
# sharing that name, the modal price category, and the mean vulnerability.
# Returns a list(n = ..., price_mode = ..., vuln = ...).
getspinfo <- function(commonname){
  nm <- common_to_sci(paste(commonname))
  dat <- species(nm$Species, fields = c("Species", "PriceCateg", "Vulnerability"))
  nspecies <- nrow(dat)
  pricecat <- sort(table(dat$PriceCateg), decreasing = TRUE)
  x <- names(pricecat)[1]
  # Fall back to the second-most-common category when the mode is 'unknown'
  modeprice <- ifelse(x != 'unknown', x, names(pricecat[2]))
  meanvuln <- mean(dat$Vulnerability, na.rm = TRUE)
  list(n = nspecies, price_mode = modeprice, vuln = meanvuln)
}

# Let's be honest...
fishes <- items %>%
  filter(category == 'Fish')

# Preallocate one slot per fish instead of growing vectors in the loop
n_fish <- nrow(fishes)
RLprice <- character(n_fish)
RLvuln <- numeric(n_fish)
nspps <- numeric(n_fish)

# Didn't work with mutate() -- rfishbase lookups are done row-wise here
for (i in seq_len(n_fish)) {
  x <- getspinfo(fishes$name[i])
  if (length(x$price_mode) == 0) {
    RLprice[i] <- NA
    RLvuln[i] <- NA
    nspps[i] <- NA
  } else {
    RLprice[i] <- x$price_mode
    RLvuln[i] <- x$vuln
    nspps[i] <- x$n
  }
}

fishvuln <- fishes %>%
  add_column(RLprice = RLprice) %>%
  add_column(RLvuln = RLvuln) %>%
  add_column(nspps = nspps)

p1 <- fishvuln %>%
  ggplot(aes(x = RLvuln, y = sell_value, colour = RLprice)) +
  geom_point() +
  scale_color_manual('Price in real life', values = cal_palette("superbloom1")) +
  xlab('Mean vulnerability score in real life') +
  ylab('Sell value in Animal Crossing (bells)') +
  hrbrthemes::theme_ft_rc()

# Labels for extreme values
extremes1 <- fishvuln %>%
  filter(nspps > 500) %>%
  distinct(name, .keep_all = TRUE)
extremes2 <- fishvuln %>%
  filter(sell_value > 12500) %>%
  distinct(name, .keep_all = TRUE)

p2 <- fishvuln %>%
  ggplot(aes(x = sell_value, y = nspps)) +
  geom_point(colour = 'white') +
  xlim(c(0, 20000)) +
  ggrepel::geom_text_repel(data = extremes1,
                           aes(x = sell_value, y = nspps, label = name),
                           colour = 'white') +
  ggrepel::geom_text_repel(data = extremes2,
                           aes(x = sell_value, y = nspps, label = name),
                           colour = 'white',
                           nudge_x = 5000,
                           nudge_y = 500,
                           vjust = 0,
                           direction = 'y') +
  #scale_color_manual('Price in real life',values = cal_palette("superbloom1")) +
  xlab('Sell value in Animal Crossing (bells)') +
  ylab('Number of species in real life with this common name') +
  hrbrthemes::theme_ft_rc()
# NOTE(review): a stray `scatplot2` call followed here in the original; that
# object is never defined anywhere in this script (would error), so removed.

habs <- fishvuln %>%
  left_join(pdat, by = c("name" = "Fish")) %>%
  select(name, Location, sell_value) %>%
  distinct(name, .keep_all = TRUE)

p3 <- habs %>%
  group_by(Location) %>%
  filter(length(name) > 2) %>%
  ggplot(aes(x = sell_value, fill = Location)) +
  geom_density(colour = 'white') +
  scale_fill_manual(values = cal_palette("chaparral2")) +
  facet_wrap(~Location, nrow = 1) +
  xlab('Sell value (bells)') +
  ylab("Density") +
  coord_flip() +
  hrbrthemes::theme_ft_rc() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = 'none') +
  labs(title = 'How valuable are fish in Animal Crossing',
       subtitle = "...and how does their value compare to the same taxa in real life?",
       caption = "Data: VillagerDB, FishBase, and Polygon")

png(filename = 'AnimalCrossing.png', width = 12, height = 8, units = 'in', res = 120)
p3 / (p1 + p2 + plot_layout(widths = c(1, 2)))
dev.off()
# -------------------------------------------------------------------------
# /2020/19_AnimalCrossing_dataprep.R:
# -------------------------------------------------------------------------
# Polygon data (collected from polygon using {datapasta})
pdat <-
tibble::tribble( 3 | ~Critter.number, ~Fish, ~Location, ~Shadow.Size, ~Value, ~Time, ~Month.Hemisphere, 4 | 1L, "Bitterling", "River", "Smallest", 900, "All.day", "November-March.(Northern)./.May-September.(Southern)", 5 | 2L, "Pale Chub", "River", "Smallest", 160, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 6 | 3L, "Crucian Carp", "River", "Small", 160, "All day", "Year-round (Northern and Southern)", 7 | 4L, "Dace", "River", "Medium", 240, "4 p.m. - 9 a.m.", "Year-round (Northern and Southern)", 8 | 5L, "Carp", "Pond", "Large", 300, "All day", "Year-round (Northern and Southern)", 9 | 6L, "Koi", "Pond", "Large", 4000, "4 p.m. - 9 a.m.", "Year-round (Northern and Southern)", 10 | 7L, "Goldfish", "Pond", "Smallest", 1300, "All day", "Year-round (Northern and Southern)", 11 | 8L, "Pop-eyed Goldfish", "Pond", "Smallest", 1300, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 12 | 9L, "Ranchu Goldfish", "Pond", "Small", 4500, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 13 | 10L, "Killifish", "Pond", "Smallest", 300, "All day", "April-August (Northern) / October-February (Southern)", 14 | 11L, "Crawfish", "Pond", "Small", 200, "All day", "April-September (Northern) / October-March (Southern)", 15 | 12L, "Soft-shelled Turtle", "River", "Large", 3750, "4 p.m. - 9 a.m.", "August-September (Northern) / February-March (Southern)", 16 | 13L, "Snapping Turtle", "River", "X Large", 5000, "9 p.m. - 4 a.m.", "April-October (Northern) / October-April (Southern)", 17 | 14L, "Tadpole", "Pond", "Smallest", 100, "All day", "March-July (Northern) / September-January (Southern)", 18 | 15L, "Frog", "Pond", "Small", 120, "All day", "May-August (Northern) / November-Feburary (Southern)", 19 | 16L, "Freshwater Goby", "River", "Small", 400, "4 p.m. - 9 a.m.", "Year-round (Northern and Southern)", 20 | 17L, "Loach", "River", "Small", 400, "All day", "March-May (Northern) / September-November (Southern)", 21 | 18L, "Catfish", "Pond", "Large", 800, "4 p.m. 
- 9 a.m.", "May-October (Northern) / November-April (Southern)", 22 | 19L, "Giant Snakehead", "Pond", "X Large", 5500, "9 a.m. - 4 p.m.", "June-August (Northern) / December-February (Southern)", 23 | 20L, "Bluegill", "River", "Small", 180, "9 a.m. - 4 p.m.", "Year-round (Northern and Southern)", 24 | 21L, "Yellow Perch", "River", "Medium", 300, "All day", "October-March (Northern) / April-September (Southern)", 25 | 22L, "Black Bass", "River", "Large", 400, "All day", "Year-round (Northern and Southern)", 26 | 23L, "Tilapia", "River", "Medium", 800, "All day", "June-October (Northern) / December-April (Southern)", 27 | 24L, "Pike", "River", "X Large", 1800, "All day", "September-December (Northern) / March-June (Southern)", 28 | 25L, "Pond Smelt", "River", "Small", 500, "All day", "December-February (Northern) / June-August (Southern)", 29 | 26L, "Sweetfish", "River", "Medium", 900, "All day", "July-September (Northern) / January-March (Southern)", 30 | 27L, "Cherry Salmon", "River (Clifftop)", "Medium", 1000, "4 p.m. - 9 a.m.", "March-June, September-November (Northern) / March-May, September-December (Southern)", 31 | 28L, "Char", "River (Clifftop)", "Medium", 3800, "4 p.m. - 9 a.m.", "March-June, September-November (Northern) / March-May, September-December (Southern)", 32 | 29L, "Golden Trout", "River (Clifftop)", "Medium", 15000, "4 p.m. - 9 a.m.", "March-June, September-November (Northern) / March-May, September-December (Southern)", 33 | 30L, "Stringfish", "River (Clifftop)", "Largest", 15000, "4 p.m. - 9 a.m.", "December-March (Northern) / June-September (Southern)", 34 | 31L, "Salmon", "River (mouth)", "Large", 700, "All day", "September (Northern) / March (Southern)", 35 | 32L, "King Salmon", "River (mouth)", "Largest", 1800, "All day", "September (Northern) / March (Southern)", 36 | 33L, "Mitten Crab", "River", "Small", 2000, "4 p.m. - 9 a.m.", "September-November (Northern) / March-May (Southern)", 37 | 34L, "Guppy", "River", "Smallest", 1300, "9 a.m. 
- 4 p.m.", "April-November (Northern) / October-May (Southern)", 38 | 35L, "Nibble Fish", "River", "Small", 1500, "9 a.m. - 4 p.m.", "May-September (Northern) / November-March (Southern)", 39 | 36L, "Angelfish", "River", "Small", 3000, "4 p.m. - 9 a.m.", "May-October (Northern) / November-April (Southern)", 40 | 37L, "Betta", "River", "Small", 2500, "9 a.m. - 4 p.m.", "May-October (Northern) / November-April (Southern)", 41 | 38L, "Neon Tetra", "River", "Smallest", 500, "9 a.m. - 4 p.m.", "April-November (Northern) / October-May (Southern)", 42 | 39L, "Rainbowfish", "River", "Small", 800, "9 a.m. - 4 p.m.", "May-October (Northern) / November-April (Southern)", 43 | 40L, "Piranha", "River", "Small", 2500, "9 a.m. - 4 p.m., 9 p.m. - 4 a.m.", "June-September (Northern) / December-March (Southern)", 44 | 41L, "Arowana", "River", "Large", 10000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 45 | 42L, "Dorado", "River", "X Large", 15000, "4 a.m. - 9 p.m.", "June-September (Northern) / December-March (Southern)", 46 | 43L, "Gar", "Pond", "Largest", 6000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 47 | 44L, "Arapaima", "River", "Largest", 10000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 48 | 45L, "Saddled Bichir", "River", "Large", 4000, "9 p.m. 
- 4 a.m.", "June-September (Northern) / December-March (Southern)", 49 | 46L, "Sturgeon", "River (mouth)", "Largest", 10000, "All day", "September-March (Northern) / March-September (Southern)", 50 | 47L, "Sea Butterfly", "Sea", "Smallest", 1000, "All day", "December-March (Northern) / June-September (Southern)", 51 | 48L, "Sea Horse", "Sea", "Smallest", 1100, "All day", "April-November (Northern) / October-May (Southern)", 52 | 49L, "Clown Fish", "Sea", "Smallest", 650, "All day", "April-September (Northern) / October-March (Southern)", 53 | 50L, "Surgeonfish", "Sea", "Small", 1000, "All day", "April-September (Northern) / October-March (Southern)", 54 | 51L, "Butterfly Fish", "Sea", "Small", 1000, "All day", "April-September (Northern) / October-March (Southern)", 55 | 52L, "Napoleonfish", "Sea", "Largest", 10000, "4 a.m. - 9 p.m.", "July-August (Northern) / January-February (Southern)", 56 | 53L, "Zebra Turkeyfish", "Sea", "Medium", 500, "All day", "April-November (Northern) / October-May (Southern)", 57 | 54L, "Blowfish", "Sea", "Medium", 5000, "6 p.m. - 4 a.m.", "November-February (Northern) / May-August (Southern)", 58 | 55L, "Puffer Fish", "Sea", "Medium", 250, "All day", "July-September (Northern) / January-March (Southern)", 59 | 56L, "Anchovy", "Sea", "Small", 200, "4 a.m. 
- 9 p.m.", "Year-round (Northern and Southern)", 60 | 57L, "Horse Mackerel", "Sea", "Small", 150, "All day", "Year-round (Northern and Southern)", 61 | 58L, "Barred Knifejaw", "Sea", "Medium", 5000, "All day", "March-November (Northern) / September-May (Southern)", 62 | 59L, "Sea Bass", "Sea", "X Large", 400, "All day", "Year-round (Northern and Southern)", 63 | 60L, "Red Snapper", "Sea", "Large", 3000, "All day", "Year-round (Northern and Southern)", 64 | 61L, "Dab", "Sea", "Medium", 300, "All day", "October-April (Northern) / April-October (Southern)", 65 | 62L, "Olive Flounder", "Sea", "Large", 800, "All day", "Year-round (Northern and Southern)", 66 | 63L, "Squid", "Sea", "Medium", 500, "All day", "December-August (Northern) / June-February (Southern)", 67 | 64L, "Moray Eel", "Sea", "Narrow", 2000, "All day", "August-October (Northern) / February-April (Southern)", 68 | 65L, "Ribbon Eel", "Sea", "Narrow", 600, "All day", "June-October (Northern) / December-April (Southern)", 69 | 66L, "Tuna", "Pier", "Largest", 7000, "All day", "November-April (Northern) / May-October (Southern)", 70 | 67L, "Blue Marlin", "Pier", "Largest", 10000, "All day", "July-September, November-April (Northern) / January-March, May-November (Southern)", 71 | 68L, "Giant Trevally", "Pier", "Large", 4500, "All day", "May-October (Northern) / November-April (Southern)", 72 | 69L, "Mahi-mahi", "Pier", "Large", 6000, "All day", "May-October (Northern) / November-April (Southern)", 73 | 70L, "Ocean Sunfish", "Sea", "Largest (Fin)", 4000, "4 a.m. - 9 p.m.", "July-September (Northern) / January-March (Southern)", 74 | 71L, "Ray", "Sea", "X Large", 3000, "4 a.m. - 9 p.m.", "August-November (Northern) / February-May (Southern)", 75 | 72L, "Saw Shark", "Sea", "Largest (Fin)", 12000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)", 76 | 73L, "Hammerhead Shark", "Sea", "Largest (Fin)", 8000, "4 p.m. 
- 9 a.m.", "June-September (Northern) / December-March (Southern)",
                74L, "Great White Shark", "Sea", "Largest (Fin)", 15000, "4 p.m. - 9 a.m.", "June-September (Northern) / December-March (Southern)",
                75L, "Whale Shark", "Sea", "Largest (Fin)", 13000, "All day", "June-September (Northern) / December-March (Southern)",
                76L, "Suckerfish", "Sea", "Large (Fin)", 1500, "All day", "June-September (Northern) / December-March (Southern)",
                77L, "Football Fish", "Sea", "Large", 2500, "4 p.m. - 9 a.m.", "November-March (Northern) / May-September (Southern)",
                78L, "Oarfish", "Sea", "Largest", 9000, "All day", "December-May (Northern) / June-November (Southern)",
                79L, "Barreleye", "Sea", "Small", 15000, "9 p.m. - 4 a.m.", "Year-round (Northern and Southern)",
                80L, "Coelacanth", "Sea (rainy days)", "Largest", 15000, "All day", "Year-round (Northern and Southern)"
  )

pdat <- as.data.frame(pdat)

save(pdat, file = 'PolygonData.RData')
# -------------------------------------------------------------------------
# /2020/20_Volcanoes.R:
# -------------------------------------------------------------------------
library(tidyverse)
library(patchwork)

# For the map:
library(sf)
library(mapview)

# Data
volcano <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/volcano.csv')
eruptions <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/eruptions.csv')
events <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/events.csv')
#tree_rings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/tree_rings.csv')
#sulfur <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/sulfur.csv')


# Color stuff -------------------------------------------------------------
pal <- c('#fcde9c','#faa476','#f0746e','#e34f6f','#dc3977','#b9257a','#7c1d6f')

# Interpolate a short palette out to one colour per unique value of x.
lengthen_pal <- function(x, shortpal){
  ncolours <- length(unique(x))
  colorRampPalette(shortpal)(ncolours)
}

newpal <- lengthen_pal(x = 1:10, shortpal = pal)


# Data stuff --------------------------------------------------------------
locs <- volcano %>%
  select(volcano_name, major_rock_1, latitude, longitude)

locations_sf <- st_as_sf(locs, coords = c("longitude", "latitude"), crs = 4326)
mapviewOptions(basemaps = "CartoDB.DarkMatter")
p <- mapview(locations_sf,
             zcol = "major_rock_1",
             alpha = 0,
             col.regions = newpal)

mapshot(p, file = 'volcanoplot2.pdf',
        remove_controls = c("homeButton", "layersControl"),
        res = 400,
        selfcontained = FALSE,
        zoom = 1)

# Eruption counts per volcano, joined back to volcano metadata
volsumm <- eruptions %>%
  group_by(volcano_name) %>%
  count() %>%
  left_join(volcano)

palitra <- c('#0099ff','#9d98f6','#d59de5','#f4a9d3','#ffbcca','#ffd2cd','#ffe8dd')

p1 <- volsumm %>%
  ggplot(aes(x = longitude, y = n, colour = evidence_category)) +
  geom_point() +
  scale_colour_manual('Evidence category', values = palitra[c(2, 3, 4, 5, 6)]) +
  # BUG FIX: the original used 47.6062, which is Seattle's *latitude*;
  # on this longitude axis Seattle sits at -122.3321.
  geom_vline(xintercept = -122.3321, colour = 'white', lty = 2) + # longitude of Seattle
  ylab('Number of eruptions') +
  xlab('Longitude') +
  hrbrthemes::theme_ft_rc(base_size = 11) +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5))) +
  # arrow joining the "Seattle" label to the (corrected) vertical line
  geom_curve(aes(x = -60, y = 200, xend = -122.3, yend = 225), colour = 'white',
             arrow = arrow(length = unit(0.03, "npc"))) +
  annotate('text', x = -60, y = 180, label = "Seattle", colour = 'white') +
  labs(title = "A landscape of fear!",
       subtitle = "Was my childhood fear of volcanic annihilation well-founded?")

p1

volareas <-
volcano %>%
  left_join(events) %>%
  # bin volcanoes into four longitude bands
  mutate(longbin = case_when(longitude < (-100) ~ "l1",
                             longitude > (-100) & longitude < 0 ~ "l2",
                             longitude > 0 & longitude < 100 ~ "l3",
                             longitude > 100 ~ "l4"))

p2 <- volareas %>%
  # FIX: labels now describe the actual bins -- l1 is longitude < -100
  # (the original label '< 100' dropped the minus sign) and l2 is -100 < x < 0
  mutate(longbin2 = recode(longbin, l1 = 'x < -100', l2 = '-100 < x < 0',
                           l3 = '0 < x < 100', l4 = 'x > 100')) %>%
  group_by(longbin2, event_type) %>%
  count() %>%
  filter(!is.na(event_type) & event_type != 'VEI (Explosivity Index)') %>%
  ggplot(aes(x = fct_reorder(event_type, n), y = n)) +
  geom_col(fill = 'lightgrey', colour = 'lightgrey') +
  ylab("Count") +
  xlab("Type of event") +
  facet_wrap(~longbin2, nrow = 1) +
  coord_flip() +
  hrbrthemes::theme_ft_rc(base_size = 10) +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5))) +
  labs(caption = 'Data: The Smithsonian Institution')

png('Volcano.png', width = 12, height = 12, units = 'in', res = 400)
p1 + p2 + plot_layout(ncol = 1, heights = c(1, 3))
dev.off()

# -------------------------------------------------------------------------
# /2020/21_Volleyball.R:
# -------------------------------------------------------------------------
# Beach volleyball
library(tidyverse)
library(ggchicklet)
library(hrbrthemes)
library(ggthemes)
library(patchwork)

vb_matches <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-19/vb_matches.csv', guess_max = 76000)

unique(vb_matches$country)
unique(vb_matches$w_p1_country)
unique(vb_matches$circuit)


# Winner player-1 move totals, long format: one row per tournament x move
pd <- vb_matches %>%
  select(circuit,
         tournament,
         country,
         year,
         w_p1_country,
         w_p1_tot_attacks,
         w_p1_tot_kills,
         w_p1_tot_aces,
         w_p1_tot_blocks,
         w_p1_tot_digs) %>%
  pivot_longer(cols = w_p1_tot_attacks:w_p1_tot_digs,
               names_to = c('winner', 'player', 'tot', 'move'),
               names_sep = "_",
               values_to = 'n') %>%
  group_by(circuit, tournament, country, year, w_p1_country, winner, tot, move) %>%
  summarize(N = sum(n)) %>%
  ungroup() %>%
  select(-winner, -tot)

p1 <- pd %>%
  filter(circuit == 'FIVB' & !is.na(N)) %>%
  mutate(w_p1_country = fct_drop(w_p1_country)) %>%
  group_by(w_p1_country, move) %>%
  summarize(sum_moves = sum(N, na.rm = TRUE)) %>%
  ggplot(aes(x = fct_reorder(w_p1_country, .x = sum_moves, .fun = sum),
             y = sum_moves, fill = move)) +
  xlab("Winning country (FIVB)") +
  ylab("Moves") +
  geom_chicklet() +
  coord_flip() +
  scale_fill_tableau("Miller Stone", name = NULL) +
  theme_ipsum_rc(grid = "X") +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)),
        plot.margin = margin(1, 1, 1, 1)) +
  labs(title = 'Moves by winning teams',
       subtitle = 'Total moves in all winning matches',
       caption = 'Data: Adam Vagnar')

p1


# Now maybe we can look at wins over time for each country
pd2 <- vb_matches %>%
  select(year, circuit, tournament, country, match_num, w_p1_country) %>%
  filter(circuit == 'FIVB') %>%
  group_by(w_p1_country, year) %>%
  count()


# NOTE(review): lengthen_pal() is defined in 20_Volcanoes.R (and
# HelperFunctions.R); this script relies on it already being in the workspace.
cpal <- lengthen_pal(x = 1:9, shortpal = tableau_color_pal(palette = "Summer")(8))

pd2a <- pd2 %>%
  group_by(w_p1_country) %>%
  filter(sum(n) > 2000) %>% # "big shot" countries have more than 2000 total wins
  ungroup() %>%
  mutate(bigshot = 1) %>%
  full_join(pd2) %>%
  replace_na(list(bigshot = 0))

p2 <- pd2a %>%
  mutate(w_p1_country = fct_drop(w_p1_country)) %>%
  ggplot(aes(x = year, y = n, colour = w_p1_country)) +
  geom_line(lwd = 1) +
  scale_colour_manual('Country',
                      values = cpal) +
  scale_x_continuous(limits = c(2001, 2019)) +
  xlab('Year') +
  ylab('Total matches won') +
  theme_ipsum_rc(grid = "X") +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)),
        legend.position = 'bottom',
        plot.margin = margin(1, 1, 1, 1)) +
  gghighlight::gghighlight(bigshot == 1, use_direct_label = FALSE) +
  facet_wrap(~w_p1_country) +
  labs(title = 'Wins over time',
       subtitle = 'For the nine "big shot" countries with over 2000 total wins',
       caption = 'Data: Adam Vagnar')


png(filename = 'Volleyball1.png', width = 10, height = 6, units = 'in', res = 200)
p1
dev.off()

png(filename = 'Volleyball2.png', width = 10, height = 6, units = 'in', res = 200)
p2
dev.off()


# -------------------------------------------------------------------------
# /2020/22_Cocktails.R:
# -------------------------------------------------------------------------
# Cocktails!

#devtools::install_github("gadenbuie/ggpomological")

library(tidyverse)
library(tidyr)
library(stringr)
library(vegan)
library(patchwork)
library(ggpomological)

# Look at cocktails with fruits in them, for fun!
cocktails <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-26/cocktails.csv')
boston_cocktails <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-26/boston_cocktails.csv')

# (removed an unused scratch vector `test` that was never referenced)

fruit_list <- tolower(c("Kiwi", "Banana", "Apricot", "Avocado", "Cocos", "Clementine", "Mandarine", "Orange", "Cranberry", "Cherry", "BLackcurrant", "Lime", "Lemon", "Peach", "Plum", "Raspberry", "Strawberry", "Pineapple", "Pomegranate", "Limon", "Apple"))

# Drinks that contain at least one fruit ingredient
fruitycocktails <- cocktails %>%
  mutate(ingredient = tolower(ingredient)) %>%
  filter(ingredient %in% fruit_list) %>%
  select(drink) %>%
  unique()

# Filter out fruity drinks
fdrinks <- cocktails %>%
  group_by(drink) %>%
  right_join(fruitycocktails)

# Tried to sort out amounts, too complicated for 2 hrs of coding... I fail!

p1 <- fdrinks %>%
  mutate(alcoholic = tolower(alcoholic)) %>%
  distinct(drink, category, alcoholic) %>% # subset to distinct drinks
  group_by(category, alcoholic) %>%
  count() %>%
  # NOTE: the original ordered by desc(-n), which is just ascending n
  ggplot(aes(x = fct_reorder(category, n), y = n, fill = alcoholic)) +
  geom_col() +
  xlab('Category') +
  ylab('Number of drinks') +
  coord_flip() +
  scale_fill_pomological('') +
  labs(caption = 'Data: Kaggle') +
  theme(legend.position = 'bottom') +
  theme_pomological("Homemade Apple", 16)



# Turn fruity drinks into community matrix --------------------------------
cm <- fdrinks %>%
  pivot_wider(id_cols = drink,
              names_from = ingredient,
              values_from = row_id,
              values_fn = list(row_id = length)) %>%
  # FIX: funs() is defunct in dplyr >= 1.0; across() is the replacement
  mutate(across(where(is.numeric), ~ replace_na(.x, 0))) # replace NA with 0

cmat <- as.matrix(sapply(cm[, -1], as.numeric))
# keep only ingredients that appear in more than 10 drinks...
subset_ingredients <- which(colSums(cmat) > 10)
cmat <- cmat[, subset_ingredients]
# ...and drinks that still contain at least one kept ingredient
subset_drinks <- which(rowSums(cmat) != 0)
cmat <- cmat[subset_drinks, ]
drink <- cm$drink[subset_drinks]

# NMDS ordination on log-transformed counts
mds.log <- log(cmat + 1)
sol <- metaMDS(mds.log)
vec.sp <- envfit(sol$points, mds.log, perm = 500)
vec.sp.df <- as.data.frame(vec.sp$vectors$arrows * sqrt(vec.sp$vectors$r))
vec.sp.df$species <- rownames(vec.sp.df)
vec.sp.df.tp <- vec.sp.df
ord <- sol$points %>%
  as.data.frame() %>%
  add_column(drink = drink) %>%
  left_join(fdrinks) %>%
  distinct(id_drink, .keep_all = TRUE)

p2 <- ord %>%
  filter(!is.na(drink)) %>%
  ggplot(aes(MDS1, MDS2, colour = category)) +
  geom_point(size = 4) +
  labs(title = 'The community of fruity drinks') +
  scale_colour_pomological("Category") +
  theme_pomological("Homemade Apple", 16)

png('Cocktails.png', width = 10, height = 10, units = 'in', res = 200)
p2 + p1 + plot_layout(ncol = 1, heights = c(3, 1))
dev.off()
# -------------------------------------------------------------------------
# /2020/25_university_statements.R:
# -------------------------------------------------------------------------
#25_university_statements
library(tidyverse)
library(snakecase)
library(hrbrthemes)
library(ggtext)
library(patchwork)

dat <- read.csv('statements.csv')
head(dat)

# Clean up the data a little
colnames(dat) <- to_any_case(colnames(dat), "snake")
colnames(dat) <- gsub("x_", "", colnames(dat))

dat <- dat %>%
  select(-conference,
         -strength_of_statement_mentioning_good_cops_1_7,
         -follow_up_link,
         -statement_link) %>%
  mutate(school_name = recode(school_name, "Louisiana State University and Agricultural & Mechanical College" = "Louisiana State University \n and Agricultural & Mechanical College"))

# Proportion of statements mentioning each key name/phrase
props2plot <- dat %>%
  select(-statement_text) %>%
  pivot_longer(cols = on_twitter:good_cops) %>%
select(-strength_of_statement, -follow_up_updated_statement) %>%
  mutate(val_bin = ifelse(value == "Yes", 1, 0)) %>%
  group_by(name) %>%
  # mean of a 0/1 indicator == proportion of statements mentioning the phrase
  summarize(prop.mentioned = mean(val_bin)) %>%
  ungroup() %>%
  mutate(labelnames = to_title_case(name)) %>%
  mutate(labelnames = recode(labelnames, "Murder Killing" = "Murder/Killing",
                             "Mentions Blackness Discrimination Against Black People" = "Mentions Blackness \n or discrimination against Black people")) %>%
  filter(name != "on_twitter") %>%
  filter(name != "good_cops")


p1 <- props2plot %>%
  ggplot(aes(x = fct_reorder(labelnames, prop.mentioned), y = prop.mentioned)) +
  geom_col(fill = 'white') +
  xlab("Name or Phrase") +
  ylab("Proportion of school statements mentioning name or phrase") +
  coord_flip() +
  hrbrthemes::theme_ft_rc() +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)))


# Word count of each school's statement, joined back to the full data
stdat <- dat %>%
  group_by(school_name) %>%
  summarize(statement_length = stringi::stri_count_words(statement_text)) %>%
  left_join(dat)

p2 <- stdat %>%
  filter(on_twitter %in% c("Yes", "No")) %>%
  filter(statement_length > 5) %>%
  ggplot(aes(x = statement_length, y = strength_of_statement,
             colour = on_twitter)) +
  geom_point(size = 2.3, alpha = 0.5) +
  scale_colour_manual("Statement on Twitter?",
                      values = c('white', '#9ecae1')) +
  xlab("Length of statement (words)") +
  ylab("Score of statement") +
  labs(caption = "Statement score is the number of \n key names/phrases in the university's primary statement. \nDataset: @amaan_c") +
  theme_ft_rc() +
  theme(axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)))


# One row per school: total mentions ("score"), plus a cumulative counter (cc)
# used to stack school names vertically within each score column.
vp <- dat %>%
  select(-statement_text) %>%
  pivot_longer(cols = on_twitter:good_cops) %>%
  select(-strength_of_statement, -follow_up_updated_statement) %>%
  mutate(val_bin = ifelse(value == "Yes", 1, 0)) %>%
  filter(name != 'good_cops') %>%
  filter(name != 'on_twitter') %>%
  group_by(school_name) %>%
  summarize(score = sum(val_bin)) %>%
  arrange(desc(score)) %>%
  group_by(score) %>%
  mutate(count1 = 1, cc = cumsum(count1))

mentions_good_cops <- filter(dat, good_cops == 'Yes') %>%
  select(school_name) %>%
  as.vector()

vp2 <- filter(vp, school_name %in% mentions_good_cops$school_name)

p3 <- vp %>% ggplot(aes(x = score, y = cc)) +
  annotate("text", x = vp$score, y = vp$cc,
           label = vp$school_name, size = 3,
           color = "lightgrey",
           family = "Arial Narrow") +
  annotate("text", x = vp2$score, y = vp2$cc,
           label = vp2$school_name, size = 3,
           color = "red",
           family = "Arial Narrow") +
  hrbrthemes::theme_ft_rc() +
  xlab("Score (Total name and phrase mentions)") +
  ylab("") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank()) +
  labs(title = "How thorough were university statements since George Floyd's murder?",
       subtitle = "Red universities also mentioned 'good cops' in their statement.",
       caption = " Data source: Public university statements, collected and scored by @amaan_c") +
  theme(plot.subtitle = element_markdown(lineheight = 1.1),
        axis.title.x = element_text(size = rel(1.5)),
        axis.title.y = element_text(size = rel(1.5)))

png("First.png", width = 14, height = 10, units = 'in', res = 120)
p3
dev.off()

png("Second.png", width = 10, height = 12, units = 'in', res = 120)
p1 + p2 + plot_layout(ncol = 1)
dev.off()
# Extra bits --------------------------------------------------------------
# p2 <- dat %>%
#   select(-statement_text) %>%
#   pivot_longer(cols=on_twitter:good_cops) %>%
#   select(-strength_of_statement,-follow_up_updated_statement) %>%
#   mutate(val_bin = ifelse(value=="Yes",1,0)) %>%
#   filter(name != 'good_cops') %>%
#   filter(name != 'on_twitter') %>%
#   group_by(school_name) %>%
#   summarize(score = sum(val_bin)) %>%
#   ggplot(aes(x=fct_reorder(school_name,score),y=score)) +
#   geom_col(fill='white') +
#   xlab("School name") +
#   ylab("Score (sum of mentions of each name/phrase") +
#   coord_flip() +
#   hrbrthemes::theme_ft_rc()
# -------------------------------------------------------------------------
# /2020/26_Caribou.R:
# -------------------------------------------------------------------------
# 26 - caribou
library(tidyverse)
library(patchwork)


individuals <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-23/individuals.csv')
locations <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-23/locations.csv')

# Animals whose recorded death cause involves predation, with the predator
# pulled out of the free-text death_cause field
predated <- individuals %>%
  filter(stringr::str_detect(death_cause, 'predation')) %>%
  mutate(predator = ifelse(str_detect(death_cause, 'wolf'), 'wolf',
                           ifelse(str_detect(death_cause, 'bear'), 'bear',
                                  'unknown')))
loc_pred <- locations %>%
  filter(animal_id %in% predated$animal_id)
unique(loc_pred$animal_id)


# Tracks without map ------------------------------------------------------

tagged_day_summary <- locations %>%
  group_by(animal_id) %>%
  summarize(last_dttm = max(timestamp)) %>%
  right_join(locations) %>%
group_by(animal_id) %>% 26 | mutate(day_of_life = as.numeric((last_dttm - timestamp)/(60*60*24))) %>% 27 | summarize(days_tagged = max(day_of_life)) 28 | 29 | xx <- locations %>% 30 | group_by(animal_id) %>% 31 | summarize(last_dttm = max(timestamp)) %>% 32 | right_join(locations) %>% 33 | filter(timestamp == last_dttm) %>% 34 | left_join(tagged_day_summary) %>% 35 | mutate(killed_by_preds = ifelse(animal_id %in% predated$animal_id,"Killed by predators","Other cause of death or tag loss")) %>% 36 | rename(`Number of days tagged` = days_tagged) 37 | 38 | p1 <- xx %>% 39 | arrange(desc(killed_by_preds)) %>% 40 | ggplot(aes(x=longitude,y=latitude, 41 | size = `Number of days tagged`, 42 | colour = killed_by_preds)) + 43 | geom_point(alpha=0.7) + 44 | scale_color_manual("",values=c('#F072B6','white')) + 45 | guides(size=guide_legend(override.aes=list(colour="white"), 46 | title.position = "top", 47 | ncol = 1), 48 | colour = guide_legend(ncol = 1))+ 49 | xlab('Longitude') + 50 | ylab('Latitude') + 51 | hrbrthemes::theme_ft_rc() + 52 | theme(axis.title.x = element_text(size = rel(1.5)), 53 | axis.title.y = element_text(size = rel(1.5)), 54 | legend.position = 'bottom') + 55 | labs(title = 'The work of wolves', 56 | subtitle = "Tagged caribou eaten by wolves on unceded First Nations territory") 57 | 58 | p1 59 | 60 | 61 | pdat2 <- loc_pred %>% 62 | group_by(animal_id) %>% 63 | summarize(last_dttm = max(timestamp)) %>% 64 | right_join(loc_pred) %>% 65 | group_by(animal_id) %>% 66 | mutate(day_of_life = as.numeric((last_dttm - timestamp)/(60*60*24))) %>% 67 | mutate(prop_of_tagged_time = day_of_life/(max(day_of_life))) 68 | 69 | ends <- loc_pred %>% 70 | group_by(animal_id) %>% 71 | mutate(last_dttm = max(timestamp)) %>% 72 | distinct(animal_id, .keep_all=TRUE) 73 | 74 | p2 <- pdat2 %>% 75 | ggplot(aes(x = longitude, y = latitude, group = animal_id,colour = prop_of_tagged_time)) + 76 | geom_path(alpha=0.5,lwd=1.2) + 77 | scale_colour_gradient("Proximity to mortality \n 
(fraction of tagged time)", low = '#FFF886',high = '#F072B6') + 78 | geom_point(data = ends,aes(x=longitude,y=latitude),colour="white",size = 2.5) + 79 | xlab('Longitude') + 80 | ylab('Latitude') + 81 | hrbrthemes::theme_ft_rc() + 82 | theme(axis.title.x = element_text(size = rel(1.5)), 83 | axis.title.y = element_text(size = rel(1.5)), 84 | legend.position = 'bottom', 85 | legend.key.width=unit(1,"cm")) + 86 | labs(caption = "Data: B.C. Ministry of Environment & Climate Change") 87 | 88 | 89 | p2 90 | 91 | png("Caribou.png",width = 12,height = 9,units = 'in',res = 120) 92 | p1+p2 93 | dev.off() 94 | 95 | 96 | 97 | # A bunch of mapping stuff that didn't work ------------------------------- 98 | 99 | # Get map (thank you Nyssa!!!) 100 | # x <- getData(name = "GADM", country = "CAN",level = 1) 101 | # bc <- x [x$NAME_1 == "British Columbia",] 102 | # bc@data$id <- rownames(bc@data) 103 | # create a data.frame from our spatial object 104 | # bcdf <- fortify(bc, region = "id") 105 | # 106 | # p1 <- ggplot(bcdf, aes(x = long,y = lat)) + 107 | # geom_polygon(fill = "grey30", size = 0.1) + 108 | # geom_path(data = loc_pred, aes(x = longitude, y = latitude, group = animal_id,colour = animal_id)) + 109 | # theme_classic() + 110 | # theme(legend.position='none') 111 | # p1 112 | 113 | -------------------------------------------------------------------------------- /2020/27_x-men.R: -------------------------------------------------------------------------------- 1 | # 27_xmen 2 | remotes::install_github("malcolmbarrett/claremontrun") 3 | 4 | comic_bechdel <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/comic_bechdel.csv') 5 | 6 | character_visualization <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/character_visualization.csv') 7 | 8 | characters <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/characters.csv') 9 | 10 | xmen_bechdel <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/xmen_bechdel.csv') 11 | 12 | covers <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/covers.csv') 13 | 14 | issue_collaborators <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/issue_collaborators.csv') 15 | 16 | locations <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-30/locations.csv') 17 | -------------------------------------------------------------------------------- /2020/28_coffee.R: -------------------------------------------------------------------------------- 1 | # Coffee 2 | library(tidyverse) 3 | library(ggridges) 4 | library(GGally) 5 | library(calecopal) 6 | library(patchwork) 7 | 8 | coffee_ratings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') 9 | 10 | # Some cleanup 11 | coffee <- coffee_ratings %>% 12 | mutate(country_of_origin=fct_recode(country_of_origin,"Hawai‘i" = "United States (Hawaii)", 13 | "United Republic of Tanzania"="Tanzania, United Republic Of")) 14 | 15 | # Get the countries with more than 30 samples (for freq) 16 | dat1 <- coffee %>% 17 | group_by(country_of_origin) %>% 18 | count() %>% 19 | filter(n>30) %>% 20 | left_join(coffee) 21 | 22 | order_to <- dat1 %>% 23 | group_by(country_of_origin) %>% 24 | summarize(mean(acidity)) %>% 25 | arrange(desc(`mean(acidity)`)) %>% 26 | pull(country_of_origin) 27 | 28 | dat1$country_of_origin <- factor(dat1$country_of_origin,levels = order_to) # because fct_reorder() didn't work and I do not have the time. 
29 | 30 | coffeepal <- cal_palette(name = "desert", n = 12,type = 'continuous') 31 | coffeetheme <- hrbrthemes::theme_ipsum_rc() + 32 | theme(legend.position = 'none',axis.title.x = element_text(size = rel(1.5)), 33 | axis.title.y = element_text(size = rel(1.5)), 34 | plot.title = element_text(hjust = -2), 35 | plot.subtitle = element_text(hjust = -2)) 36 | 37 | p1 <- dat1 %>% 38 | ggplot(aes(x=acidity,y=country_of_origin,fill=country_of_origin)) + 39 | ggridges::geom_density_ridges(scale = 0.95) + 40 | scale_fill_manual(values = coffeepal) + 41 | xlim(c(5,9)) + 42 | coffeetheme + 43 | xlab('Acidity grade') + 44 | ylab('Country of origin') + 45 | labs(title = "Which country of origin has the least acidic beans?", 46 | subtitle = "Solving a mystery that Megsie and her dad have been pondering for years!", 47 | caption = "Data: James LeDoux @ Buzzfeed") 48 | 49 | p1 50 | 51 | coffee2 <- filter(coffee,acidity>0 & aftertaste>0 & body>0) %>% 52 | rename(Acidity = acidity, Aftertaste = aftertaste,Body = body) %>% 53 | as.data.frame() 54 | 55 | spcols <- cal_palette("sierra1")[c(1,4)] 56 | 57 | p2 <- ggpairs(coffee2, 58 | columns = c('Acidity','Aftertaste','Body'), 59 | mapping = ggplot2::aes(colour=species,fill=species)) + 60 | scale_colour_manual(values = spcols) + 61 | scale_fill_manual(values = spcols) + 62 | coffeetheme 63 | 64 | png('Coffee1.png',width = 9,height = 8,units = 'in',res = 200) 65 | p1 66 | dev.off() 67 | 68 | png('Coffee2.png',width = 8,height = 8,units = 'in',res = 200) 69 | p2 70 | dev.off() -------------------------------------------------------------------------------- /2020/29_astronauts.R: -------------------------------------------------------------------------------- 1 | # 29: Astronauts 2 | library(tidyverse) 3 | library(hrbrthemes) 4 | library(vegan) 5 | library(beyonce) 6 | library(patchwork) 7 | 8 | source('HelperFunctions.R') 9 | 10 | astronauts <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-14/astronauts.csv') 11 | 12 | # smidge of cleanup 13 | astronauts <- astronauts %>% 14 | mutate(occupation = tolower(occupation)) 15 | 16 | # set theme and colours 17 | spacetheme <- theme_ft_rc() + 18 | theme(panel.grid.major = element_blank(), 19 | panel.grid.minor = element_blank()) + 20 | theme(axis.title.x = element_text(size = rel(1.5)), 21 | axis.title.y = element_text(size = rel(1.5)), 22 | legend.position = 'bottom') 23 | palitra <- c('#FFFFDE','#FFE9B3','#FFCC94','#FFA98C','#FF809F','#FF5AC8','#FF52FF') 24 | pal <- rev(lengthen_pal(x = 1:8,shortpal = beyonce_palette(101))) 25 | gpal <- beyonce_palette(48)[c(4,6)] 26 | 27 | p1 <- astronauts %>% 28 | filter(eva_hrs_mission>0) %>% 29 | ggplot(aes(x=year_of_mission,y=eva_hrs_mission,colour=sex)) + 30 | geom_point(alpha=0.5) + 31 | geom_smooth(method = 'lm') + 32 | xlab('Year of mission') + 33 | ylab('Extravehicular hours') + 34 | scale_colour_manual('',values = gpal) + 35 | theme(legend.position = 'bottom') 36 | 37 | dtable <- astronauts %>% # diversity table 38 | count(mission_title,year_of_mission,nationality) %>% 39 | pivot_wider(names_from = nationality, 40 | values_from = n, 41 | values_fill = 0) 42 | 43 | divs <- data.frame(shannon = diversity(dtable[,-c(1,2)]), 44 | dtable[,c(1,2)]) 45 | 46 | missions <- astronauts %>% 47 | distinct(mission_title,year_of_mission) %>% 48 | full_join(divs) 49 | 50 | p2 <- missions %>% 51 | group_by(year_of_mission,shannon) %>% 52 | summarize(nmissions = length(mission_title)) %>% 53 | filter(shannon>0) %>% 54 | ggplot(aes(x = year_of_mission,y = shannon,size = nmissions)) + 55 | geom_point(colour = 'lightblue',alpha=0.5) + 56 | xlim(c(1960,2020)) + 57 | xlab("Year") + 58 | ylab("Diversity index of astronaut nationalities") + 59 | labs(size = "Number of missions") 60 | 61 | 62 | singlecountrymissions <- missions %>% 63 | filter(shannon == 0) %>% 64 | 
left_join(astronauts %>% select(nationality,mission_title)) %>% 65 | mutate(natcat = fct_lump(nationality,n = 6)) %>% 66 | group_by(year_of_mission,natcat) %>% 67 | summarise(n = n()) 68 | 69 | p3 <- singlecountrymissions %>% 70 | ggplot(aes(x=year_of_mission,y=n,fill=natcat,colour=natcat)) + 71 | geom_col() + 72 | xlim(c(1960,2020)) + 73 | scale_fill_manual('Country',values=pal) + 74 | scale_colour_manual('Country',values=pal) + 75 | xlab('Year') + 76 | ylab('Single-country missions') + 77 | theme_ft_rc() 78 | 79 | png("29_astronauts.png",width = 12,height = 10,units = 'in',res = 200) 80 | ((p2 / p3 + plot_layout(heights = c(2,1),ncol=1)) | p1 ) + 81 | plot_layout(ncol = 2) + 82 | plot_annotation(title = 'The diversity of space missions', 83 | subtitle = 'Collaborative missions between countries, and gender in astronaut extravehicular hours over time', 84 | caption = 'Data: Mariya Stavnichuk and Tatsuya Corlett') & spacetheme 85 | dev.off() 86 | -------------------------------------------------------------------------------- /2020/29_astronauts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/29_astronauts.png -------------------------------------------------------------------------------- /2020/2_Transit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/2_Transit.png -------------------------------------------------------------------------------- /2020/2_transit.R: -------------------------------------------------------------------------------- 1 | # 2_Transit 2 | 3 | 4 | # Soundtrack -------------------------------------------------------------- 5 | #Swedish procedural drama my folks are watching 6 | 7 | 8 | # Packages ---------------------------------------------------------------- 9 | # 
devtools::install_github("davidsjoberg/ggbump") 10 | # install.packages("countrycode") # for merging iso2 country codes 11 | 12 | library(tidyverse) 13 | library(countrycode) 14 | library(ggbump) 15 | library(gghighlight) 16 | 17 | # Theme ------------------------------------------------------------------- 18 | theme_transit <- hrbrthemes::theme_ft_rc() + 19 | theme( 20 | panel.grid.major = element_blank(), 21 | panel.grid.minor = element_blank(), 22 | axis.title.y = element_blank(), 23 | axis.text.y = element_blank(), 24 | axis.ticks.y = element_blank(), 25 | legend.position = "none" 26 | ) 27 | 28 | 29 | 30 | 31 | # Data -------------------------------------------------------------------- 32 | tuesdata <- tidytuesdayR::tt_load(2021, week = 2) 33 | transit_cost <- tuesdata$transit_cost 34 | 35 | tdat <- countrycode::codelist %>% 36 | select(country.name.en, iso2c, continent) %>% 37 | right_join(transit_cost, by = c("iso2c" = "country")) 38 | 39 | 40 | # Summarize data 41 | cont_lengths <- tdat %>% 42 | group_by(continent, year) %>% 43 | summarize(tot_length = sum(length, na.rm = T)) %>% 44 | drop_na() %>% 45 | group_by(year) %>% 46 | mutate(rank = dense_rank(desc(tot_length))) 47 | 48 | 49 | # ggbump plot of total track length --------------------------------------- 50 | 51 | p1 <- cont_lengths %>% 52 | filter(year < 2021) %>% 53 | ggplot(aes(year, rank, color = continent, group = continent)) + 54 | geom_bump(smooth = 10, size = 1.5, lineend = "round") + 55 | scale_y_reverse() + 56 | ghibli::scale_colour_ghibli_d(name = "MononokeMedium", direction = -1) + 57 | gghighlight(unhighlighted_params = list( 58 | size = 1, 59 | colour = alpha("grey", 0.2) 60 | )) + 61 | facet_wrap(~continent, ncol = 1) 62 | 63 | p1 +theme_transit 64 | 65 | 66 | # Density plot of number of stations per track/numbers of years -------- 67 | 68 | p2 <- tdat %>% 69 | filter(!is.na(start_year)) %>% 70 | filter(!is.na(end_year)) %>% 71 | filter(!is.na(continent)) %>% 72 | filter(end_year != "X") 
%>% 73 | mutate_at(c("start_year", "end_year"), .funs = as.numeric) %>% 74 | mutate(nyears = end_year - start_year) %>% 75 | ggplot(aes(x = stations / nyears, color = continent, fill = continent)) + 76 | # coord_flip() + 77 | geom_density() + 78 | xlab("Number of stations built per year") + 79 | ghibli::scale_colour_ghibli_d(name = "MononokeMedium", direction = -1) + 80 | scale_fill_manual(values = alpha(ghibli::ghibli_palette(name = "MononokeMedium", direction = -1), 81 | alpha = 0.5)) + 82 | facet_wrap(~continent, ncol = 1, scales = "free_y") 83 | 84 | p2 + theme_transit 85 | 86 | # Put them together ------------------------------------------------------- 87 | library(patchwork) 88 | 89 | png("2_Transit.png",width = 11, height = 9, units = 'in',res = 200) 90 | p1 + p2 + 91 | plot_annotation( 92 | title = "Transit", 93 | subtitle = "Which continents had the longest rail projects, and how long did they take?", 94 | caption = "Data: Transit Costs Project" 95 | ) & theme_transit 96 | dev.off() 97 | -------------------------------------------------------------------------------- /2020/30_RSPCA.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | library(hrbrthemes) 3 | library(tidyverse) 4 | library(waffle) 5 | library(ghibli) 6 | library(cowplot) #for insets 7 | 8 | # Get the data ------------------------------------------------------------ 9 | 10 | animal_outcomes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv') 11 | animal_complaints <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_complaints.csv') 12 | 13 | tuesdata <- tidytuesdayR::tt_load(2020, week = 30) 14 | brisbane_complaints <- tuesdata$brisbane_complaints 15 | 16 | # Set up themes ----------------------------------------------------------- 17 | 18 | waffletheme <- theme_ft_rc() + 19 | 
theme(legend.position = 'none', 20 | axis.title.x=element_blank(), 21 | axis.text.x=element_blank(), 22 | axis.ticks.x=element_blank(), 23 | axis.title.y=element_blank(), 24 | axis.text.y=element_blank(), 25 | axis.ticks.y=element_blank(), 26 | panel.border = element_blank(), 27 | panel.grid.major = element_blank(), 28 | panel.grid.minor = element_blank(), 29 | plot.margin = margin(0, 0, 0, 0, "cm")) 30 | maptheme <- theme_ft_rc() + 31 | theme(legend.position = 'none', 32 | axis.title.x=element_blank(), 33 | axis.text.x=element_blank(), 34 | axis.ticks.x=element_blank(), 35 | axis.title.y=element_blank(), 36 | axis.text.y=element_blank(), 37 | axis.ticks.y=element_blank()) 38 | petcols <- ghibli::ghibli_palette(name = "YesterdayMedium",n = 7,type = 'discrete',direction = -1) 39 | 40 | # Data for waffle plots 41 | wafdat <- animal_outcomes %>% 42 | filter(outcome %in% c("Rehomed","Reclaimed") & year == 2018) %>% 43 | select(-year,-outcome, - Total) %>% 44 | pivot_longer(-animal_type,names_to = "region") 45 | 46 | wd <- wafdat %>% group_by(region) 47 | region_list <- group_split(wd) 48 | 49 | plotfun <- function(x){ 50 | x %>% 51 | ggplot(aes(fill = animal_type,values = value/100)) + 52 | geom_waffle(n_rows = 4,size=.3,colour='white',make_proportional = T) + 53 | scale_fill_manual(values = petcols) + 54 | labs(subtitle= paste(x$region[1],"( n = ", sum(x$value),"animals )" )) + 55 | waffletheme + 56 | coord_equal() 57 | } 58 | 59 | act <- region_list[[1]] %>% 60 | mutate(region=fct_recode(region,"Australian Capital Territory"="ACT")) %>% 61 | plotfun() 62 | nsw <- region_list[[2]] %>% 63 | mutate(region=fct_recode(region,"New South Wales"="NSW")) %>% 64 | plotfun() 65 | nt <- region_list[[3]] %>% 66 | mutate(region=fct_recode(region,"Northern Territory"="NT")) %>% 67 | plotfun() 68 | qld <- region_list[[4]] %>% 69 | mutate(region=fct_recode(region,"Queensland"="QLD")) %>% 70 | plotfun() 71 | sa <- region_list[[5]] %>% 72 | mutate(region=fct_recode(region,"South 
Australia"="SA")) %>% 73 | plotfun() 74 | tas <- region_list[[6]] %>% 75 | mutate(region=fct_recode(region,"Tasmania"="TAS")) %>% 76 | plotfun() 77 | vic <- region_list[[7]] %>% 78 | mutate(region=fct_recode(region,"Victoria"="VIC")) %>% 79 | plotfun() 80 | wa <- region_list[[8]] %>% 81 | mutate(region=fct_recode(region,"Western Australia"="WA")) %>% 82 | plotfun() 83 | 84 | 85 | 86 | # Map --------------------------------------------------------------------- 87 | # Inset maps: https://www.r-bloggers.com/inset-maps-with-ggplot2/ 88 | library(cowplot) 89 | library(ozmaps) 90 | library(sf) 91 | 92 | oz_states <- ozmaps::ozmap_states 93 | 94 | ozmap <- ggplot(oz_states) + 95 | geom_sf(fill='darkgrey',colour = 'white') + 96 | coord_sf() + 97 | waffletheme + 98 | labs(title = "RSPCA Outcomes", 99 | subtitle = "Animals rehomed or reclaimed in 2018") 100 | 101 | # Get standalone legend to place in bigger plot 102 | xx <- ggplot(region_list[[8]], 103 | aes(region, fill = animal_type)) + 104 | geom_bar(colour='white') + 105 | scale_fill_manual("Animal type",values = petcols) + 106 | theme_ft_rc(base_size = 14) 107 | legend <- get_legend(xx) 108 | 109 | 110 | 111 | # Save it! 
---------------------------------------------------------------- 112 | 113 | png("RSPCA1_v2.png",width = 14,height = 10,units = 'in',res=200) 114 | 115 | ggdraw() + 116 | draw_plot(ozmap) + 117 | draw_plot(act, x = .71, y = .77, width = .3, height = .3,scale = 0.9) + 118 | draw_plot(nsw, x = .71, y = .47, width = .3, height = .3,scale = 0.9) + 119 | draw_plot(nt, x = .3, y = .77, width = .3, height = .3,scale = 0.9) + 120 | draw_plot(qld, x = .71, y = .65, width = .3, height = .3,scale = 0.9) + 121 | draw_plot(sa, x = .3, y = -.08, width = .3, height = .3,scale = 0.9) + #sa 122 | draw_plot(tas, x = .71, y = -.08, width = .3, height = .3,scale = 0.9) + #tas 123 | draw_plot(vic, x = .71, y = .1, width = .3, height = .3,scale = 0.9) + #vic 124 | draw_plot(wa, x = 0, y = .59, width = .3, height = .3,scale = 0.9) + #wa 125 | draw_plot(legend, x=-.07, y=-0.05,width = .3, height = .3,scale = 1.5) + 126 | theme(plot.background = element_rect(fill="#252A32", color = NA)) 127 | 128 | dev.off() 129 | 130 | 131 | # Basic time series plot -------------------------------------------------- 132 | 133 | regionsummary <- animal_outcomes %>% 134 | pivot_longer(cols = ACT:WA,names_to = "region") %>% 135 | group_by(year,animal_type,region,outcome) %>% 136 | summarize(n = sum(value)) %>% 137 | group_by(year,animal_type,region) 138 | 139 | p2 <- regionsummary %>% 140 | summarize(alloutcomes = sum(n)) %>% 141 | right_join(regionsummary) %>% 142 | mutate(propn = n/alloutcomes) %>% 143 | filter(outcome %in% c("Rehomed","Reclaimed")) %>% 144 | group_by(year,animal_type,region) %>% 145 | summarize(claim_home = sum(propn)) %>% 146 | ggplot(aes(year,claim_home,colour=animal_type)) + 147 | geom_line(lwd = .7) + 148 | facet_wrap(~region,ncol = 4) + 149 | scale_colour_manual("Animal type",values = petcols) + 150 | theme_ft_rc() + 151 | theme(axis.title.x = element_text(size = rel(1.2),colour='white'), 152 | axis.title.y = element_text(size = rel(1.2),colour='white'), 153 | axis.text.x = 
element_text(colour = 'white'), 154 | axis.text.y = element_text(colour = 'white'), 155 | legend.text = element_text(colour = 'white'), 156 | legend.title = element_text(colour = 'white'), 157 | strip.text = element_text(colour = 'white') 158 | ) + 159 | ylab("Proportion reclaimed or rehomed") + 160 | xlab("Year") + 161 | labs(caption = "Data: Australia RSPCA") 162 | 163 | p2 164 | 165 | png("RSPCA2.png",width = 12,height = 7,units = 'in',res=200) 166 | p2 167 | dev.off() 168 | -------------------------------------------------------------------------------- /2020/31_penguins.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(ghibli) 3 | library(cowplot) 4 | library(dplyr) 5 | library(readr) 6 | 7 | 8 | tuesdata <- tidytuesdayR::tt_load(2020, week = 31) 9 | penguins <- tuesdata$penguins 10 | penguins <- penguins %>% 11 | filter(!is.na(sex)) 12 | 13 | # Raincloud stuff --------------------------------------------------------- 14 | packages <- c("cowplot", "readr", "ggplot2" , 15 | "dplyr", "lavaan") 16 | 17 | 18 | # Palettes and theme ------------------------------------------------------ 19 | penpal <- ghibli_palette("SpiritedMedium",direction = -1) 20 | pentheme <- hrbrthemes::theme_ipsum_rc() + 21 | theme(axis.title.x = element_text(size = rel(1.5)), 22 | axis.title.y = element_text(size = rel(1.5)), 23 | panel.grid.major.x = element_blank(), 24 | panel.grid.minor.x = element_blank()) 25 | 26 | 27 | # Plot stuff -------------------------------------------------------------- 28 | 29 | p1 <- ggplot(penguins, aes(x = sex, y = body_mass_g, fill = species)) + 30 | geom_flat_violin(position = position_nudge(x = .1, y = 0), 31 | adjust = 1.5, trim = FALSE, alpha = .5, colour = NA) + 32 | geom_point(aes(x = sex, y = body_mass_g, colour = species), 33 | position = position_jitter(width = .05), size = 1, shape = 20) + 34 | geom_boxplot(aes(x = sex, y = body_mass_g, fill = species), 35 | outlier.shape = NA, 
alpha = .5, width = .1, colour = "black") + 36 | scale_colour_manual("Species",values = penpal) + 37 | scale_fill_manual("Species",values = penpal) + 38 | xlab("Sex") + 39 | ylab("Body mass (g)") + 40 | coord_flip() + 41 | facet_wrap(~island,scale = "free_x",ncol = 1) + 42 | pentheme + 43 | theme(legend.position = 'none') 44 | 45 | p1 46 | 47 | p2 <- penguins %>% 48 | ggplot(aes(x = bill_length_mm,y = bill_depth_mm,colour = species)) + 49 | geom_point() + 50 | scale_colour_manual("Species",values = penpal) + 51 | facet_grid(island~sex) + 52 | xlab("Bill length (mm)") + 53 | ylab("Bill depth (mm)") + 54 | pentheme + 55 | theme(panel.grid.major = element_blank(), 56 | panel.grid.minor = element_blank(), 57 | legend.position = 'none') 58 | 59 | p3 <- penguins %>% 60 | ggplot(aes(x = body_mass_g,y = flipper_length_mm,colour = species)) + 61 | geom_point() + 62 | scale_colour_manual("Species",values = penpal) + 63 | facet_grid(island~sex) + 64 | xlab("Body mass (g)") + 65 | ylab("Flipper length (mm)") + 66 | pentheme + 67 | theme(panel.grid.major = element_blank(), 68 | panel.grid.minor = element_blank(), 69 | legend.position = 'bottom') 70 | 71 | png('Penguins.png',width = 12,height = 12,units = 'in',res=200) 72 | p1 + (p2/p3 + plot_layout(heights = c(4,3))) + 73 | plot_annotation(title = 'Palmer penguins', 74 | subtitle = 'Body mass and sex across species and island', 75 | caption = "Data: Dr. 
Kristen Gorman (via the palmerpenguins package)") & theme(text = element_text('Roboto Condensed',size = 16)) 76 | dev.off() 77 | 78 | -------------------------------------------------------------------------------- /2020/32_EuropeanEnergy.R: -------------------------------------------------------------------------------- 1 | # European energy 2 | library(tidyverse) 3 | 4 | 5 | # Get data ---------------------------------------------------------------- 6 | tuesdata <- tidytuesdayR::tt_load('2020-08-04') 7 | energy_types <- tuesdata$energy_types 8 | country_totals <- tuesdata$country_totals 9 | unique(country_totals$type) 10 | 11 | 12 | # Map stuff --------------------------------------------------------------- 13 | world_map <- map_data("world") 14 | ggplot(world_map) + 15 | geom_polygon(data = world_map, aes(x = long, y = lat, group = group), 16 | fill = "grey30", colour = "grey45", size = 0.1) + 17 | theme_void() 18 | -------------------------------------------------------------------------------- /2020/33_Airbender.R: -------------------------------------------------------------------------------- 1 | 2 | library(ggstream) 3 | library(tidyverse) 4 | library(tidytext) 5 | library(beyonce) 6 | library(patchwork) 7 | library(ggpomological) 8 | 9 | # Get the data ------------------------------------------------------------ 10 | tuesdata <- tidytuesdayR::tt_load('2020-08-11') 11 | avatar <- tuesdata$avatar 12 | scenes <- tuesdata$scene_description 13 | 14 | 15 | # Palettes ---------------------------------------------------------------- 16 | pn_pal <- c('#FFFFFF','#0096FF') 17 | pn_pal2 <- c('darkgrey','lightblue') 18 | char_pal <- beyonce_palette(90,n = 11)[c(1,4,10,11)] 19 | 20 | theme_avatar <- theme_pomological_fancy() + 21 | theme(text = element_text(family = "Herculanum", size = 12), 22 | legend.position = 'bottom', 23 | legend.box="vertical", legend.margin=margin(), 24 | panel.grid.major = element_blank(), 25 | panel.grid.minor = element_blank(), 26 | 
panel.border = element_blank()) 27 | # Plot 1: sentiments in episodes ------------------------------------------ 28 | xx <- avatar %>% 29 | mutate(clean_char = str_replace_all(character_words, "[^a-zA-Z\\s]", " ")) %>% 30 | mutate(clean_char = str_trim(clean_char, side = "both")) 31 | 32 | data('stop_words') 33 | 34 | tidied_words <- xx %>% 35 | unnest_tokens(word, clean_char) %>% 36 | anti_join(stop_words, by = "word") #get just the 'active words' 37 | 38 | d1 <- tidied_words %>% 39 | inner_join(get_sentiments("bing"), by = "word") %>% 40 | group_by(book,chapter_num,sentiment) %>% 41 | summarize(n = length(sentiment)) %>% 42 | pivot_wider(names_from = sentiment,values_from = n) %>% 43 | mutate(overall = positive - negative) %>% 44 | mutate(pn = ifelse(overall>0, "Positive", "Negative")) 45 | d1$book <- factor(d1$book,levels = c('Water','Earth','Fire')) 46 | 47 | # Does Appa's presence mean more positive words? 48 | appafreq <- avatar %>% 49 | mutate(n_appa = str_count(full_text,"Appa")) %>% 50 | group_by(book, chapter_num) %>% 51 | summarize(n_appa = sum(n_appa,na.rm=T)) 52 | 53 | # Appa appearances 54 | d1a <- d1 %>% 55 | left_join(appafreq,by=c('book','chapter_num')) %>% 56 | mutate(fake_y = -50, fake_size = 160) 57 | d1a$book <- factor(d1a$book,levels = c('Water','Earth','Fire')) 58 | 59 | d1a %>% 60 | ggplot(aes(x=chapter_num,y=fake_y,size=n_appa)) + 61 | geom_point(colour = 'white') + 62 | geom_point(data = d1a, 63 | aes(x=chapter_num,y=fake_y,size = fake_size), 64 | shape =1,colour = 'white') 65 | 66 | p1 <- d1 %>% 67 | ggplot(aes(x=chapter_num,y=overall)) + 68 | geom_hline(yintercept = 0, colour = 'darkgrey') + 69 | geom_segment(aes(x = chapter_num, xend=chapter_num, y=0, yend=overall,color = pn),lwd=4) + 70 | scale_colour_manual('',values = pn_pal2) + 71 | geom_point(data = d1a, 72 | aes(x=chapter_num,y=fake_y), 73 | shape = 1,size = 5, colour = 'darkgrey') + 74 | geom_point(data = d1a, 75 | aes(x = chapter_num,y = fake_y,size = n_appa), 76 | colour = 
'darkgrey') + 77 | scale_size(name = "Number of Appa mentions") + 78 | guides(size = "legend") + 79 | facet_wrap(~book,ncol = 1) + 80 | xlab("Chapter") + 81 | ylab("Overall Bing sentiment score") + 82 | theme_avatar + 83 | labs(title = "Avatar: The Last Airbender", 84 | subtitle = "Are sentiments better when Appa is present?", 85 | caption = "Data: {appa} package") 86 | 87 | png('NewAvatar.png',width = 7,height = 10,units = 'in',res=200) 88 | p1 89 | dev.off() 90 | 91 | 92 | # Plot 2: ggstream plot of character lines over time ---------------------- 93 | d2 <- avatar %>% 94 | mutate(character_lumped = fct_lump(character,n = 5,other_level = 'someone_else')) %>% 95 | group_by(book,chapter_num,character_lumped) %>% 96 | count() %>% 97 | filter(!character_lumped %in% c("Scene Description","someone_else")) 98 | d2$book <- factor(d2$book,levels = c('Water','Earth','Fire')) 99 | 100 | p2 <- d2 %>% 101 | ggplot(aes(x = chapter_num,y = n, 102 | fill = character_lumped)) + 103 | geom_stream(colour='white',size=0.1) + 104 | scale_fill_manual('',values = char_pal) + 105 | facet_wrap(~book,ncol = 1) + 106 | xlim(c(0,20)) + 107 | xlab('Chapter') + 108 | ylab('Number of lines') + 109 | hrbrthemes::theme_ft_rc() + 110 | theme(legend.position = 'bottom', 111 | axis.ticks.y = element_blank(), 112 | axis.text.y = element_blank(), 113 | panel.grid.major = element_blank(), 114 | panel.grid.minor = element_blank(), 115 | panel.border = element_blank(), 116 | panel.background = element_blank()) + 117 | labs(caption = "Data: {appa} package by Avery Robbins") 118 | p2 119 | 120 | 121 | # Put em together and add title etc --------------------------------------- 122 | 123 | png("33_Airbender.png",width = 11,height = 8,units = 'in',res = 200) 124 | p1 + p2 125 | dev.off() 126 | 127 | 128 | -------------------------------------------------------------------------------- /2020/33_Airbender.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/33_Airbender.png -------------------------------------------------------------------------------- /2020/34_ExoticPlants.R: -------------------------------------------------------------------------------- 1 | #34_ExoticPlants 2 | library(tidyverse) 3 | library(ggchicklet) 4 | 5 | source('HelperFunctions.R') 6 | 7 | tuesdata <- tidytuesdayR::tt_load(2020, week = 34) 8 | 9 | plants <- tuesdata$plants 10 | actions <- tuesdata$actions 11 | threats <- tuesdata$threats 12 | 13 | extrafont::loadfonts() 14 | 15 | 16 | # Palettes ---------------------------------------------------------------- 17 | dangerpal <- calecopal::cal_palette(name = 'superbloom3')[1:2] 18 | threatpal <- calecopal::cal_palette(name = 'superbloom2') %>% 19 | lengthen_pal(x = 1:12) 20 | bgcolor <- "#FFFFF8" 21 | countrycolor <- "#dfdacd" 22 | 23 | world_map <- map_data("world") 24 | 25 | plant_statuses <- plants %>% 26 | select(country, group, red_list_category) %>% 27 | group_by(country,red_list_category) %>% 28 | count() %>% 29 | right_join(world_map,by = c("country" = "region") ) %>% 30 | filter(!is.na(red_list_category)) 31 | 32 | 33 | p1 <- ggplot() + 34 | geom_polygon(data = world_map, aes(x = long, y = lat, group = group), 35 | fill = countrycolor, colour = "grey45", size = 0.1) + 36 | geom_polygon(data = plant_statuses, 37 | aes(x = long, y = lat, group = group,fill = log(n)), 38 | colour = "grey45", size = 0.1) + 39 | facet_wrap(~red_list_category, ncol = 2) + 40 | theme_void(base_family = 'Lato') + 41 | scale_fill_stepsn('Number of \nspecies \n(log-scale)',colours = dangerpal) + 42 | theme(#panel.border = element_rect(fill = bgcolor,colour = bgcolor), 43 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 44 | plot.background = element_rect(fill = bgcolor,colour = bgcolor), 45 | panel.grid = element_blank(), 46 | legend.position = 'right') 47 | p1 48 | 49 | 50 | # Chicklet plots 
---------------------------------------------------------- 51 | d2 <- threats %>% 52 | group_by(red_list_category,continent,threat_type) %>% 53 | summarize(nthreatened = sum(threatened,na.rm = TRUE)) 54 | 55 | p2 <- d2 %>% 56 | ggplot(aes(continent, nthreatened,fill = threat_type)) + 57 | geom_chicklet(width = 0.75,colour = bgcolor) + 58 | scale_fill_manual("Threat type",values = threatpal) + 59 | xlab("Continent") + 60 | ylab("Number of species") + 61 | theme(text = element_text(family = 'Lato'), 62 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 63 | plot.background = element_rect(fill = bgcolor,colour = bgcolor), 64 | #panel.border = element_rect(fill = bgcolor,colour = bgcolor), 65 | legend.background = element_rect(fill = bgcolor, colour = bgcolor), 66 | legend.key = element_rect(fill = bgcolor, color = NA), 67 | panel.grid = element_blank(), 68 | strip.background = element_blank(), 69 | legend.position = 'bottom') + 70 | facet_wrap(~red_list_category,ncol = 2,scales = "free_x") + 71 | coord_flip() 72 | p2 73 | 74 | 75 | library(patchwork) 76 | 77 | all <- p1 + p2 + 78 | plot_layout(heights = c(3,1)) + 79 | plot_annotation(title = 'Plants in danger', 80 | subtitle = 'Threats to plants that are extinct or extinct in the wild', 81 | caption = 'Data: Florent Laverne & Cédric Scherer') & 82 | theme(text = element_text('Lato',size = 14), 83 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 84 | plot.background = element_rect(fill = bgcolor,colour = bgcolor)) 85 | 86 | png('Plants.png',width = 12,height = 9,units = 'in',res = 200) 87 | all 88 | dev.off() 89 | 90 | 91 | 92 | # cowplot option (avoid weird projection issues from axis align wi -------- 93 | library(cowplot) 94 | p1 <- p1 + 95 | coord_map(xlim=c(-180,180)) + # fix projection (thanks Dan) 96 | theme(plot.margin = unit(c(0,0,0,6), "lines")) + # add whitespace to the left to sort of align 97 | labs(title = 'Plants in danger', 98 | subtitle = 'Threats to plants that are 
extinct or extinct in the wild') 99 | p2 <- p2 + 100 | labs(caption = 'Data: Florent Laverne & Cédric Scherer') 101 | 102 | png('Plants_cowplot.png',width = 12,height = 9,units = 'in',res = 200) 103 | plot_grid(p1,p2,ncol = 1) + 104 | theme(plot.background = element_rect(fill = bgcolor,colour = bgcolor)) 105 | dev.off() 106 | -------------------------------------------------------------------------------- /2020/36_Crops.R: -------------------------------------------------------------------------------- 1 | # 36: crop yields 2 | library(tidyverse) 3 | library(hrbrthemes) 4 | library(calecopal) 5 | 6 | tuesdata <- tidytuesdayR::tt_load(2020, week = 36) 7 | 8 | # Data 9 | key_crop_yields <- tuesdata$key_crop_yields 10 | fertilizer <- tuesdata$cereal_crop_yield_vs_fertilizer_application 11 | tractors <- tuesdata$cereal_yields_vs_tractor_inputs_in_agriculture 12 | land_use <- tuesdata$land_use_vs_yield_change_in_cereal_production 13 | arable_land <- tuesdata$arable_land_pin 14 | 15 | # Palettes 16 | source('HelperFunctions.R') 17 | croppal <- lengthen_pal(x = 1:11,shortpal = cal_palette('bigsur')) 18 | 19 | # give data nicer formatting ---------------------------------------------- 20 | clean_names <- function(x) word(x,1) %>% tolower() 21 | 22 | d1 <- key_crop_yields %>% 23 | rename_with(clean_names) 24 | 25 | 26 | # How have yields changed over time? 
-------------------------------------- 27 | 28 | d1a <- d1 %>% 29 | group_by(year) %>% 30 | summarize_at(vars(wheat:bananas),.funs = sum, na.rm = TRUE) %>% 31 | pivot_longer(names_to = "crop",cols = wheat:bananas) 32 | 33 | d1b <- d1a %>% 34 | group_by(year) %>% 35 | summarize(tot_prod = sum(value,na.rm = T)) %>% 36 | right_join(d1a) %>% 37 | mutate(prop = value/tot_prod) 38 | 39 | d1b %>% group_by(year) %>% summarize(sum(prop)) 40 | 41 | p1 <- d1a %>% 42 | ggplot(aes(x=year,y=value,fill = crop)) + 43 | geom_area(color = 'white') + 44 | scale_fill_manual('Crop', values = croppal) + 45 | xlab('Year') + 46 | ylab('Global yield (tonnes per hectare)') + 47 | theme_ipsum_rc() + 48 | labs(title = '', #blank labels to match alginment of plots 49 | subtitle = '', 50 | caption = 'Data: Our World in Data') 51 | 52 | 53 | # Which crops are dominant for each country? ------------------------------ 54 | # If you wanted to make a column with the max value from all yield cols 55 | # prod_cols <- d1 %>% select(wheat:cocoa) %>% names() 56 | # test <- d1 %>% 57 | # mutate(maxval = pmax(!!!rlang::syms(prod_cols),na.rm = TRUE)) 58 | 59 | d1x <- d1 %>% 60 | mutate(across(everything(), 61 | ~replace_na(.x, 0))) 62 | 63 | # Messy, but you know what? 
I don't care 64 | d1x$dom_crop <- colnames(d1x[,-(1:3)])[max.col(d1x[,-(1:3)],ties.method="first")] 65 | country_key <- c(`United States` = "USA", 66 | `Democratic Republic of Congo` = "Democratic Republic of the Congo", 67 | Congo = "Republic of Congo") 68 | d1y <- d1x %>% 69 | filter(year == 2018) %>% 70 | select(entity,dom_crop) %>% 71 | mutate(entity = recode(entity, 72 | !!!country_key)) 73 | 74 | croppal2 <- croppal[c(1:4,6,8)] # first four plus maize and potatoes 75 | bgcolor <- "white" 76 | 77 | crop_map <- map_data("world") %>% 78 | left_join(d1y,by = c('region' = 'entity')) 79 | 80 | p2 <- ggplot(crop_map) + 81 | geom_polygon(data = crop_map, aes(x = long, y = lat, 82 | group = group), 83 | fill = "grey30", 84 | size = 0.1) + 85 | geom_polygon(data = crop_map, aes(x = long, y = lat, 86 | group = group,fill = dom_crop), 87 | colour = "white", size = 0.1) + 88 | coord_map(xlim = c(-180,180)) + 89 | scale_fill_manual('Dominant crop (2018)', 90 | values = croppal2) + 91 | theme_ipsum_rc() + 92 | theme(axis.title.x = element_blank(), 93 | axis.title.y = element_blank(), 94 | panel.background = element_rect(fill = bgcolor,colour = bgcolor), 95 | plot.background = element_rect(fill = bgcolor,colour = bgcolor), 96 | panel.grid = element_blank(), 97 | panel.grid.major = element_blank(), 98 | axis.text.y = element_blank(), 99 | legend.position = 'none', 100 | plot.margin = unit(c(0,0,0,0), "lines")) + 101 | labs(title = 'Crop production', 102 | subtitle = 'Dominant stocks in each country and global yield') 103 | 104 | p2 105 | 106 | library(cowplot) 107 | png('36_Crops.png',width = 12,height = 6,units = 'in',res = 200) 108 | plot_grid(p2,p1,nrow = 1) + 109 | theme(plot.background = element_rect(fill = bgcolor,colour = bgcolor)) 110 | dev.off() -------------------------------------------------------------------------------- /2020/36_Crops.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/36_Crops.png -------------------------------------------------------------------------------- /2020/37_Friends.R: -------------------------------------------------------------------------------- 1 | #37_Friends 2 | 3 | library(tidyverse) 4 | 5 | tuesdata <- tidytuesdayR::tt_load('2020-09-08') 6 | 7 | friends <- tuesdata$friends 8 | friends_info <- tuesdata$friends_info 9 | friends_emotions <- tuesdata$friends_emotions 10 | 11 | -------------------------------------------------------------------------------- /2020/43_MoreBeer.R: -------------------------------------------------------------------------------- 1 | # Week 43 2 | 3 | library(tidyverse) 4 | library(ggbeeswarm) 5 | library(hrbrthemes) 6 | library(calecopal) 7 | library(patchwork) 8 | 9 | 10 | tuesdata <- tidytuesdayR::tt_load(2020, week = 43) 11 | states_regions <- data.frame("state" = state.abb, 12 | "region" = state.region) 13 | 14 | beer_awards <- tuesdata$beer_awards 15 | 16 | # Cleanup on aisle b 17 | beer_awards$state[which(beer_awards$state == 'Ak')] <- "AK" 18 | beer_awards$state[which(beer_awards$state == 'wa')] <- "WA" 19 | 20 | beer_counts <- beer_awards %>% 21 | group_by(medal, state, year) %>% 22 | count() %>% 23 | ungroup() %>% 24 | group_by(state, medal) %>% 25 | summarize(mean_nawards = mean(n,na.rm = T)) %>% 26 | left_join(states_regions) 27 | 28 | beer_counts$region[which(beer_counts$state == 'DC')] <- "South" 29 | 30 | 31 | 32 | # Plot 1: Beeswarm plot of medals by state -------------------------------- 33 | 34 | medal_pal <- cal_palette("chaparral1")[c(4,1,2)] 35 | 36 | maxlabs <- beer_counts %>% 37 | group_by(region) %>% 38 | filter(mean_nawards == max(mean_nawards) & !is.na(region)) 39 | 40 | p1 <- beer_counts %>% 41 | mutate(region = fct_relevel(region,"West","North Central","South","Northeast")) %>% 42 | ggplot(aes(x = region,y = mean_nawards,colour = medal)) + 43 | 
geom_quasirandom(method = "tukeyDense",size = 2) + 44 | xlab("Region") + 45 | ylab("Mean number of awards") + 46 | scale_colour_manual("Medal", values = medal_pal) + 47 | coord_flip() + 48 | geom_text_repel(data = maxlabs, 49 | aes(label = state), 50 | force = 10, nudge_x = 0.2,show.legend = FALSE) 51 | p1 52 | 53 | 54 | dat2 <- beer_awards %>% 55 | left_join(states_regions) 56 | 57 | # DC is a state now, and MD and VA are both "South" so...! 58 | dat2$region[which(dat2$state == "DC")] <- "South" 59 | 60 | dat3 <- dat2 %>% 61 | group_by(region,year,medal) %>% 62 | count() 63 | 64 | p2 <- dat3 %>% 65 | ggplot(aes(x=year,y=n,fill = medal)) + 66 | geom_area(lwd=0.3, colour = 'white') + 67 | facet_wrap(~region, scales = "free_y",ncol = 1) + 68 | scale_fill_manual("Medal",values = medal_pal) + 69 | theme(legend.position = 'none') + 70 | xlab("Year") + 71 | ylab("Number of states with awards") + 72 | guides(fill = FALSE) 73 | #+ 74 | # guides(colour = FALSE) 75 | 76 | beertheme <- theme_ipsum_rc(base_size = 12) + 77 | theme(panel.grid.major = element_blank(), 78 | panel.grid.minor = element_blank(), 79 | axis.title.x = element_text(size = rel(1.5)), 80 | axis.title.y = element_text(size = rel(1.5)), 81 | plot.margin=unit(c(3,3,3,3),"mm")) 82 | 83 | png("43_MoreBeer.png",width = 10,height = 8,units = "in",res = 200) 84 | p1 + p2 + 85 | plot_layout(ncol = 2, guides = 'collect') + 86 | plot_annotation(title = "Beer awards", 87 | subtitle = "Which regions of the U.S. 
cleaned up at the Great American Beer Festival?", 88 | caption = "Data: The Great American Beer Festival") & beertheme 89 | dev.off() 90 | -------------------------------------------------------------------------------- /2020/43_MoreBeer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/43_MoreBeer.png -------------------------------------------------------------------------------- /2020/48_WATrails.R: -------------------------------------------------------------------------------- 1 | #48_trails 2 | # By Megsie Siple 3 | library(tidyverse) 4 | library(patchwork) 5 | library(ggraph) 6 | 7 | # Palettes ---------------------------------------------------------------- 8 | # to make the colour palette, I cloned David L Miller's gist with the usfws palettes. Thank you Dave! 9 | # Gist URL: https://gist.github.com/dill/1729bbc9ad4f915942045f96a6cfbf9d 10 | source(here::here("USFWS-DLL","usfws_palette.R")) 11 | source(here::here("HelperFunctions.R")) 12 | p <- usfws_palette(n = 5,name = "dolly") 13 | trailpal <- lengthen_pal(x = 1:12, p) 14 | 15 | Emrld <- c("#d3f2a3", "#97e196", "#6cc08b", "#4c9b82", "#217a79", "#105965", "#074050") #from Carto colors 16 | FeatureColors <- c("#d39c83","#e597b9","#e4f1e1","#6cc08b",Emrld[4],"#d1afe8","#85c4c9") 17 | #fall, flowers, mtns, nodogs, oldgrowth, ridges, water 18 | 19 | hiketheme <- hrbrthemes::theme_ft_rc() + 20 | theme(text = element_text(colour = 'white'), 21 | title = element_text(colour = 'white'), 22 | axis.text = element_text(colour = 'white'), 23 | strip.text = element_text(colour = 'white'), 24 | plot.background = element_rect(fill = Emrld[7]), 25 | panel.border = element_blank(), 26 | panel.background = element_blank(), 27 | panel.grid = element_blank()) 28 | 29 | 30 | # Get data ---------------------------------------------------------------- 31 | tuesdata <- 
tidytuesdayR::tt_load('2020-11-24') 32 | hike_data <- tuesdata$hike_data 33 | 34 | # minor cleanup 35 | hike_data <- hike_data %>% 36 | mutate_at(c("gain","highpoint","rating"),.funs = as.numeric) %>% 37 | mutate(simple_location = sub("\\ --.*", "", location)) 38 | 39 | hdat <- hike_data %>% 40 | mutate(nfeatures = lengths(features)) %>% 41 | unnest(features) %>% 42 | mutate(pres = 1) %>% 43 | pivot_wider(names_from = features, 44 | values_from = pres, 45 | values_fill = 0,values_fn = length ) 46 | 47 | p1 <- hike_data %>% 48 | ggplot(aes(x = lengths(features), y = rating)) + 49 | annotate("rect", xmin = 7.5, xmax = 12.5, 50 | ymin = 3, ymax = 5.5, 51 | fill = 'white', colour = NA, alpha = 0.1) + 52 | geom_point(alpha = 0.5,colour = 'white') + 53 | geom_smooth(method = 'lm',colour = trailpal[11]) + 54 | ylim(c(0,6.2))+ 55 | annotate(geom = "text", x = 10, y = 6, 56 | family = "Roboto Condensed Light", 57 | label = "Megsie-preferred \nhiking zone", 58 | hjust = "center", 59 | colour = "white") + 60 | xlab("Number of features") + 61 | ylab("Hiker rating") + 62 | hiketheme 63 | 64 | p1 65 | 66 | d1 <- hdat %>% 67 | group_by(simple_location) %>% 68 | summarize(flowers = mean(`Wildflowers/Meadows`), 69 | mtnviews = mean(`Mountain views`), 70 | oldtrees = mean(`Old growth`)) 71 | 72 | p2 <- d1 %>% 73 | pivot_longer(cols = flowers:oldtrees) %>% 74 | mutate(name = recode(name, flowers = "Flowers", 75 | mtnviews = "Mountain views", 76 | oldtrees = "Old-growth forest")) %>% 77 | ggplot(aes(x=simple_location, y=value, fill = simple_location)) + 78 | geom_col() + 79 | facet_wrap(~name) + 80 | scale_fill_manual(values = trailpal[-1]) + 81 | scale_x_discrete(labels = function(x) str_wrap(x, width = 10)) + 82 | xlab("") + 83 | ylab("") + 84 | coord_polar(clip = "off") + 85 | labs(title = 'Searching for the best hike in Washington State',subtitle = 'Features and ratings for hikes in Washington State') + 86 | hiketheme + 87 | theme(legend.position = 'none', 88 | axis.text.x = 
element_text(size = 9), 89 | axis.text.y = element_blank(), 90 | panel.spacing = unit(4, "lines")) 91 | 92 | 93 | 94 | 95 | 96 | # Test out a network ------------------------------------------------------ 97 | fdat <- hdat %>% 98 | select(`Dogs allowed on leash`:Summits) 99 | 100 | trythis <- fdat %>% #woe is me 101 | mutate(nodogs_flowers = ifelse(`Dogs not allowed`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 102 | mutate(nodogs_water = ifelse(`Dogs not allowed`==1 & `Waterfalls`==1,1,0)) %>% 103 | mutate(nodogs_mtns = ifelse(`Dogs not allowed`==1 & `Mountain views`==1,1,0)) %>% 104 | mutate(nodogs_oldgrowth = ifelse(`Dogs not allowed`==1 & `Old growth`==1,1,0)) %>% 105 | 106 | mutate(mtns_oldgrowth = ifelse(`Mountain views`==1 & `Old growth`==1,1,0)) %>% 107 | mutate(mtns_flowers = ifelse(`Mountain views`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 108 | mutate(mtns_water = ifelse(`Mountain views`==1 & `Waterfalls`==1,1,0)) %>% 109 | 110 | mutate(water_flowers = ifelse(`Waterfalls`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 111 | mutate(water_oldgrowth = ifelse(`Waterfalls`==1 & `Old growth`==1,1,0)) %>% 112 | 113 | mutate(fall_flowers = ifelse(`Fall foliage`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 114 | mutate(fall_water = ifelse(`Fall foliage`==1 & `Waterfalls`==1,1,0)) %>% 115 | mutate(fall_flowers = ifelse(`Fall foliage`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 116 | mutate(fall_oldgrowth = ifelse(`Fall foliage`==1 & `Old growth`==1,1,0)) %>% 117 | 118 | mutate(ridges_flowers = ifelse(`Ridges/passes`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 119 | mutate(ridges_water = ifelse(`Ridges/passes`==1 & `Waterfalls`==1,1,0)) %>% 120 | mutate(ridges_flowers = ifelse(`Ridges/passes`==1 & `Wildflowers/Meadows`==1,1,0)) %>% 121 | mutate(ridges_oldgrowth = ifelse(`Ridges/passes`==1 & `Old growth`==1,1,0)) %>% 122 | 123 | select(nodogs_flowers:ridges_oldgrowth) %>% 124 | filter_all(any_vars(. 
!= 0)) %>% # remove columns that are all zeroes 125 | pivot_longer(cols = nodogs_flowers:ridges_oldgrowth) %>% 126 | group_by(name) %>% 127 | summarize(nhikes = sum(value)) %>% 128 | separate(col = name,into = c("from","to")) %>% 129 | uncount(nhikes) 130 | 131 | graph <- as_tbl_graph(trythis) %>% 132 | mutate(Popularity = centrality_degree(mode = 'in')) 133 | 134 | p3 <- ggraph(graph, layout = 'kk') + 135 | geom_edge_fan2(aes(alpha = stat(index)), 136 | colour = "white",alpha = 0.2, 137 | show.legend = FALSE) + 138 | geom_node_point(aes(color = name), size = 5) + 139 | scale_colour_manual("",values = FeatureColors) + 140 | labs(x = NULL, y = NULL, 141 | caption = "Data: Washington Trails Association & TidyX") + 142 | hiketheme + 143 | theme(axis.text.x = element_blank(), 144 | axis.text.y = element_blank()) 145 | 146 | 147 | 148 | # Save it ----------------------------------------------------------------- 149 | png('48_WATrails.png',width = 12,height = 11,units = 'in',res=200) 150 | p2 + (p1|p3) + plot_layout(ncol = 1) 151 | dev.off() 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /2020/48_WATrails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/48_WATrails.png -------------------------------------------------------------------------------- /2020/49_Shelters.R: -------------------------------------------------------------------------------- 1 | 2 | #49_TorontoShelters 3 | 4 | library(tidyverse) 5 | tuesdata <- tidytuesdayR::tt_load('2020-12-01') 6 | shelters <- tuesdata$shelters 7 | -------------------------------------------------------------------------------- /2020/8_CO2Food.R: -------------------------------------------------------------------------------- 1 | #Feb 18, 2020 2 | # Food consumption and CO2 3 | library(sf) 4 | library(mapedit) 5 | library(leaflet) 6 | 
library(tidyverse) 7 | library(rnaturalearth) 8 | library(rnaturalearthdata) 9 | library(stringr) 10 | 11 | food_consumption <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-18/food_consumption.csv') 12 | str(food_consumption) 13 | unique(food_consumption$country) 14 | unique(food_consumption$food_category) 15 | 16 | # emissions per pound are very slightly different! 17 | food_consumption %>% 18 | group_by(food_category) %>% 19 | mutate(emission_per = co2_emmission/consumption) %>% 20 | as.data.frame() 21 | 22 | nfc <- food_consumption %>% 23 | mutate(emiss_per_cons = co2_emmission/consumption) 24 | # But I'm not going to worry about it 25 | 26 | # Let's map it! 27 | world <- ne_countries(scale = "medium", returnclass = "sf") 28 | 29 | pe <- food_consumption %>% 30 | filter(food_category == "Milk - inc. cheese") %>% 31 | mutate(country.corr = str_replace(country,"USA","United States")) 32 | 33 | worldp <- left_join(world,pe,by=c('name'='country.corr')) 34 | 35 | dairy <- ggplot(data = worldp) + 36 | geom_sf(aes(fill=co2_emmission)) + 37 | scale_fill_viridis_c("Emissions \n (kg/person/yr)",option = 'inferno',direction = -1) + 38 | theme_minimal() + 39 | ggtitle(expression('Per capita '~CO[2]~' emissions from milk/cheese consumption')) 40 | 41 | png('Dairy.png',width=6,height=4,units = 'in',res=150) 42 | dairy 43 | dev.off() 44 | 45 | 46 | # Fancy dark background version ------------------------------------------- 47 | # To make the fancy dark ones that everyone makes on twitter 48 | library(extrafont) #not sure which of these is required 49 | library(extrafontdb) 50 | library(hrbrthemes) 51 | 52 | dairy2 <- dairy + theme_ft_rc() 53 | dairy2 54 | -------------------------------------------------------------------------------- /2020/9_Measles.R: -------------------------------------------------------------------------------- 1 | # Measles vaccines 2 | # Moody bc I listened to the awful Feb 25th democratic 
debate while coding. 3 | library(tidyverse) 4 | library(stringr) 5 | library(sf) 6 | library(mapedit) 7 | library(leaflet) 8 | library(tidyverse) 9 | library(rnaturalearth) 10 | library(rnaturalearthdata) 11 | 12 | measles <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-25/measles.csv') 13 | 14 | measles.summ <- measles %>% 15 | group_by(state,year) %>% 16 | summarize(mean.mmr=mean(mmr)) %>% 17 | filter(mean.mmr != -1) %>% 18 | as.data.frame() 19 | 20 | states <- map_data("state") %>% 21 | mutate(State = str_to_title(region)) 22 | 23 | test <- measles.summ %>% # normally I would do something with date but I don't wanna 24 | select(state,mean.mmr) %>% 25 | right_join(states, by=c("state"="State")) 26 | 27 | mdf <- as.data.frame(measles) %>% 28 | mutate(lat = as.numeric(lat), 29 | lng = as.numeric(lng)) %>% 30 | filter(lng<(-70) & !is.na(lng) & !is.na(lat)) %>% #filter out a few funky points 31 | filter(!is.na(xper) & xper>25) #forget why I filtered for xper>25 32 | 33 | mapplot <- test %>% 34 | ggplot(aes(long,lat)) + 35 | xlab("") + 36 | ylab("") + 37 | geom_polygon(aes(group=state,fill=mean.mmr)) + 38 | scale_fill_viridis_c("") + 39 | hrbrthemes::theme_ft_rc() + 40 | theme(axis.ticks = element_blank(), 41 | panel.grid = element_blank(), 42 | axis.text = element_blank()) + 43 | ggtitle("Mean statewide MMR vaccination rate") 44 | mapplot 45 | 46 | # Map with white points 47 | personalvax <- mapplot + 48 | geom_point(data=mdf,aes(x=lng,y=lat,size=xper), 49 | colour='white', alpha=0.5) + 50 | scale_size_continuous("Percentage of students \n exempted for \n personal reasons (>25%)") + 51 | ggtitle("Students exempted from vaccines for personal reasons") 52 | 53 | 54 | bars <- measles %>% 55 | filter(mmr != -1 & overall != -1) %>% #take out NAs 56 | group_by(state) %>% 57 | summarize(mean.personal = mean(xper,na.rm=T)) %>% 58 | filter(!is.na(mean.personal)) %>% 59 | ggplot(aes(x=state,y=mean.personal)) + 60 | 
geom_col(fill='white') + 61 | ylab("Mean percentage exempted\n for personal reasons") + 62 | xlab("State") + 63 | hrbrthemes::theme_ft_rc() 64 | 65 | scatter <- measles %>% 66 | ggplot(aes(x=xmed,y=xper)) + 67 | geom_point(alpha=0.5,colour='white') + 68 | hrbrthemes::theme_ft_rc() + 69 | xlab("Exempted \nfor medical reasons (%)") + 70 | ylab("Exempted \nfor personal reasons (%)") + 71 | hrbrthemes::theme_ft_rc() 72 | 73 | png("TidyFeb25.png",width = 10,height=4,units = 'in',res = 150) 74 | p2 <- gridExtra::grid.arrange(bars,scatter,ncol=1) 75 | cowplot::plot_grid(mapplot,p2,rel_widths = c(2,1)) 76 | dev.off() 77 | -------------------------------------------------------------------------------- /2020/AnimalCrossing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/AnimalCrossing.png -------------------------------------------------------------------------------- /2020/AreaPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/AreaPlot.png -------------------------------------------------------------------------------- /2020/Beer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Beer.png -------------------------------------------------------------------------------- /2020/Broadway.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Broadway.png -------------------------------------------------------------------------------- /2020/Caribou.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Caribou.png -------------------------------------------------------------------------------- /2020/Cocktails.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Cocktails.png -------------------------------------------------------------------------------- /2020/Coffee1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Coffee1.png -------------------------------------------------------------------------------- /2020/Coffee2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Coffee2.png -------------------------------------------------------------------------------- /2020/First.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/First.png -------------------------------------------------------------------------------- /2020/GDPR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/GDPR.png -------------------------------------------------------------------------------- /2020/HipHop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/HipHop.png -------------------------------------------------------------------------------- /2020/Hockey.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Hockey.png -------------------------------------------------------------------------------- /2020/Measles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Measles.png -------------------------------------------------------------------------------- /2020/Measles.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Measles.tiff -------------------------------------------------------------------------------- /2020/NewAvatar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/NewAvatar.png -------------------------------------------------------------------------------- /2020/Penguins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Penguins.png -------------------------------------------------------------------------------- /2020/Plants.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Plants.png -------------------------------------------------------------------------------- /2020/Plants2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Plants2.png 
-------------------------------------------------------------------------------- /2020/Plants_cowplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Plants_cowplot.png -------------------------------------------------------------------------------- /2020/RSPCA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/RSPCA.png -------------------------------------------------------------------------------- /2020/RSPCA2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/RSPCA2.png -------------------------------------------------------------------------------- /2020/Salary_Tuition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Salary_Tuition.png -------------------------------------------------------------------------------- /2020/Second.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Second.png -------------------------------------------------------------------------------- /2020/TheOffice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/TheOffice.png -------------------------------------------------------------------------------- /2020/TourdeFrance.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/TourdeFrance.png -------------------------------------------------------------------------------- /2020/Tuition.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/Tuition.gif -------------------------------------------------------------------------------- /2020/statements.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2020/statements.csv -------------------------------------------------------------------------------- /2021/11_2021_bechdel.R: -------------------------------------------------------------------------------- 1 | # 11_2021: Bechdel test 2 | source("HelperFunctions.R") 3 | 4 | tuesdata <- tidytuesdayR::tt_load(2021, week = 11) 5 | bechdel <- tuesdata$raw_bechdel 6 | movies <- tuesdata$movies 7 | 8 | library(tidyverse) 9 | 10 | 11 | # Do American movies pass the Bechdel test less often than others? 
# --------------------------------------------------------------------------
# Yearly counts of films by Bechdel outcome, split into US vs. non-US films.
# `isamerican` is TRUE when "USA" appears anywhere in the country field.
d1 <- movies %>%
  mutate(isamerican = str_detect(string = country, pattern = "USA")) %>%
  filter(!is.na(isamerican)) %>%
  count(year, clean_test, isamerican) %>%
  # Readable outcome labels, then ordered from worst to best
  mutate(
    clean_test = recode(clean_test,
      "dubious" = "Probably doesn't pass",
      "nowomen" = "Fewer than two women",
      "notalk" = "Women don't talk to each other",
      "men" = "Women only talk about men",
      "ok" = "Passes the Bechdel test"
    ),
    clean_test = factor(clean_test,
      levels = c(
        "Fewer than two women",
        "Women don't talk to each other",
        "Women only talk about men",
        "Probably doesn't pass",
        "Passes the Bechdel test"
      )
    )
  )

# Total films per year within each US / non-US group (denominator for shares)
counts <- d1 %>%
  group_by(year, isamerican) %>%
  summarize(totalmovies = sum(n)) %>%
  ungroup()

# Share of each Bechdel outcome within each year/group
props <- d1 %>%
  left_join(counts, by = c("year", "isamerican")) %>%
  mutate(prop_ctotal = n / totalmovies)

# Fail shades in reds/pinks, pass in blue
fpal <- c("#e63946", "#EA7F83", "#EEC5C0", "#a8dadc", "#457b9d")

# Stacked outcome proportions by year, faceted US vs. everyone else
p1 <- props %>%
  mutate(is_american = ifelse(isamerican,
    "American Films",
    "Everyone Else"
  )) %>%
  ggplot(aes(
    x = year, y = prop_ctotal,
    colour = clean_test, fill = clean_test
  )) +
  geom_col() +
  facet_wrap(~is_american) +
  scale_fill_manual(values = fpal) +
  scale_colour_manual(values = fpal) +
  ylab("Proportion of movies") +
  labs(
    title = "The Bechdel test in American films and globally",
    subtitle = "How do American films compare to films from other countries?"
  ) +
  hrbrthemes::theme_ipsum_rc() +
  theme(legend.position = "none")

# Raw counts with loess trends; free y since the US releases many more films
p2 <- d1 %>%
  mutate(is_american = ifelse(isamerican, "American Films",
    "Everyone Else"
  )) %>%
  ggplot(aes(x = year, y = n, color = clean_test)) +
  facet_wrap(~is_american, scales = "free_y") +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_colour_manual("Bechdel result", values = fpal) +
  scale_fill_manual(values = fpal) +
  ylab("Number of movies") +
  labs(caption = "Plot: Margaret Siple (@margaretsiple) - Data: Bechdeltest.com API") +
  hrbrthemes::theme_ipsum_rc() +
  theme(
    strip.background = element_blank(),
    strip.text.x = element_blank()
  )

library(patchwork)

png(filename = "CountryBechdel.png", width = 8, height = 8, units = "in", res = 200)
p1 + p2 +
  plot_layout(ncol = 1, heights = c(1, 3), guides = "collect")
dev.off()

# Bechdel passes by genre and rating --------------------------------------
# Split the multi-valued country and genre fields into one column per entry
ct <- movies %>%
  select(year, title, clean_test, binary, country, genre, rated) %>%
  tidyr::separate(country, into = paste0("c", 1:5)) %>%
  tidyr::separate(genre, into = paste0("g", 1:3))

# All genre tokens observed across the three genre slots
genres <- ct %>%
  select(g1, g2, g3) %>%
  t() %>%
  c() %>%
  unique()
unique(movies$rated)

library(gggibbous)

# Pass/fail counts by rating x genre; values_fill = 0 replaces the old
# pivot_wider() + mutate_at(replace_na) pair (mutate_at is superseded)
d3 <- ct %>%
  select(title, binary, rated, g1, g2, g3) %>%
  filter(!is.na(g1)) %>%
  pivot_longer(cols = g1:g3, values_to = c("genre")) %>%
  select(-name) %>%
  group_by(rated, genre) %>%
  count(binary) %>%
  ungroup() %>%
  pivot_wider(names_from = binary, values_from = n, values_fill = 0) %>%
  mutate(total = FAIL + PASS) %>%
  # Drop separator fragments ("Fi" from "Sci-Fi") and sparse/odd ratings
  filter(!is.na(genre) & genre != "Fi" & !rated %in% c("N/A", "Not Rated", "Unrated", "X", "TV-14", "TV-PG")) %>%
  mutate(genre = recode(genre, "Sci" = "Sci-Fi"))
# Genre-level totals appended as an extra "rating" row per genre.
# na.rm = TRUE (was T; T/F are reassignable and discouraged)
totalrow <- d3 %>%
  select(-rated) %>%
  group_by(genre) %>%
  summarise(across(where(is.numeric), ~ sum(.x, na.rm = TRUE), .names = "{.col}")) %>%
  mutate(rated = "Genre total")

d4 <- d3 %>%
  add_row(totalrow) %>%
  # Order genres by overall pass rate so the panel reads low-to-high
  mutate(genre = fct_reorder(genre, PASS / total)) %>%
  mutate(rated = fct_relevel(rated, "G", "PG", "PG-13", "R", "NC-17", "Genre total"))

mooncolor <- "grey"
moonfill <- "white"
highlightmoon <- "#457b9d"

# Cells where more than two-thirds of films pass -> highlighted in blue
d4b <- d4 %>%
  filter(PASS / total > 0.66)

p3 <- d4 %>%
  ggplot(aes(x = rated, y = genre)) +
  # Grey base moons: pass ratio on the right, fail ratio on the left
  geom_moon(aes(ratio = PASS / total),
    fill = mooncolor,
    colour = mooncolor
  ) +
  geom_moon(aes(ratio = FAIL / total),
    fill = moonfill,
    right = FALSE,
    colour = mooncolor
  ) +
  # Re-draw the highlighted (> 2/3 passing) cells in blue
  geom_moon(
    data = d4b, aes(ratio = PASS / total),
    fill = highlightmoon,
    colour = highlightmoon
  ) +
  geom_moon(
    data = d4b, aes(ratio = FAIL / total),
    fill = NA,
    right = FALSE,
    colour = highlightmoon
  ) +
  ylab("Genre") +
  xlab("Rating") +
  labs(
    title = "Across genres, fewer R-rated films pass the Bechdel",
    subtitle = "Blue crescents indicate where more than two-thirds of films pass the test.",
    caption = "Plot: Margaret Siple (@margaretsiple) - Data: Bechdeltest.com API"
  ) +
  hrbrthemes::theme_ipsum_rc() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(colour = highlightmoon, hjust = 0.5)
  )

png("BechdelMoons.png", width = 7, height = 10, units = "in", res = 200)
p3
dev.off()
-------------------------------------------------------------------------------- /2021/17_2021_netflix.R: --------------------------------------------------------------------------------
# 17_netflix
library(tidyverse)
library(tidygraph)
library(ggraph)

# soundtrack: Olivia Rodrigo - SOUR
rodrigopal <-
c("#8780C3", "#71BDC3", "#EBD4C8", "#BB2D67", "#EEC660") 8 | source("HelperFunctions.R") 9 | rodrigopal_10 <- lengthen_pal(1:10, rodrigopal) 10 | 11 | # Load the data 12 | tuesdata <- tidytuesdayR::tt_load(2021, week = 17) 13 | 14 | # Clean up data 15 | netflix <- tuesdata$netflix %>% 16 | mutate(add_year = as.numeric(str_sub(date_added, -5, -1))) %>% 17 | filter(!is.na(add_year)) %>% 18 | select(-cast, -date_added, -rating, -duration, -director, -cast, -description) %>% 19 | filter(across(country, ~ grepl("United States", .))) 20 | 21 | 22 | alldat <- netflix %>% 23 | separate(listed_in, into = c("g1", "g2", "g3"), fill = "right") %>% 24 | pivot_longer(cols = g1:g3, values_to = "genre") %>% 25 | filter(!is.na(genre)) %>% 26 | filter(add_year > 2012) 27 | 28 | alldat 29 | 30 | 31 | 32 | # Functions for bubble plot ----------------------------------------------- 33 | 34 | make_nodes <- function(alldat) { 35 | unit_size <- alldat %>% 36 | group_by(type, genre) %>% 37 | summarize(size = length(show_id)) %>% 38 | ungroup() %>% 39 | rename(name = genre) 40 | listed_size <- unit_size %>% 41 | select(-type) 42 | 43 | type_size <- unit_size %>% 44 | group_by(type) %>% 45 | summarize(size = sum(size)) %>% 46 | rename(name = type) 47 | 48 | year_size <- tibble( 49 | name = as.character(alldat$add_year[1]), 50 | size = sum(unit_size$size) 51 | ) 52 | 53 | nodes <- bind_rows(listed_size, type_size, year_size) 54 | return(nodes) 55 | } 56 | 57 | 58 | make_edges <- function(alldat) { 59 | x <- alldat %>% 60 | group_by(add_year, type, genre) %>% 61 | summarize(size = length(show_id)) %>% 62 | ungroup() %>% 63 | rename(name = genre) 64 | 65 | base <- tibble( 66 | from = as.character(unique(x$add_year)), 67 | to = unique(x$type) 68 | ) 69 | 70 | inner <- x %>% 71 | select(from = type, to = name) %>% 72 | distinct() 73 | 74 | edges <- bind_rows(base, inner) 75 | return(edges) 76 | } 77 | 78 | 79 | make_year_plot <- function(x, pal = rodrigopal) { 80 | nodes <- make_nodes(x) 81 | 
edges <- make_edges(x) 82 | mygraph <- tbl_graph(nodes = nodes, edges = edges) 83 | 84 | # Make the plot 85 | plot <- ggraph(mygraph, layout = "circlepack", weight = size) + 86 | geom_node_circle(aes(fill = depth), color = "grey14", size = 0.2) + 87 | labs(title = unique(x$add_year)) + 88 | coord_equal() + 89 | scale_fill_gradientn(colours = pal) + 90 | hrbrthemes::theme_ipsum(base_size = 10) + 91 | scale_x_continuous(expand = c(0, 0)) + 92 | scale_y_continuous(expand = c(0, 0)) + 93 | labs(x = NULL, y = NULL) + 94 | theme( 95 | legend.position = "none", 96 | panel.grid.major = element_blank(), 97 | panel.grid.minor = element_blank(), 98 | panel.background = element_blank(), 99 | axis.ticks = element_blank(), 100 | axis.text = element_blank(), 101 | axis.text.x = element_blank(), 102 | axis.text.y = element_blank(), 103 | plot.title = element_text(size = 9) 104 | ) 105 | plot 106 | } 107 | 108 | 109 | library(patchwork) 110 | 111 | # amazing shortcut for many mini figs 112 | list <- alldat %>% 113 | split(.$add_year) %>% 114 | map(make_year_plot) 115 | 116 | p1 <- wrap_plots(list, ncol = 3, nrow = 3) + 117 | plot_annotation(caption = "Data: Sara Stoudt / The Economist. 
Plot: @margaretsiple w/ code from @jakekaupp") & (hrbrthemes::theme_ipsum(base_size = 10) + theme(
  legend.position = "none",
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),
  axis.ticks = element_blank(),
  axis.text = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  plot.title = element_text(size = 12, hjust = 0)
))

p1

# Maybe just a nice time series fig now -------------------------------------
# Top 10 genre tokens, dropping fragments left over from separate()
topgenres <- alldat %>%
  group_by(genre) %>%
  count() %>%
  filter(!genre %in% c("Movies", "TV", "Stand", "Up", "Shows")) %>%
  ungroup() %>%
  slice_max(order_by = n, n = 10) %>%
  # get top 10 genres
  select(genre)

# Titles added per year/type/genre, accumulated over time.
# BUG FIX: the original took cumsum() over the whole ungrouped frame,
# mixing genres and types together; the running total must be computed
# per (type, genre) so each area in the plot accumulates its own series.
d2 <- alldat %>%
  right_join(topgenres, by = "genre") %>%
  group_by(add_year, type, genre) %>%
  summarize(n = length(unique(title))) %>%
  ungroup() %>%
  arrange(add_year) %>%
  group_by(type, genre) %>%
  mutate(cumu_n = cumsum(n)) %>%
  ungroup() %>%
  filter(add_year <= 2020) %>%
  rename(Genre = genre)

p2 <- d2 %>%
  ggplot(aes(x = add_year, y = cumu_n, fill = Genre)) +
  geom_area() +
  scale_fill_manual(values = rodrigopal_10) +
  facet_wrap(~type, ncol = 1) +
  labs(
    x = "Year added", y = "Cumulative number of listings added",
    title = "Netflix additions in the U.S.",
    subtitle = "Movies and TV shows added to Netflix since 2013.
\nYellow bubbles are genres, nested within type (TV show or movie), \nnested within year"
  ) +
  hrbrthemes::theme_ipsum(base_size = 10) +
  theme(legend.position = "bottom")


# Export figs -------------------------------------------------------------

png("NetflixA.png", width = 6, height = 10, units = "in", res = 200)
p2
dev.off()

png("NetflixB.png", width = 14, height = 10, units = "in", res = 200)
p1
dev.off()
-------------------------------------------------------------------------------- /2021/51_2021_spice_girls.R: --------------------------------------------------------------------------------
# Spice girls!

# Libraries ---------------------------------------------------------------
# remotes::install_github("hrbrmstr/ggchicklet")
library(tidyverse)
library(tidytext)
library(glue)
library(stringr)
library(ggchicklet)
library(GGally)
library(ggcorrplot)

# Color palettes and themes -----------------------------------------------
# Palettes from Spice Girls album covers, made on coolors.co
spice_pal <- c("#EF4035", "#FD7D34", "#9A351D", "#88D200", "#f9ea9a", "#cDB2CB", "#FFFFFF", "#BE525F", "#CDE009", "#A9973E") # 10 colours
spiceworld_pal <- c("#B0A7B4", "#FF2332", "#07047C", "#FF1085", "#B6E6D1", "#E8F650", "#00A8D1", "#FFB463") # 8 colours

scales::show_col(spice_pal)
scales::show_col(spiceworld_pal)

# Data --------------------------------------------------------------------
tuesdata <- tidytuesdayR::tt_load("2021-12-14")
studio_album_tracks <- tuesdata$studio_album_tracks
lyrics <- tuesdata$lyrics

# Sentiment by how many singers share a line; cap the singer count at 5.
# pmin() is the vectorised cap (replaces ifelse(nsingers > 5, 5, nsingers)).
tokens <- lyrics %>%
  mutate(nsingers = str_count(section_artist, boundary("word"))) %>%
  mutate(nsingers = pmin(nsingers, 5)) %>%
  unnest_tokens(output = "word", input = "line") %>%
  inner_join(get_sentiments("bing")) %>%
group_by(nsingers) %>% 32 | count(sentiment) %>% # count the # of positive & negative words 33 | spread(sentiment, n, fill = 0) %>% 34 | mutate( 35 | prop_negative = negative / (negative + positive), 36 | prop_positive = positive / (negative + positive), 37 | pos_ratio = positive / negative 38 | ) 39 | 40 | tokens2 <- lyrics %>% 41 | unnest_tokens(output = "word", input = "line") %>% 42 | inner_join(get_sentiments("bing")) %>% 43 | group_by(track_number, album_name, section_artist) %>% 44 | count(sentiment) %>% # count the # of positive & negative words 45 | spread(sentiment, n, fill = 0) %>% 46 | mutate( 47 | prop_negative = negative / (negative + positive), 48 | prop_positive = positive / (negative + positive), 49 | pos_ratio = positive / negative 50 | ) 51 | 52 | tokens %>% 53 | select(nsingers, pos_ratio) %>% 54 | pivot_longer(pos_ratio) %>% 55 | ggplot(aes(x = nsingers, y = value)) + 56 | geom_point() + 57 | geom_line() 58 | 59 | 60 | # Spread singers from section_artist into columns -------------------------------------- 61 | get_singer <- function(col_str, singer) { 62 | ifelse(grepl(pattern = singer, x = col_str), 1, 0) 63 | } 64 | 65 | singers <- c("Scary", "Posh", "Ginger", "Baby", "Sporty") 66 | # Divide singers for each lyric into their own column a la presence/absence 67 | lyrics_wide <- lyrics %>% 68 | mutate( 69 | Baby = get_singer(singer = "Baby", section_artist), 70 | Posh = get_singer(singer = "Posh", section_artist), 71 | Scary = get_singer(singer = "Scary", section_artist), 72 | Sporty = get_singer(singer = "Sporty", section_artist), 73 | Ginger = get_singer(singer = "Ginger", section_artist), 74 | All = get_singer(singer = "All", section_artist) 75 | ) 76 | # No idea how to do this better...sorry tidyverse! 
# Vectorised replacement for the original row-by-row for loop: when
# All == 1, force each singer column to 1. Columns are 0/1, so pmax()
# against All reproduces the loop exactly.
lyrics_wide <- lyrics_wide %>%
  mutate(across(c(Baby, Scary, Sporty, Posh, Ginger), ~ pmax(.x, All)))

# Per-singer sentiment counts by track.
# NOTE(review): get_sentiments("afinn") returns word/value columns, not a
# `sentiment` column, so count(sentiment) below looks like it would fail
# with the afinn lexicon -- confirm whether this run actually used "bing".
lyrics_wide2 <- lyrics_wide %>%
  select(-All) %>%
  unnest_tokens(output = "word", input = "line") %>%
  inner_join(get_sentiments("afinn")) %>%
  pivot_longer(cols = Baby:Ginger, values_to = "Singer") %>%
  filter(Singer == 1) %>%
  select(-Singer, -section_artist) %>%
  group_by(track_number, album_name, name) %>%
  count(sentiment) %>% # count the # of positive & negative words
  ungroup() %>%
  spread(sentiment, n, fill = 0)


# Geom_chicklet plot of sentiments ----------------------------------------

p1 <- lyrics_wide2 %>%
  mutate(negative = negative * -1) %>% # negatives plot below the axis
  rename(Singer = name) %>%
  ggplot(aes(x = track_number, y = positive, fill = Singer)) +
  geom_chicklet() +
  geom_chicklet(aes(x = track_number, y = negative, fill = Singer)) +
  scale_fill_manual(values = spiceworld_pal[3:7]) +
  geom_hline(yintercept = 0) +
  facet_wrap(~ factor(album_name, # order albums chronologically
    levels = c("Spice", "Spiceworld", "Forever")
  ), ncol = 3) +
  xlab("Track number") +
  ylab("Number of positive or negative words \n(AFINN sentiment score)") +
  hrbrthemes::theme_ipsum_rc() +
  labs(
    x = "Track number",
    title = "Spice Girls and Sentiments",
    subtitle = "Sentiment analysis of Spice Girls lyrics and correlations between track characteristics"
  )
p1


# Studio album tracks
# ------------------------------------------------------------------------

dat2 <- studio_album_tracks %>%
  select(
    danceability, energy, loudness, liveness, valence,
    album_name
  )

# Pairwise relationships among track audio features, coloured by album
p2 <- GGally::ggpairs(as.data.frame(dat2),
  columns = 1:5, aes(color = album_name, alpha = 0.8)
) +
  scale_color_manual(values = spice_pal[c(1, 8, 9)]) +
  scale_fill_manual(values = spice_pal[c(1, 8, 9)]) +
  hrbrthemes::theme_ipsum_rc()

x <- studio_album_tracks %>%
  select(speechiness:tempo)

# Correlation p-values among audio features
xx <- cor_pmat(x)

p3 <- ggcorrplot(xx,
  type = "upper",
  method = "circle",
  ggtheme = hrbrthemes::theme_ipsum_rc,
  colors = c("#6D9EC1", "white", "#07047C")
) +
  labs(caption = "Spice Girls dataset from @jacquietran")

# Save plots --------------------------------------------------------------
# patchwork is needed for the `+`/plot_layout() composition below but was
# never attached in this script
library(patchwork)

png("SG.png", width = 12, height = 7, units = "in", res = 150)
p1 + p3 + plot_layout(ncol = 2, widths = c(2, 1))
dev.off()
-------------------------------------------------------------------------------- /2021/5_2021_plastics.R: --------------------------------------------------------------------------------
library(tidyverse)


# Soundtrack --------------------------------------------------------------
# Cardi B - UP!
# Get the data ------------------------------------------------------------
tuesdata <- tidytuesdayR::tt_load(2021, week = 5)
plastics <- tuesdata$plastics


# Summarize ---------------------------------------------------------------
# Counts of each plastic type per company and year.
# across() replaces the superseded summarize_at().
totplastics <- plastics %>%
  group_by(parent_company, year) %>%
  summarize(across(empty:pvc, ~ sum(.x, na.rm = TRUE)))

# Lump everything outside the 7 biggest producers (by 2020 counts) into
# "Other"; unbranded/garbage company names are collapsed first.
top7 <- totplastics %>%
  filter(year == 2020) %>%
  pivot_longer(cols = empty:pvc, names_to = "type") %>%
  group_by(parent_company) %>%
  summarize(total = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(parent_company = case_when(
    parent_company %in% c("Unbranded", "null", "NULL", "#ERROR!") ~ "Unbranded_unknown",
    TRUE ~ parent_company
  )) %>%
  mutate(company_lumped = fct_lump(parent_company, n = 7, w = total)) %>%
  distinct(parent_company, company_lumped) %>%
  right_join(totplastics, by = "parent_company") %>%
  mutate(total = hdpe + ldpe + o + pet + pp + ps + pvc) # row total across plastic types

# Top 6 companies palettes, from design-seeds.com
companypal <- c(
  "#81C4CA",
  "#468D96",
  "#103128",
  "#E83D5F",
  "#FA6E90",
  "#FCB16D"
)

# Plot 1: coord_polar with proportion of plastic types from each company in each plastic category --------------------
p1 <- top7 %>%
  pivot_longer(cols = empty:pvc, names_to = "type") %>%
  mutate(type = fct_reorder(type, desc(value))) %>%
  filter(company_lumped != "Unbranded_unknown") %>%
  filter(company_lumped != "Other") %>%
  filter(!type %in% c("ps", "pvc", "empty")) %>%
  group_by(company_lumped, type) %>%
  summarize(totvalue = sum(value, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(company_lumped) %>%
  mutate(prop_val = totvalue / sum(totvalue)) %>%
  ggplot(aes(fill = company_lumped, colour = company_lumped)) +
  geom_segment(aes(
    x = type, xend = type,
    y = 0.02, yend = prop_val + 0.02
57 | ), 58 | lwd = 2, lineend = "round" 59 | ) + 60 | geom_label(aes(x = type, y = 0, label = type), 61 | fill = "white", size = 2.5 62 | ) + 63 | ylim(c(0, 1.2)) + 64 | xlab("Type of plastic") + 65 | ylab("") + 66 | scale_colour_manual(values = companypal) + 67 | coord_polar(theta = "y") + 68 | facet_wrap(~company_lumped, ncol = 2) + 69 | hrbrthemes::theme_ipsum_rc() + 70 | theme( 71 | axis.text.x = element_blank(), 72 | axis.text.y = element_blank(), 73 | legend.position = "none" 74 | ) + 75 | labs(caption = "Data: Break Free From Plastic") 76 | 77 | p1 78 | 79 | # Heat map! --------------------------------------------------------------- 80 | top30 <- totplastics %>% 81 | filter(year == 2020) %>% 82 | pivot_longer(cols = empty:pvc, names_to = "type") %>% 83 | group_by(parent_company) %>% 84 | summarize(total = sum(value, na.rm = T)) %>% 85 | ungroup() %>% 86 | mutate(parent_company = case_when( 87 | parent_company %in% c("Unbranded", "null", "NULL", "#ERROR!") ~ "Unbranded_unknown", 88 | TRUE ~ parent_company 89 | )) %>% 90 | mutate(company_lumped = fct_lump(parent_company, n = 30, w = total)) %>% 91 | distinct(parent_company, company_lumped) %>% 92 | right_join(totplastics) %>% 93 | mutate(total = hdpe + ldpe + o + pet + pp + ps + pvc) # forgot how to sum across columns 94 | 95 | p2 <- top30 %>% 96 | group_by(company_lumped) %>% 97 | summarize_at(vars(empty:pvc), .funs = ~ sum(.x, na.rm = TRUE)) %>% 98 | filter(!is.na(company_lumped)) %>% 99 | mutate(row_sum = rowSums(select(., -1))) %>% 100 | mutate_at(-1, ~ . 
/ row_sum) %>% 101 | select(-row_sum) %>% 102 | pivot_longer(cols = empty:pvc) %>% 103 | ggplot(aes(x = name, y = company_lumped, colour = value)) + 104 | geom_point(size = 4.5) + 105 | scale_colour_distiller(palette = "YlGnBu") + 106 | scale_y_discrete(limits = rev) + 107 | xlab("Type of plastic") + 108 | ylab("") + 109 | hrbrthemes::theme_ipsum_rc() + 110 | theme( 111 | legend.position = "none", 112 | plot.title = element_text(hjust = 0.01), 113 | plot.subtitle = element_text(hjust = 0.2) 114 | ) + 115 | labs( 116 | title = "Which type of plastic waste do these top companies produce?", 117 | subtitle = "Proportions of each plastic type by company. Based on labeled products from an audit report by Break Free From Plastic" 118 | ) 119 | 120 | p2 121 | 122 | 123 | 124 | # Save -------------------------------------------------------------------- 125 | library(patchwork) 126 | 127 | png("5_2021_plastics.png", width = 14, height = 10, units = "in", res = 200) 128 | p2 + p1 + plot_layout(ncol = 2, widths = c(2, 3)) 129 | dev.off() 130 | -------------------------------------------------------------------------------- /2021/5_2021_plastics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2021/5_2021_plastics.png -------------------------------------------------------------------------------- /2021/9_2021_employment.R: -------------------------------------------------------------------------------- 1 | # Employment 2 | tuesdata <- tidytuesdayR::tt_load(2021, week = 9) 3 | employed <- tuesdata$employed 4 | earn <- tuesdata$earn 5 | demo_names <- c("Men","Women","White","Black or African American", "Asian") 6 | 7 | library(tidyverse) 8 | table(employed$major_occupation) 9 | table(employed$race_gender) 10 | 11 | employed %>% 12 | filter(race_gender!="TOTAL" & !industry %in% demo_names) %>% # 13 | group_by(industry, year, race_gender) %>% # 14 | 
summarize(employ_N = sum(employ_n)) %>% 15 | ungroup() %>% 16 | ggplot(aes(x = year, y = employ_N, color = industry, lty = race_gender)) + 17 | geom_line() + 18 | facet_wrap(~industry) 19 | -------------------------------------------------------------------------------- /2021/BechdelMoons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2021/BechdelMoons.png -------------------------------------------------------------------------------- /2021/CountryBechdel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcsiple/tidytuesday/ba5e27fa6e052edb71963c3cc09a6ce0e8babb4a/2021/CountryBechdel.png -------------------------------------------------------------------------------- /2021/incomplete/23_2021_survivor.R: -------------------------------------------------------------------------------- 1 | # Week 23: SURVIVOR the TV show 2 | 3 | library(tidyverse) 4 | 5 | summary <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/summary.csv') 6 | challenges <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/challenges.csv') 7 | castaways <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/castaways.csv') 8 | viewers <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/viewers.csv') 9 | jury_votes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-06-01/jury_votes.csv') 10 | 11 | head(summary) 12 | 13 | summary2 <- summary %>% 14 | mutate(filming_duration = filming_ended - filming_started) 15 | # summary3 <- summary2 %>% 16 | # pivot_longer(cols = premiered:filming_ended, values_to = "date") 17 | # 
# summary3 %>%
#   ggplot(aes(x = season, y = date, colour = name)) +
#   geom_point()

# Gap between end of filming and the premiere date, by season
summary %>%
  ggplot(aes(x = season)) +
  geom_linerange(aes(ymin = filming_ended, ymax = premiered), lwd = 3) +
  coord_flip()

# Do strong premieres go with strong finales?
summary2 %>%
  ggplot(aes(x = viewers_premier, y = viewers_finale)) +
  geom_point()
-------------------------------------------------------------------------------- /2021/incomplete/25_2021_DuBoisChallenge.R: --------------------------------------------------------------------------------
library(tidyverse)

tuesdata <- tidytuesdayR::tt_load(2021, week = 25)
tweets <- tuesdata$tweets

# Base world map with all axes and gridlines stripped
world_map <- map_data("world")
map_plot <- ggplot() +
  geom_polygon(
    data = world_map,
    aes(x = long, y = lat, group = group),
    fill = "grey30", colour = "grey45", size = 0.1
  ) +
  xlab('') +
  ylab('') +
  coord_fixed() +
  hrbrthemes::theme_ft_rc(base_size = 14) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )

# Tweet locations sized by retweet count
map_plot +
  geom_point(
    data = tweets,
    aes(x = long, y = lat, size = retweet_count)
  )
-------------------------------------------------------------------------------- /2021/incomplete/35_lemurs.R: --------------------------------------------------------------------------------
# 35_lemurs
lemurs <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-08-24/lemur_data.csv')
View(lemurs)
unique(lemurs$taxon)
-------------------------------------------------------------------------------- /2021/incomplete/X_2021_gunviolence.R: --------------------------------------------------------------------------------
# 2021_gunviolence
# Gun violence dataset from Kaggle,
# contributed by James Ko
# https://www.kaggle.com/jameslko/gun-violence-data
gv <- read.csv("gun-violence-data_01-2013_03-2018.csv")
-------------------------------------------------------------------------------- /HelperFunctions.R: --------------------------------------------------------------------------------
# Helper functions + code tidbits

# for increasing the small little font size on the {hrbrthemes} plots:
# theme_ipsum_rc() +
#   theme(axis.title.x = element_text(size = rel(1.5)),
#         axis.title.y = element_text(size = rel(1.5)))
# This is a test to see if GH is working

# Interpolate a short palette out to one colour per unique value of `x`.
#   x        - vector whose unique values set how many colours are needed
#   shortpal - character vector of colours to interpolate between
# Returns a character vector of length(unique(x)) colours.
lengthen_pal <- function(x = 1:10, shortpal) {
  stopifnot(length(shortpal) >= 1) # colorRampPalette needs at least one colour
  ncolours <- length(unique(x))
  colorRampPalette(shortpal)(ncolours)
}


# PATCHWORK NOTE -----------------------------------------------------------
# if you use patchwork, use the & symbol to add a theme to the whole assemblage
# p1 + p2 + plot_annotation(title = 'Patchwork plots', subtitle = 'Reminder about adding a theme to the whole shebang') & theme(text = element_text('Roboto Condensed', size = 16))


# MAP NOTES ----------------------------------------------------------------
# world_map <- map_data("world")
# ggplot(world_map) +
#   geom_polygon(data = world_map, aes(x = long, y = lat, group = group),
#                fill = "grey30", colour = "grey45", size = 0.1)


# Print a palette in the graphics window -----------------------------------
# taken from stackoverflow
show_pal <- function(pal) {
  plot(seq_len(length(pal)), rep_len(1, length(pal)),
    col = pal, pch = 16, cex = 3, xaxt = "n", yaxt = "n", xlab = "", ylab = ""
  )
}