├── .gitignore ├── phd_commits.gif ├── phd-commits.Rproj ├── LICENSE ├── get_commits.R ├── repositories.tsv ├── README.md └── plot_commits.R /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /phd_commits.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lazappi/phd-commits/HEAD/phd_commits.gif -------------------------------------------------------------------------------- /phd-commits.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Luke Zappia 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
#' Collect git commits from the repositories directly under a directory
#'
#' Lists the immediate subdirectories of `path`, reads the commits of every
#' one that git2r reports as being in a repository and combines them into a
#' single tibble, which is then optionally de-duplicated and filtered.
#'
#' @param path Directory whose immediate subdirectories are searched.
#' @param distinct If `TRUE`, keep only the first row for each SHA.
#' @param filter If `TRUE`, keep only commits whose author name is in `users`.
#' @param users Author names to keep when `filter = TRUE`.
#' @param from Earliest commit time to keep. Parsed with
#'   `lubridate::as_datetime()`, the same way the "When" column is built, so
#'   both sides of the comparison are in UTC.
#'
#' @return A tibble with columns "SHA", "Name", "When" and "Repository". It has
#'   zero rows (but still those four columns) when nothing was found.
get_commits <- function(path, distinct = TRUE, filter = TRUE,
                        users = c("l.zappia", "lazappi", "Luke Zappia"),
                        from = "2016-02-08") {

    # Parse the cut-off once, up front, so a bad value fails before any
    # repository is read.
    from_time <- lubridate::as_datetime(from)
    if (length(from_time) != 1 || is.na(from_time)) {
        stop("`from` must be a single value that can be parsed as a date",
             call. = FALSE)
    }

    dirs <- fs::dir_ls(path, type = "directory")

    message("Searching ", length(dirs), " directories...")

    commits <- purrr::map_dfr(dirs, function(dir) {
        message("Processing ", fs::path_file(dir), "...")

        # Returning NULL contributes no rows to the combined result
        if (!git2r::in_repository(dir)) {
            return(NULL)
        }

        commits_list <- git2r::commits(dir)
        if (length(commits_list) == 0) {
            return(NULL)
        }

        # Build one tibble per repository rather than one per commit
        shas <- vapply(
            commits_list, function(commit) commit$sha,
            character(1), USE.NAMES = FALSE
        )
        authors <- vapply(
            commits_list, function(commit) commit$author$name,
            character(1), USE.NAMES = FALSE
        )
        times <- vapply(
            commits_list, function(commit) as.numeric(commit$author$when$time),
            numeric(1), USE.NAMES = FALSE
        )

        tibble::tibble(
            SHA = shas,
            Name = authors,
            When = lubridate::as_datetime(times),
            Repository = fs::path_file(dir)
        )
    })

    if (nrow(commits) == 0) {
        # With nothing to bind the combined result has no columns at all, and
        # the steps below refer to SHA, When and Name by name.
        commits <- tibble::tibble(
            SHA = character(0),
            Name = character(0),
            When = as.POSIXct(numeric(0), origin = "1970-01-01", tz = "UTC"),
            Repository = character(0)
        )
    }

    message("Found ", nrow(commits), " commits")

    if (distinct) {
        message("Selecting distinct SHAs...")
        commits <- dplyr::distinct(commits, SHA, .keep_all = TRUE)
        message("Found ", nrow(commits), " distinct commits")
    }

    message("Filtering dates...")
    commits <- dplyr::filter(commits, When >= from_time)
    message("Found ", nrow(commits), " from ", from)

    if (filter) {
        message("Filtering names...")
        commits <- dplyr::filter(commits, Name %in% users)
        message("Found ", nrow(commits), " commits by me")
    }

    return(commits)
}
scRNAtools-paper Tools Writing scRNAtools-plots Tools Writing seurat Other Code seurat-3 Other Code Simplifying-simulation-of-single-cell-RNA-sequencing-with-Splatter Simulation Writing splatter Simulation Code splatter-estimation Simulation Code splatter-paper Simulation Writing splatter-paper Simulation Writing survey-2018 SideProject Code tidyr SideProject Code Tung-iPSCs Analysis Code twitter-stats SideProject Code twitter-stats.bak SideProject Code two-year-review Reports Writing unimelbdown SideProject Code workflowr Other Code workflowr-test SideProject Code workshop-notes SideProject Writing -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PhD commits 2 | 3 | Functions for scraping git commits from repositories associated with a PhD (or 4 | anything else) and plotting them. The result looks something like this. 5 | 6 | ![Commits GIF](phd_commits.gif) 7 | 8 | ## How to use 9 | 10 | ### 1. Get your commits 11 | 12 | The `get_commits()` function takes a path and searches each of the 13 | subdirectories. For each subdirectory that is a git repository it extracts 14 | the git commits. The other arguments can be used to filter the commits to 15 | a specific date or users. The output is a `tibble` with columns "SHA", "Name", 16 | "When" and "Repository". 17 | 18 | ```r 19 | commits <- get_commits("/path/to/repos") 20 | ``` 21 | 22 | If you have repositories in multiple locations you can combine them using 23 | `dplyr::bind_rows()` and filter on the SHA to remove any duplicates. 24 | 25 | ```r 26 | commits1 <- get_commits("/path/to/repos1") 27 | commits2 <- get_commits("/path/to/repos2") 28 | 29 | commits <- commits1 %>% 30 | dplyr::bind_rows(commits2) %>% 31 | dplyr::distinct(SHA, .keep_all = TRUE) 32 | ``` 33 | 34 | ### 2. Categorise your repositories 35 | 36 | Each repository needs a category and a type. 
Categories can be broad 37 | projects/thesis chapters and I used Type to specify if the repository was 38 | mostly code or writing. I did this by printing all the repositories using this 39 | code: 40 | 41 | ```r 42 | cat(paste(sort(unique(commits$Repository)), collapse = "\n")) 43 | ``` 44 | 45 | Then copying that list into Excel and categorising each repository. There is 46 | probably a better/more R way to do it but this works. What you need at the end 47 | is a second repository tibble with the columns "Repository", "Category" and 48 | "Type". 49 | 50 | ### 3. Plot your commits 51 | 52 | You can now pass these two tibbles to the `plot_commits()` function. 53 | 54 | ```r 55 | commits_plot <- plot_commits(commits, repositories) 56 | ``` 57 | 58 | Change the other arguments to make sure you have the right category 59 | order/labels, colours etc. The `label` parameter labels the first commit of 60 | each repository with the (truncated) repository name. 61 | 62 | ### 4. Animate your commits 63 | 64 | The last step is to animate everything using `animate_commits()`. This is 65 | mostly pretty easy, the tricky part is getting the subtitle right. 66 | 67 | ```r 68 | animate_commits(commits_plot) 69 | ``` 70 | 71 | Save your animation as a gif using `gganimate::anim_save()`. 72 | 73 | ```r 74 | anim_save("commits.gif") 75 | ``` 76 | 77 | ## Notes 78 | 79 | * Hopefully this is useful/interesting for some people. 80 | * This code is not well tested, use at your own risk. 81 | * I tried to make the functions as generic as possible but there may be some 82 | places where things specific to me are still there. 83 | * I couldn't get any kind of enter transition to work for the points. I think 84 | it has something to do with having to give each point its own group so they 85 | don't disappear. Solutions welcome. 86 | * Thanks to everyone who wrote the packages I used and everyone who has posted 87 | tutorials/answered questions on the internet. 
#' Plot commits over time, one row of points per repository category
#'
#' Joins the commits to their repository categorisation and draws every
#' commit as a jittered point, coloured by category and shaped by type.
#'
#' @param commits Tibble of commits with "SHA", "When" and "Repository"
#'   columns.
#' @param repositories Tibble with "Repository", "Category" and "Type"
#'   columns.
#' @param label If `TRUE`, label the first commit of each repository with the
#'   repository name truncated to 13 characters.
#' @param cat_levels Category values, in display order.
#' @param cat_labels Display labels matching `cat_levels`.
#' @param pal Colours matching `cat_levels`.
#' @param start_date Not used in this function body, see the comment on the
#'   argument.
#'
#' @return A ggplot object.
plot_commits <- function(commits, repositories, label = FALSE,
                         cat_levels = c("Tools", "Simulation", "ClustTrees",
                                        "Analysis", "Reports", "Other",
                                        "SideProject"),
                         cat_labels = c("Tools", "Simulation",
                                        "Clustering trees",
                                        "Analysis", "Reports", "Other",
                                        "Side projects"),
                         pal = c("#00ADEF", "#8DC63F", "#EC008C", "#00B7C6",
                                 "#7A52C7", "#F47920", "grey50"),
                         # This isn't used in this function but the subtitle
                         # built in animate_commits() refers to `start_date`
                         # (and `commits`) by name.
                         # NOTE(review): presumably gganimate evaluates that
                         # subtitle in the environment the plot was created
                         # in, which is why it has to live here - confirm.
                         start_date = "2016-02-08"
                         ) {

    `%>%` <- magrittr::`%>%`

    # A repository listed more than once makes the join below emit one row
    # per match, so each of its commits would be plotted and counted twice.
    repositories <- dplyr::distinct(repositories)
    if (anyDuplicated(repositories$Repository) > 0) {
        warning(
            "Some repositories are categorised more than once, ",
            "using the first entry for each",
            call. = FALSE
        )
        repositories <- dplyr::distinct(
            repositories, Repository, .keep_all = TRUE
        )
    }

    commits <- dplyr::left_join(commits, repositories, by = "Repository")

    if (any(is.na(commits$Category))) {
        stop("Some categories are NA!")
    }

    if (any(is.na(commits$Type))) {
        stop("Some types are NA!")
    }

    # Levels are reversed because coord_flip() draws the first level at the
    # bottom of the flipped axis.
    commits <- dplyr::mutate(
        commits,
        Category = factor(
            Category,
            levels = rev(cat_levels),
            labels = rev(cat_labels))
    )

    commits_plot <- ggplot2::ggplot(
        commits,
        ggplot2::aes(x = Category, y = When, colour = Category, shape = Type)
    ) +
        ggplot2::geom_jitter(
            # Give each point its own group for animation
            ggplot2::aes(group = seq_along(SHA)),
            width = 0.2, height = 0, alpha = 0.8, size = 2
        ) +
        ggplot2::scale_x_discrete(drop = FALSE) +
        ggplot2::scale_shape_manual(values = c(16, 1)) +
        # "none" replaces the deprecated `guide = FALSE`
        ggplot2::scale_colour_manual(values = rev(pal), guide = "none") +
        ggplot2::coord_flip() +
        ggplot2::labs(
            title = "A PhD in git commits",
            subtitle = glue::glue(
                "{nrow(commits)} commits in ",
                "{length(unique(commits$Repository))} repositories")
        ) +
        ggplot2::theme_minimal() +
        ggplot2::theme(
            plot.title = ggplot2::element_text(size = 20),
            plot.subtitle = ggplot2::element_text(size = 16),
            axis.title = ggplot2::element_blank(),
            axis.text = ggplot2::element_text(size = 12),
            legend.position = "bottom"
        )

    if (label) {
        # Get the first commit for each repository
        first_commits <- commits %>%
            dplyr::group_by(Repository) %>%
            dplyr::arrange(When) %>%
            dplyr::filter(dplyr::row_number() == 1) %>%
            dplyr::ungroup() %>%
            dplyr::mutate(Repository = stringr::str_trunc(Repository, 13))

        # Put half above the commits...
        first_commits_top <- first_commits %>%
            dplyr::group_by(Category) %>%
            dplyr::filter(dplyr::row_number() %% 2 == 1) %>%
            dplyr::ungroup() %>%
            # Stagger the adjustment to avoid overlaps
            dplyr::mutate(vjust = rep(c(3, 5), length.out = dplyr::n()))

        # ... and half below
        first_commits_bottom <- first_commits %>%
            dplyr::group_by(Category) %>%
            dplyr::filter(dplyr::row_number() %% 2 == 0) %>%
            dplyr::ungroup() %>%
            dplyr::mutate(vjust = rep(c(3, 5), length.out = dplyr::n()))

        commits_plot <- commits_plot +
            ggrepel::geom_text_repel(
                data = first_commits_top,
                ggplot2::aes(label = Repository, vjust = -vjust,
                             group = seq_len(nrow(first_commits_top))),
                size = 3,
                direction = "x",
                show.legend = FALSE
            ) +
            ggrepel::geom_text_repel(
                data = first_commits_bottom,
                ggplot2::aes(label = Repository, vjust = vjust,
                             group = seq_len(nrow(first_commits_bottom))),
                size = 3,
                direction = "x",
                show.legend = FALSE
            )
    }

    return(commits_plot)
}

# Functions used in the animated subtitle

#' Whole days elapsed between two dates
#'
#' @param the_date End of the span.
#' @param start_date Start of the span.
#'
#' @return Number of complete days from `start_date` to `the_date`.
get_days <- function(the_date, start_date) {
    # Measure the span directly in days. Reading the `day` slot of
    # lubridate::as.period() is not reliable for long spans because a period
    # can carry part of the span in its year slot, leaving only a remainder
    # in days.
    elapsed <- difftime(
        lubridate::as_datetime(the_date),
        lubridate::as_datetime(start_date),
        units = "days"
    )

    floor(as.numeric(elapsed))
}

#' Number of distinct repositories with a commit at or before a time
#'
#' @param commits Data frame with "Repository" and "When" columns.
#' @param frame_along Time up to which commits are counted.
#'
#' @return Count of unique repositories.
get_nrepos <- function(commits, frame_along) {
    length(unique(commits$Repository[commits$When <= frame_along]))
}

#' Animate a commits plot, revealing commits over time
#'
#' @param commits_plot Plot created by `plot_commits()`.
#' @param height,width Size of the rendered animation.
#' @param start_pause,end_pause Number of frames to hold at the start and end.
#' @param ... Further arguments passed on to `gganimate::animate()`.
#'
#' @return The value returned by `gganimate::animate()`.
animate_commits <- function(commits_plot, height = 600, width = 800,
                            start_pause = 10, end_pause = 20, ...) {

    # Glue together the subtitle. `frame_along` is a variable created by
    # `transition_reveal()`. `commits` and `start_date` are not defined in
    # this function, see the note on `start_date` in `plot_commits()`.
    subtitle <- paste(
        "{get_nrepos(commits, frame_along)} repositories with",
        "{sum(commits$When <= frame_along)} commits in",
        "{get_days(frame_along, start_date)} days by",
        "{as.Date(frame_along)}"
    )

    commits_ani <- commits_plot +
        gganimate::transition_reveal(When) +
        ggplot2::labs(subtitle = subtitle)

    gganimate::animate(commits_ani, height = height, width = width,
                       start_pause = start_pause, end_pause = end_pause, ...)
}