├── comms.rds ├── eRum_keynote_18.pdf ├── README.md ├── author_graph.R ├── eRum_keynote_18.R ├── arg_lists_R-0.49.txt └── eRum_keynote_18.Rmd /comms.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsbivand/eRum18/HEAD/comms.rds -------------------------------------------------------------------------------- /eRum_keynote_18.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rsbivand/eRum18/HEAD/eRum_keynote_18.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # eRum18 2 | eRum 2018 keynote presentation files 3 | 4 | Not infrequently, we wonder why choices such as stringsAsFactors=TRUE or drop=TRUE were made. Understanding the original uses of S and R (in the 1900s), and seeing how these uses affected the development of R lets us appreciate the robustness of R's ecosystem. This keynote uses readings of the R sources and other information to explore R's history. The topics to be touched on include the "colour" books (brown, blue, white, green), interlinkages to SICP (Scheme) and LispStat, the lives of R-core and CRAN, Ancients and Moderns (see Exploring the CRAN social network). 
5 | -------------------------------------------------------------------------------- /author_graph.R: -------------------------------------------------------------------------------- 1 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE---- 2 | pdb0 <- tools::CRAN_package_db() 3 | url <- url("https://bioconductor.org/packages/release/bioc/VIEWS") 4 | dcf <- as.data.frame(read.dcf(url), stringsAsFactors=FALSE) 5 | close(url) 6 | url_ann <- url("https://bioconductor.org/packages/release/data/annotation/VIEWS") 7 | dcf_ann <- as.data.frame(read.dcf(url_ann), stringsAsFactors=FALSE) 8 | close(url_ann) 9 | url_exp <- url("https://bioconductor.org/packages/release/data/experiment/VIEWS") 10 | dcf_exp <- as.data.frame(read.dcf(url_exp), stringsAsFactors=FALSE) 11 | close(url_exp) 12 | o1 <- intersect(names(dcf_exp), names(dcf_ann)) 13 | dcf2 <- rbind(dcf_exp[,o1], dcf_ann[,o1]) 14 | o2 <- intersect(names(dcf), names(dcf2)) 15 | dcf3 <- rbind(dcf2[,o2], dcf[,o2]) 16 | o3 <- intersect(names(pdb0), names(dcf3)) 17 | pdb <- rbind(pdb0[o3], dcf3[o3]) 18 | ## #o3 <- intersect(names(pdb0), names(dcf)) 19 | ## #pdb <- rbind(pdb0[o3], dcf[o3]) 20 | ## #pdb <- tools::CRAN_package_db() 21 | aut <- pdb$Author 22 | 23 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"---- 24 | suppressPackageStartupMessages(library(tidyverse)) 25 | suppressPackageStartupMessages(library(stringr)) 26 | suppressPackageStartupMessages(library(igraph)) 27 | suppressPackageStartupMessages(library(tidygraph)) 28 | suppressPackageStartupMessages(library(ggraph)) 29 | suppressPackageStartupMessages(library(magrittr)) 30 | 31 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 32 | aut <- aut %>% 33 | str_replace_all("\\(([^)]+)\\)", "") %>% 34 | str_replace_all("\\[([^]]+)\\]", "") %>% 35 | str_replace_all("<([^>]+)>", "") %>% 36 | str_replace_all("\n", " ") %>% 37 | 
str_replace_all("[Cc]ontribution.* from|[Cc]ontribution.* by|[Cc]ontributors", " ") %>% 38 | str_replace_all("\\(|\\)|\\[|\\]", " ") %>% 39 | iconv(to = "ASCII//TRANSLIT") %>% 40 | str_replace_all("'$|^'", "") %>% 41 | gsub("([A-Z])([A-Z]{1,})", "\\1\\L\\2", ., perl = TRUE) %>% 42 | gsub("\\b([A-Z]{1}) \\b", "\\1\\. ", .) %>% 43 | map(str_split, ",|;|&| \\. |--|(?<=[a-z])\\.| [Aa]nd | [Ww]ith | [Bb]y ", simplify = TRUE) %>% 44 | map(str_replace_all, "[[:space:]]+", " ") %>% 45 | map(str_replace_all, " $|^ | \\.", "") %>% 46 | map(function(x) x[str_length(x) != 0]) %>% 47 | set_names(pdb$Package) %>% 48 | extract(map_lgl(., function(x) length(x) > 1)) 49 | 50 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 51 | aut_list <- aut %>% 52 | unlist() %>% 53 | dplyr::as_data_frame() %>% 54 | count(value) %>% 55 | rename(Name = value, Package = n) 56 | 57 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 58 | edge_list <- aut %>% 59 | map(combn, m = 2) %>% 60 | do.call("cbind", .) 
%>% 61 | t() %>% 62 | dplyr::as_data_frame() %>% 63 | arrange(V1, V2) %>% 64 | count(V1, V2) 65 | 66 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 67 | g <- edge_list %>% 68 | select(V1, V2) %>% 69 | as.matrix() %>% 70 | graph.edgelist(directed = FALSE) %>% 71 | as_tbl_graph() %>% 72 | activate("edges") %>% 73 | mutate(Weight = edge_list$n) %>% 74 | activate("nodes") %>% 75 | rename(Name = name) %>% 76 | mutate(Component = group_components()) %>% 77 | filter(Component == names(table(Component))[which.max(table(Component))]) 78 | 79 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 80 | suppressMessages(g <- g %>% 81 | left_join(aut_list) %>% 82 | filter(Package > 4) %>% 83 | mutate(Component = group_components()) %>% 84 | filter(Component == names(table(Component))[which.max(table(Component))])) 85 | 86 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 87 | g <- mutate(g, Community = group_edge_betweenness(), 88 | Degree = centrality_degree()) 89 | 90 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 91 | lapply(1:6, function(i) {filter(g, Community == names(sort(table(Community), decr = TRUE))[i]) %>% 92 | select(Name, Package) %>% 93 | arrange(desc(Package)) %>% 94 | top_n(100, Package) %>% as.data.frame()}) -> comms 95 | 96 | ## ---- fig6, eval=FALSE, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")---- 97 | library(wordcloud) 98 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 99 | for (i in 1:2) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(3.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) 100 | par(opar) 101 | 102 | ## ---- fig7, eval=FALSE, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")---- 103 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(2,3)) 104 | for (i in 
1:6) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(2.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) 105 | par(opar) 106 | 107 | -------------------------------------------------------------------------------- /eRum_keynote_18.R: -------------------------------------------------------------------------------- 1 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 2 | system("svn log --xml --verbose -r 6:74688 https://svn.r-project.org/R/trunk > trunk_verbose_log1.xml") 3 | library(XML) 4 | tr <- try(xmlTreeParse("trunk_verbose_log1.xml")) 5 | tr1 <- xmlChildren(xmlRoot(tr)) 6 | revs <- sapply(tr1, function(x) unname(xmlAttrs(x))) 7 | msgs <- sapply(tr1, function(x) xmlValue(xmlChildren(x)[["msg"]])) 8 | authors <- unname(sapply(tr1, function(x) xmlValue(xmlChildren(x)[["author"]]))) 9 | dates <- strptime(substring(unname(sapply(tr1, function(x) xmlValue(xmlChildren(x)[["date"]]))), 1, 19), format="%Y-%m-%dT%H:%M:%S", tz="UTC") # keep 19 chars ("YYYY-MM-DDTHH:MM:SS"); 18 dropped the final seconds digit 10 | years <- format(dates, "%Y") 11 | 12 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 13 | n1 <- sub("thomas", "tlumley", authors) 14 | n2 <- sub("martyn", "plummer", n1) 15 | n3 <- sub("^r$", "rgentlem", n2) 16 | n4 <- sub("paul", "murrell", n3) 17 | n5 <- sub("root", "system", n4) 18 | n6 <- sub("apache", "system", n5) 19 | authors <- factor(n6) 20 | ad_tab <- table(authors, years) 21 | rs <- rowSums(ad_tab) 22 | 23 | ## ---- fig1, fig.show='hide', fig.height=6, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")---- 24 | pal <- scan("colormap_hex.txt", "character", quiet=TRUE) 25 | set.seed(1) 26 | pal_s <- sample(sample(pal)) 27 | plot(1998:2017, colSums(ad_tab)[2:21], type="b", xlab="", ylab="SVN commits", ylim=c(0, 4000)) 28 | grid() 29 | abline(v=c(2000.1639344, 2004.7568306, 2013.2547945), col=pal_s[1:3], lwd=2) 30 | legend("topright", legend=c("1.0.0 2000-02-29", "2.0.0 2004-10-04", "3.0.0 2013-04-03"), 
col=pal_s[1:3], bty="n", cex=0.8, lty=1, lwd=2) 31 | 32 | ## ---- fig2, fig.show='hide', fig.height=6, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")---- 33 | barplot(ad_tab[order(rs)[5:27],], col=pal_s) 34 | legend("topright", legend=rev(rownames(ad_tab)[order(rs)[5:27]]), ncol=5, fill=rev(pal_s), cex=0.8, bty="n") 35 | 36 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 37 | tr1[[1]] 38 | 39 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 40 | tr1[[length(tr1)]] 41 | 42 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 43 | res <- vector(mode="list", length=length(tr1)) 44 | for (i in seq_along(tr1)) res[[i]] <- {x=xmlChildren(xmlChildren(tr1[[i]])[["paths"]]); list(paths=unname(sapply(x, xmlValue)), actions=unname(sapply(x, function(y) xmlAttrs(y)["action"])))} 45 | names(res) <- revs 46 | rl <- sapply(res, function(x) length(x$actions)) 47 | 48 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 49 | cat(paste(years[order(rl, decreasing=TRUE)][1:10], revs[order(rl, decreasing=TRUE)][1:10], sort(unname(rl), decreasing=TRUE)[1:10], unname(unlist(sub("\\n", ", ", msgs[order(rl, decreasing=TRUE)])))[1:10]), sep="\n") 50 | 51 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 52 | res50 <- res[rl < 50] 53 | rl_50 <- rl[rl < 50] 54 | f_50 <- unlist(sapply(res50, "[", 1)) 55 | a_50 <- unlist(sapply(res50, "[", 2)) 56 | a_50_actions <- unname(a_50) 57 | f_50_files <- unname(f_50) 58 | f_50_filesa <- substring(f_50_files, 2, nchar(f_50_files)) 59 | f_50_filesb <- strsplit(f_50_filesa, "/") 60 | f_50_filesc <- t(sapply(f_50_filesb, function(x) {out <- character(9); out[seq_along(x)] <- x; out})) # seq_along(): 1:length(x) is c(1, 0) for a path with no components and the assignment errors 61 | files_df <- data.frame(f_50_filesc) 62 | 63 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 64 | tx <- table(files_df$X2) 65 | sort(tx[tx>350]) 66 | 67 | ## ---- echo = 
FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 68 | tx <- table(files_df[files_df$X2=="src",]$X3) 69 | sort(tx[tx>50]) 70 | 71 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 72 | tx <- table(files_df[files_df$X2=="src" & files_df$X3=="library",]$X4) 73 | sort(tx[tx>60]) 74 | 75 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 76 | tx <- table(files_df[files_df$X2=="src" & files_df$X3=="library" & files_df$X4=="base",]$X5) 77 | sort(tx[tx>4]) 78 | 79 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 80 | a_50_revs <- rep(names(rl_50), times=rl_50) 81 | o <- match(a_50_revs, revs) 82 | a_50_years <- years[o] 83 | t_acts <- table(a_50_years, factor(a_50_actions)) 84 | 85 | ## ---- fig3, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")---- 86 | barplot(t(t_acts), col=rev(pal_s)[1:4]) 87 | legend("topright", legend=c("A The item was added", "D The item was deleted", "M The item was changed", "R The item was replaced"), ncol=2, fill=rev(pal_s)[1:4], cex=0.8, bty="n") 88 | 89 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"---- 90 | suppressPackageStartupMessages(library(BiocInstaller)) 91 | bioc <- available.packages(repo = biocinstallRepos()[1]) 92 | bioc_ann <- available.packages(repo = biocinstallRepos()[2]) 93 | bioc_exp <- available.packages(repo = biocinstallRepos()[3]) 94 | cran <- available.packages() 95 | pdb <- rbind(cran, bioc, bioc_ann, bioc_exp) 96 | 97 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"---- 98 | suppressPackageStartupMessages(library(miniCRAN)) 99 | suppressPackageStartupMessages(library(igraph)) 100 | suppressPackageStartupMessages(library(magrittr)) 101 | pg <- makeDepGraph(pdb[, "Package"], availPkgs = pdb, suggests=FALSE, enhances=TRUE, includeBasePkgs = FALSE) 102 | 103 | ## ---- 
echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 104 | pr <- pg %>% 105 | page.rank(directed = FALSE) %>% 106 | use_series("vector") %>% 107 | sort(decreasing = TRUE) %>% 108 | as.matrix %>% 109 | set_colnames("page.rank") 110 | 111 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE---- 112 | print(pr[1:30,], digits=4) 113 | 114 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 115 | cutoff <- quantile(pr[, "page.rank"], probs = 0.2) 116 | popular <- pr[pr[, "page.rank"] >= cutoff, ] 117 | toKeep <- names(popular) 118 | vids <- V(pg)[toKeep] 119 | gs <- induced.subgraph(pg, vids = toKeep) 120 | cl <- walktrap.community(gs, steps = 3) 121 | 122 | ## ---- echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 123 | topClusters <- table(cl$membership) %>% 124 | sort(decreasing = TRUE) %>% 125 | head(25) 126 | cluster <- function(i, clusters, pagerank, n=10){ 127 | group <- clusters$names[clusters$membership == i] 128 | pagerank[group, ] %>% sort(decreasing = TRUE) %>% head(n) 129 | } 130 | z <- lapply(names(topClusters)[1:15], cluster, clusters=cl, pagerank=pr, n=20) 131 | 132 | ## ---- fig4, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent"), warning=FALSE---- 133 | library(RColorBrewer) 134 | library(wordcloud) 135 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 136 | for (i in 1:2) wordcloud(names(z[[i]]), freq=unname(z[[i]]), scale=rev(5*range(unname(z[[i]]))/max(unname(z[[5]])))) 137 | par(opar) 138 | 139 | ## ---- fig5, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent"), warning=FALSE---- 140 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(2,3)) 141 | for (i in 1:6) wordcloud(names(z[[i]]), freq=unname(z[[i]]), scale=rev(3*range(unname(z[[i]]))/max(unname(z[[5]])))) 142 | par(opar) 143 | 144 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, 
size='\\tiny', cache=TRUE---- 145 | ## pdb0 <- tools::CRAN_package_db() 146 | ## url <- url("https://bioconductor.org/packages/release/bioc/VIEWS") 147 | ## dcf <- as.data.frame(read.dcf(url), stringsAsFactors=FALSE) 148 | ## close(url) 149 | ## url_ann <- url("https://bioconductor.org/packages/release/data/annotation/VIEWS") 150 | ## dcf_ann <- as.data.frame(read.dcf(url_ann), stringsAsFactors=FALSE) 151 | ## close(url_ann) 152 | ## url_exp <- url("https://bioconductor.org/packages/release/data/experiment/VIEWS") 153 | ## dcf_exp <- as.data.frame(read.dcf(url_exp), stringsAsFactors=FALSE) 154 | ## close(url_exp) 155 | ## o1 <- intersect(names(dcf_exp), names(dcf_ann)) 156 | ## dcf2 <- rbind(dcf_exp[,o1], dcf_ann[,o1]) 157 | ## o2 <- intersect(names(dcf), names(dcf2)) 158 | ## dcf3 <- rbind(dcf2[,o2], dcf[,o2]) 159 | ## o3 <- intersect(names(pdb0), names(dcf3)) 160 | ## pdb <- rbind(pdb0[o3], dcf3[o3]) 161 | ## #o3 <- intersect(names(pdb0), names(dcf)) 162 | ## #pdb <- rbind(pdb0[o3], dcf[o3]) 163 | ## #pdb <- tools::CRAN_package_db() 164 | ## aut <- pdb$Author 165 | 166 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"---- 167 | ## suppressPackageStartupMessages(library(tidyverse)) 168 | ## suppressPackageStartupMessages(library(stringr)) 169 | ## suppressPackageStartupMessages(library(igraph)) 170 | ## suppressPackageStartupMessages(library(tidygraph)) 171 | ## suppressPackageStartupMessages(library(ggraph)) 172 | ## suppressPackageStartupMessages(library(magrittr)) 173 | 174 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 175 | ## aut <- aut %>% 176 | ## str_replace_all("\\(([^)]+)\\)", "") %>% 177 | ## str_replace_all("\\[([^]]+)\\]", "") %>% 178 | ## str_replace_all("<([^>]+)>", "") %>% 179 | ## str_replace_all("\n", " ") %>% 180 | ## str_replace_all("[Cc]ontribution.* from|[Cc]ontribution.* by|[Cc]ontributors", " ") %>% 181 | ## 
str_replace_all("\\(|\\)|\\[|\\]", " ") %>% 182 | ## iconv(to = "ASCII//TRANSLIT") %>% 183 | ## str_replace_all("'$|^'", "") %>% 184 | ## gsub("([A-Z])([A-Z]{1,})", "\\1\\L\\2", ., perl = TRUE) %>% 185 | ## gsub("\\b([A-Z]{1}) \\b", "\\1\\. ", .) %>% 186 | ## map(str_split, ",|;|&| \\. |--|(?<=[a-z])\\.| [Aa]nd | [Ww]ith | [Bb]y ", simplify = TRUE) %>% 187 | ## map(str_replace_all, "[[:space:]]+", " ") %>% 188 | ## map(str_replace_all, " $|^ | \\.", "") %>% 189 | ## map(function(x) x[str_length(x) != 0]) %>% 190 | ## set_names(pdb$Package) %>% 191 | ## extract(map_lgl(., function(x) length(x) > 1)) 192 | 193 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 194 | ## aut_list <- aut %>% 195 | ## unlist() %>% 196 | ## dplyr::as_data_frame() %>% 197 | ## count(value) %>% 198 | ## rename(Name = value, Package = n) 199 | 200 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 201 | ## edge_list <- aut %>% 202 | ## map(combn, m = 2) %>% 203 | ## do.call("cbind", .) 
%>% 204 | ## t() %>% 205 | ## dplyr::as_data_frame() %>% 206 | ## arrange(V1, V2) %>% 207 | ## count(V1, V2) 208 | 209 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 210 | ## g <- edge_list %>% 211 | ## select(V1, V2) %>% 212 | ## as.matrix() %>% 213 | ## graph.edgelist(directed = FALSE) %>% 214 | ## as_tbl_graph() %>% 215 | ## activate("edges") %>% 216 | ## mutate(Weight = edge_list$n) %>% 217 | ## activate("nodes") %>% 218 | ## rename(Name = name) %>% 219 | ## mutate(Component = group_components()) %>% 220 | ## filter(Component == names(table(Component))[which.max(table(Component))]) 221 | 222 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 223 | ## suppressMessages(g <- g %>% 224 | ## left_join(aut_list) %>% 225 | ## filter(Package > 4) %>% 226 | ## mutate(Component = group_components()) %>% 227 | ## filter(Component == names(table(Component))[which.max(table(Component))])) 228 | 229 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 230 | ## g <- mutate(g, Community = group_edge_betweenness(), 231 | ## Degree = centrality_degree()) 232 | 233 | ## ---- echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"---- 234 | ## lapply(1:6, function(i) {filter(g, Community == names(sort(table(Community), decr = TRUE))[i]) %>% 235 | ## select(Name, Package) %>% 236 | ## arrange(desc(Package)) %>% 237 | ## top_n(100, Package) %>% as.data.frame()}) -> comms 238 | 239 | ## ---- fig6, eval=FALSE, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")---- 240 | ## opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 241 | ## for (i in 1:2) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(3.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) 242 | ## par(opar) 243 | 244 | ## ---- fig7, eval=FALSE, fig.show='hide', fig.height=5, fig.width=12, 
dev.args=list(family="Fira Sans", bg="transparent")---- 245 | ## opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(2,3)) 246 | ## for (i in 1:6) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(2.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) 247 | ## par(opar) 248 | 249 | -------------------------------------------------------------------------------- /arg_lists_R-0.49.txt: -------------------------------------------------------------------------------- 1 | %o% 2 | function (x, y, FUN = "*", ...) 3 | Gamma 4 | function (link = "inverse") 5 | I 6 | function (x) 7 | IQR 8 | function (x) 9 | NCOL 10 | function (x) 11 | NROW 12 | function (x) 13 | NextMethod 14 | function (generic = NULL, object = NULL, ...) 15 | Recall 16 | function (...) 17 | [.expression 18 | function (x, subs) 19 | [.ts 20 | function (x, i, j) 21 | [[.expression 22 | function (x, subs) 23 | abbreviate 24 | function (names.arg, minlength = 4, use.classes = T, dot = F) 25 | abline 26 | function (a = NULL, b = NULL, h = NULL, v = NULL, reg = NULL, coef = NULL, col = par("col"), lty = par("lty"), ...) 27 | anova 28 | function (x, ...) 29 | anova.glm 30 | function (object, ..., test = NULL, na.action = na.omit) 31 | anova.glmlist 32 | function (object, ..., test = NULL) 33 | anova.lm 34 | function (object, ...) 35 | anovalist.lm 36 | function (object, ..., test = NULL) 37 | aperm 38 | function (a, perm, resize = TRUE) 39 | append 40 | function (x, values, after = length(x)) 41 | apply 42 | function (X, MARGIN, FUN, ...) 
43 | approx 44 | function (x, y, xout, method = "lines", n = 50, rule = 1) 45 | approxfun 46 | function (x, y, method = "lines", rule = 1) 47 | array 48 | function (data = NA, dim = length(data), dimnames = NULL) 49 | arrows 50 | function (x0, y0, x1, y1, length = 0.25, angle = 30, code = 2, col = par("fg"), lty = NULL, xpd = FALSE) 51 | as.array 52 | function (x) 53 | as.character 54 | function (x) 55 | as.complex 56 | function (x) 57 | as.double 58 | function (x) 59 | as.expression 60 | function (x) 61 | as.factor 62 | function (x, ordered = FALSE) 63 | as.formula 64 | function (object) 65 | as.function 66 | function (x) 67 | as.integer 68 | function (x) 69 | as.list 70 | function (x) 71 | as.logical 72 | function (x) 73 | as.matrix 74 | function (x) 75 | as.matrix.data.frame 76 | function (x) 77 | as.matrix.default 78 | function (x) 79 | as.name 80 | function (x) 81 | as.null 82 | function (x) 83 | as.numeric 84 | function (x) 85 | as.qr 86 | function (x) 87 | as.real 88 | function (x) 89 | as.single 90 | function (x) 91 | as.ts 92 | function (x) 93 | as.vector 94 | function (x, mode = "any") 95 | assign 96 | function (x, value, envir = sys.frame(sys.parent()), inherits = FALSE, immediate = TRUE) 97 | attach 98 | function (x, pos = 2) 99 | ave 100 | function (x, ...) 101 | axis 102 | function (which, at, labels = TRUE, ...) 103 | backsolve 104 | function (r, x, k = ncol(r)) 105 | barplot 106 | function (height, names.arg, col = NULL, border = par("fg"), beside = FALSE, space = 0.2, legend.text, main = NULL, xlab = NULL, ylab = NULL, xlim, ylim, ...) 107 | binomial 108 | function (link = "logit") 109 | body 110 | function (fun = sys.function(sys.parent())) 111 | box 112 | function (lty = "solid", ...) 
113 | boxplot 114 | function (x, ..., range = 1.5, width = NULL, varwidth = FALSE, notch = FALSE, names, data = sys.frame(sys.parent()), plot = TRUE, border = par("fg"), col = NULL, log = "", pars = NULL) 115 | boxplot.stats 116 | function (x, coef) 117 | builtins 118 | function (internal = FALSE) 119 | bw.bcv 120 | function (x, samples = 100) 121 | bw.sj 122 | function (x, samples = 100) 123 | bw.ucv 124 | function (x, samples = 100) 125 | bxp 126 | function (z, notch = FALSE, width = NULL, varwidth = FALSE, border = par("fg"), col = NULL, log = "", pars = NULL, ...) 127 | cat 128 | function (..., file = "", sep = " ", fill = FALSE, labels = NULL, append = FALSE) 129 | cbind 130 | function (...) 131 | char.expand 132 | function (input, target, nomatch = stop("no match")) 133 | character 134 | function (n = 0) 135 | charmatch 136 | function (x, table, nomatch = NA) 137 | chisquare.test 138 | function (x, y = NULL, correct = TRUE, p = rep(1/length(x), length(x))) 139 | chol 140 | function (x) 141 | chol2inv 142 | function (x, size = ncol(x)) 143 | cm 144 | function (x) 145 | co.intervals 146 | function (x, number = 6, overlap = 0.5) 147 | coef 148 | function (x, ...) 149 | coefficients 150 | function (x, ...) 151 | coefficients.glm 152 | function (object) 153 | coefficients.lm 154 | function (z) 155 | col 156 | function (x, as.factor = FALSE) 157 | colnames 158 | function (x) 159 | colnames<- 160 | function (x, value) 161 | complete.cases 162 | function (...) 
163 | complex 164 | function (n = 0, real = numeric(), imag = numeric()) 165 | contour 166 | function (x = seq(0, 1, len = nrow(z)), y = seq(0, 1, len = ncol(z)), z, nlevels = 10, levels = pretty(range(z, na.rm = TRUE), nlevels), labcex = 0, col = par("fg"), lty = par("lty"), add = FALSE) 167 | contr.helmert 168 | function (n, contrasts = TRUE) 169 | contr.poly 170 | function (n, contrasts = TRUE) 171 | contr.sum 172 | function (n, contrasts = TRUE) 173 | contr.treatment 174 | function (n, contrasts = TRUE) 175 | contrasts 176 | function (x, contrasts = TRUE) 177 | contrasts<- 178 | function (x, ctr) 179 | convolve 180 | function (x, y, conj = T) 181 | coplot 182 | function (formula, data, given.values, panel = points, rows, columns, show.given = TRUE, col = par("fg"), pch = par("pch"), ...) 183 | cor 184 | function (x, y = NULL, use = "all.obs") 185 | count.fields 186 | function (file, sep = "", skip = 0) 187 | cov 188 | function (x, y = NULL, use = "all.obs") 189 | covratio 190 | function (z) 191 | crossprod 192 | function (x, y = x) 193 | curve 194 | function (expr, from, to, n = 100, add = FALSE, type = "l", ...) 195 | cut 196 | function (x, breaks, labels) 197 | data 198 | function (..., list = character(0)) 199 | data.entry 200 | function (..., Modes = NULL, Names = NULL) 201 | data.frame 202 | function (..., row.names = NULL, col.names = NULL, as.is = FALSE) 203 | data.matrix 204 | function (frame) 205 | dcauchy 206 | function (x, location = 0, scale = 1) 207 | de 208 | function (..., Modes = NULL, Names = NULL) 209 | de.ncols 210 | function (inlist) 211 | de.restore 212 | function (inlist, ncols, coltypes, argnames, args) 213 | de.setup 214 | function (ilist, list.names, incols) 215 | delete.response 216 | function (termobj) 217 | density 218 | function (x, bw, adjust = 1, kernel = "gaussian", n = 512, width, from, to, cut = 3, plot.graph = FALSE) 219 | deriv 220 | function (x, ...) 
221 | deriv.default 222 | function (expr, namevec, function.arg = NULL, tag = ".expr") 223 | deriv.formula 224 | function (expr, namevec, function.arg = NULL, tag = ".expr") 225 | detach 226 | function (name, pos = 2) 227 | deviance 228 | function (x, ...) 229 | deviance.glm 230 | function (object) 231 | deviance.lm 232 | function (z) 233 | dexp 234 | function (x, rate = 1) 235 | df.residual 236 | function (x, ...) 237 | df.residual.lm 238 | function (z) 239 | dfbetas 240 | function (z) 241 | dffits 242 | function (z) 243 | dgamma 244 | function (x, shape, scale = 1) 245 | diag 246 | function (x = 1, nrow, ncol = n) 247 | diag<- 248 | function (x, value) 249 | diff 250 | function (x, lag = 1, differences = 1) 251 | dlnorm 252 | function (x, meanlog = 0, sdlog = 1) 253 | dlogis 254 | function (x, location = 0, scale = 1) 255 | dnorm 256 | function (x, mean = 0, sd = 1) 257 | dotplot 258 | function (x, labels = NULL, groups = NULL, gdata = NULL, cex = par("cex"), pch = 21, gpch = 21, bg = par("bg"), color = par("fg"), gcolor = par("fg"), ...) 259 | double 260 | function (n = 0) 261 | dput 262 | function (x, file = "") 263 | drop.terms 264 | function (termobj, dropx = NULL, keep.response = FALSE) 265 | dump 266 | function (list, fileout = "dumpdata") 267 | dunif 268 | function (x, min = 0, max = 1) 269 | dweibull 270 | function (x, shape, scale = 1) 271 | dyn.load 272 | function (x) 273 | edit 274 | function (name = NULL, file = "", editor = options()$editor) 275 | effects 276 | function (x, ...) 277 | effects.glm 278 | function (object) 279 | effects.lm 280 | function (z, term) 281 | eigen 282 | function (x) 283 | emacs 284 | function (name = NULL, file = "") 285 | end 286 | function (x, ...) 
287 | end.ts 288 | function (x) 289 | environment 290 | function (fun = NULL) 291 | eval 292 | function (expr, envir = sys.frame(sys.parent())) 293 | exists 294 | function (x, where = NULL, envir = NULL, frame = NULL, mode = "any", inherits = TRUE) 295 | expression 296 | function (...) 297 | factor 298 | function (x, levels = sort(unique(x)), labels, exclude = NA, ordered = FALSE) 299 | family 300 | function (x, ...) 301 | family.glm 302 | function (object) 303 | fft 304 | function (z, inverse = FALSE) 305 | fitted 306 | function (x, ...) 307 | fitted.values 308 | function (x, ...) 309 | fitted.values.glm 310 | function (object) 311 | fitted.values.lm 312 | function (z) 313 | fivenum 314 | function (x, na.rm = TRUE) 315 | fix 316 | function (x) 317 | formals 318 | function (fun = sys.function(sys.parent())) 319 | formatC 320 | function (x, digits = NULL, width = max(0, digits) + 1, format = NULL, flag = "", mode = NULL) 321 | formula 322 | function (x, ...) 323 | formula.default 324 | function (x) 325 | formula.formula 326 | function (x) 327 | formula.terms 328 | function (x) 329 | frame 330 | function (ask = NA) 331 | frame.cvt 332 | function (x, as.is = FALSE) 333 | frequency 334 | function (x, ...) 
335 | frequency.ts 336 | function (x) 337 | gaussian 338 | function () 339 | get 340 | function (x, envir = NULL, mode = "any", inherits = TRUE) 341 | getenv 342 | function (item) 343 | gl 344 | function (n, k, length, labels = 1:n, ordered = FALSE) 345 | glm 346 | function (formula, family = gaussian, data, weights = NULL, subset = NULL, na.action = na.fail, start = NULL, offset = NULL, control = glm.control(epsilon = 0.0001, maxit = 10, trace = FALSE), model = TRUE, method = glm.fit, x = FALSE, y = TRUE) 347 | glm.control 348 | function (epsilon = 0.0001, maxit = 10, trace = FALSE) 349 | glm.fit 350 | function (x, y, weights = rep(1, nobs), start = NULL, offset = rep(0, nobs), family = gaussian(), control = glm.control(), intercept = TRUE) 351 | grep 352 | function (pattern, x, ignore.case = FALSE, extended = TRUE, value = FALSE) 353 | grid 354 | function (nx = 3, ny = 3, col = "lightgray", lty = "dotted") 355 | gsub 356 | function (pattern, replacement, x, ignore.case = FALSE, extended = TRUE) 357 | heat.colors 358 | function (n) 359 | help 360 | function (topic, data, library) 361 | help.start 362 | function (gui = "irrelevant", browser = "netscape") 363 | hist 364 | function (x, breaks, freq = TRUE, col = NULL, border = par("fg"), main = paste("Histogram of", deparse(substitute(x))), xlim = range(breaks), ylim = range(counts, 0), xlab = deparse(substitute(x)), ylab, ...) 365 | hsv 366 | function (h = 1, s = 1, v = 1, gamma = 1) 367 | identify 368 | function (x, y = NULL, text = as.character(seq(x)), pos = FALSE, ...) 369 | ifelse 370 | function (test, yes, no) 371 | image 372 | function (x = seq(0, 1, len = nrow(z)), y = seq(0, 1, len = ncol(z)), z, zlim = range(z, na.rm = TRUE), col = heat.colors(12), ...) 
373 | inherits 374 | function (x, name) 375 | integer 376 | function (n = 0) 377 | inverse.gaussian 378 | function () 379 | is.expression 380 | function (x) 381 | is.finite 382 | function (x) 383 | is.na.data.frame 384 | function (x) 385 | is.qr 386 | function (x) 387 | is.symbol 388 | function (x) 389 | is.ts 390 | function (x) 391 | is.vector 392 | function (x, mode = "any") 393 | lapply 394 | function (x, FUN, ...) 395 | legend 396 | function (x, y, legend, fill, col = "black", lty, pch, bty = "o", bg = par("bg"), xjust = 0, yjust = 1, ...) 397 | library 398 | function (name) 399 | library.dynam 400 | function (name) 401 | license 402 | function () 403 | lines 404 | function (x, ...) 405 | lines.default 406 | function (x, y = NULL, type = "l", col = par("col"), ...) 407 | lm 408 | function (formula, data = NULL, subset = NULL, weights = NULL, na.action = na.fail, singular.ok = TRUE) 409 | lm.fit 410 | function (x, y) 411 | lm.influence 412 | function (z) 413 | lm.w.fit 414 | function (x, y, w) 415 | load 416 | function (file) 417 | locator 418 | function (n = 1) 419 | log10 420 | function (x) 421 | log2 422 | function (x) 423 | logical 424 | function (n = 0) 425 | lower.tri 426 | function (x, diag = FALSE) 427 | lowess 428 | function (x, y = NULL, f = 2/3, iter = 3, delta = 0.01 * diff(range(xy$x[o]))) 429 | ls 430 | function (name, pos = -1, envir = NULL, all.files = FALSE, pattern) 431 | ls.diag 432 | function (ls.out) 433 | ls.print 434 | function (ls.out, digits = 4, print.it = TRUE) 435 | lsfit 436 | function (x, y, wt = NULL, intercept = TRUE, tolerance = 1e-07, yname = NULL) 437 | macintosh 438 | function () 439 | mad 440 | function (y, center, constant = 1.4826, na.rm = FALSE) 441 | make.link 442 | function (link) 443 | mat.or.vec 444 | function (nr, nc) 445 | match 446 | function (x, table, nomatch = NA) 447 | match.arg 448 | function (arg, choices) 449 | match.call 450 | function (definition = NULL, call = sys.call(sys.parent()), expand.dots = T) 451 | 
matrix 452 | function (data = NA, nrow = 1, ncol = 1, byrow = FALSE, dimnames = NULL) 453 | max 454 | function (..., na.rm = FALSE) 455 | mean 456 | function (x, trim = 0, na.rm = FALSE) 457 | median 458 | function (x, na.rm = FALSE) 459 | menu 460 | function (x) 461 | methods 462 | function (generic.function, class) 463 | min 464 | function (..., na.rm = FALSE) 465 | mode 466 | function (x) 467 | mode<- 468 | function (x, value) 469 | model.data.frame 470 | function (...) 471 | model.extract 472 | function (frame, component) 473 | model.frame 474 | function (formula, data = sys.frame(sys.parent()), subset = NULL, na.action = eval(as.name(options("na.action")$na.action)), use.data = TRUE, process.offsets = TRUE, ...) 475 | model.matrix 476 | function (formula, data) 477 | model.offset 478 | function (x) 479 | model.response 480 | function (data, type = "numeric") 481 | model.weights 482 | function (x) 483 | mtext 484 | function (text, side = 3, line = 0, outer = FALSE, at = NULL, ...) 485 | mvfft 486 | function (z, inverse = FALSE) 487 | na.action 488 | function (x, ...) 489 | na.action.default 490 | function (x) 491 | na.fail 492 | function (frame) 493 | na.omit 494 | function (frame) 495 | named.elements 496 | function (x) 497 | names 498 | function (x, ...) 499 | names.default 500 | function (x) 501 | names<- 502 | function (x, ...) 
503 | names<-.default 504 | function (x, n) 505 | ncol 506 | function (x) 507 | nextn 508 | function (n, factors = c(2, 3, 5)) 509 | nlm 510 | function (f, p, hessian = FALSE, typsiz = rep(1, length(p)), fscale = 1, print.level = 0, ndigit = 12, gradtl = 1e-06, stepmx = max(1000 * sqrt(sum((p/typsiz)^2)), 1000), steptl = 1e-06, itnlim = 100) 511 | nrow 512 | function (x) 513 | numeric 514 | function (n = 0) 515 | objects 516 | function (name, pos = -1, envir = NULL, all.files = FALSE, pattern) 517 | offset 518 | function (x) 519 | optimize 520 | function (f, interval, lower = min(interval), upper = max(interval), maximum = FALSE, tol = .Machine$double.eps^0.25, ...) 521 | options 522 | function (...) 523 | ordered 524 | function (x, levels = sort(unique(x)), labels, exclude = NA, ordered = TRUE) 525 | outer 526 | function (x, y, FUN = "*", ...) 527 | pairs 528 | function (x, labels, panel = points, main = NULL, font.main = par("font.main"), cex.main = par("cex.main"), ...) 529 | panel.smooth 530 | function (x, y, col, pch, f = 2/3, iter = 3, ...) 531 | par 532 | function (...) 533 | par2 534 | function (...) 535 | parse 536 | function (file = "", n = NULL, text = NULL, prompt = NULL, white = FALSE) 537 | paste 538 | function (..., sep = " ", collapse = NULL) 539 | pcauchy 540 | function (q, location = 0, scale = 1) 541 | pexp 542 | function (q, rate = 1) 543 | pgamma 544 | function (q, shape, scale = 1) 545 | pictex 546 | function (file = "Rplots.tex", width = 5, height = 4, debug = FALSE, bg = "white", fg = "black") 547 | piechart 548 | function (x, labels = names(x), edges = 200, radius = 0.8, col = NULL, main = NULL, ...) 549 | plnorm 550 | function (q, meanlog = 0, sdlog = 1) 551 | plogis 552 | function (q, location = 0, scale = 1) 553 | plot 554 | function (x, ...) 555 | plot.data.frame 556 | function (x, ...) 
557 | plot.default 558 | function (x, y = NULL, type = "p", col = par("fg"), bg = NA, pch = par("pch"), xlim = NULL, ylim = NULL, log = "", axes = TRUE, frame.plot = TRUE, panel.first = NULL, panel.last = NULL, ann = par("ann"), main = NULL, xlab = NULL, ylab = NULL, cex = par("cex"), lty = par("lty"), ...) 559 | plot.density 560 | function (s, main = "", xlab, ylab = "Density", type = "l", ...) 561 | plot.factor 562 | function (x, y, ...) 563 | plot.new 564 | function (ask = NA) 565 | plot.ts 566 | function (x, type = "l", xlim, ylim, xlab, ylab, log = "", col = par("col"), bg = NA, pch = par("pch"), lty = par("lty"), ...) 567 | plot.xy 568 | function (xy, type, pch = 1, lty = "solid", col = par("fg"), bg = NA, cex = 1, ...) 569 | pmatch 570 | function (x, table, nomatch = NA) 571 | pmax 572 | function (..., na.rm = FALSE) 573 | pmin 574 | function (..., na.rm = FALSE) 575 | pnorm 576 | function (q, mean = 0, sd = 1) 577 | points 578 | function (x, ...) 579 | points.default 580 | function (x, y = NULL, pch = 1, col = "black", ...) 581 | poisson 582 | function (link = "log") 583 | polygon 584 | function (x, y = NULL, border = par("fg"), ...) 585 | postscript 586 | function (file = "Rplots.ps", paper = options("papersize")$papersize, landscape = TRUE, width = 0, height = 0, family = "Helvetica", pointsize = 12, bg = "white", fg = "black", onefile, print.it, append) 587 | power 588 | function (lambda = 1) 589 | ppoints 590 | function (x) 591 | predict 592 | function (fit, ...) 593 | predict.default 594 | function (object, ...) 595 | pretty 596 | function (x, n = 5) 597 | print 598 | function (x, ...) 599 | print.anova.glm 600 | function (x, digits = max(3, .Options$digits - 3), na.print = "", ...) 601 | print.anova.lm 602 | function (x, digits = max(3, .Options$digits - 3), ...) 603 | print.atomic 604 | function (x, quote = TRUE, ...) 
605 | print.default 606 | function (x, digits = NULL, quote = TRUE, na.print = NULL, print.gap = NULL) 607 | print.expression 608 | function (x) 609 | print.family 610 | function (x, ...) 611 | print.formula 612 | function (x) 613 | print.glm 614 | function (x, digits = max(3, .Options$digits - 3), na.print = "", ...) 615 | print.htest 616 | function (x, digits = 4, quote = T, prefix = "") 617 | print.lm 618 | function (x, digits = max(3, .Options$digits - 3), ...) 619 | print.summary.glm 620 | function (x, digits = max(3, .Options$digits - 3), na.print = "", ...) 621 | print.summary.lm 622 | function (x, digits = max(3, .Options$digits - 3), ...) 623 | print.table 624 | function (x, digits = .Options$digits, quote = FALSE, na.print = "", ...) 625 | print.tabular 626 | function (table, digits = max(3, .Options$digits - 3), na.print = "", ...) 627 | print.terms 628 | function (x) 629 | print.ts 630 | function (x, calender, ...) 631 | prmatrix 632 | function (x, rowlab = character(0), collab = character(0), quote = TRUE, right = FALSE) 633 | prod 634 | function (..., na.rm = FALSE) 635 | prompt 636 | function (object, ...) 637 | prompt.default 638 | function (object, filename = paste0(name, ".man"), force.function = FALSE) 639 | provide 640 | function (name) 641 | punif 642 | function (q, min = 0, max = 1) 643 | pweibull 644 | function (q, shape, scale = 1) 645 | q 646 | function (save = "ask") 647 | qcauchy 648 | function (p, location = 0, scale = 1) 649 | qexp 650 | function (p, rate = 1) 651 | qgamma 652 | function (p, shape, scale = 1) 653 | qlnorm 654 | function (p, meanlog = 0, sdlog = 1) 655 | qlogis 656 | function (p, location = 0, scale = 1) 657 | qnorm 658 | function (p, mean = 0, sd = 1) 659 | qqline 660 | function (y, ...) 661 | qqnorm 662 | function (y, ylim, main = "Normal Q-Q Plot", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles", ...) 
663 | qqplot 664 | function (x, y, plot.it = TRUE, xlab = deparse(substitute(x)), ylab = deparse(substitute(y)), ...) 665 | qr 666 | function (x, tol = 1e-07) 667 | qr.coef 668 | function (qr, y) 669 | qr.fitted 670 | function (qr, y, k = qr$rank) 671 | qr.qty 672 | function (qr, y) 673 | qr.qy 674 | function (qr, y) 675 | qr.resid 676 | function (qr, y) 677 | qr.solve 678 | function (a, b, tol = 1e-07) 679 | quantile 680 | function (x, probs = seq(0, 1, 0.25), na.rm = FALSE) 681 | quasi 682 | function (link = "identity", variance = "constant") 683 | quit 684 | function (save = "ask") 685 | qunif 686 | function (p, min = 0, max = 1) 687 | quote 688 | function (x) 689 | qweibull 690 | function (p, shape, scale = 1) 691 | rainbow 692 | function (n, s = 1, v = 1, start = 0, end = (n - 1)/n, gamma = 1) 693 | range 694 | function (..., na.rm = FALSE) 695 | rcauchy 696 | function (n, location = 0, scale = 1) 697 | read.table 698 | function (file, header = FALSE, sep = "", row.names, col.names, as.is = FALSE, na.strings = "NA", skip = 0) 699 | real 700 | function (n = 0) 701 | rect 702 | function (xleft, ybottom, xright, ytop, col = NULL, border = par("fg"), lty = NULL, xpd = FALSE) 703 | reformulate 704 | function (termlabels, response = NULL) 705 | rep 706 | function (x, times, length.out) 707 | replace 708 | function (x, list, values) 709 | require 710 | function (name, quietly = FALSE) 711 | resid 712 | function (x, ...) 713 | residuals 714 | function (x, ...) 
715 | residuals.glm 716 | function (object, type = "deviance") 717 | residuals.lm 718 | function (z) 719 | rev 720 | function (x) 721 | rexp 722 | function (n, rate = 1) 723 | rgamma 724 | function (n, shape, scale = 1) 725 | rgb 726 | function (r, g, b, names = NULL) 727 | rlnorm 728 | function (n, meanlog = 0, sdlog = 1) 729 | rlogis 730 | function (n, location = 0, scale = 1) 731 | rm 732 | function (..., list = character(0), envir = NULL, inherits = FALSE) 733 | rnorm 734 | function (n, mean = 0, sd = 1) 735 | row 736 | function (x, as.factor = FALSE) 737 | row.names 738 | function (x) 739 | row.names<- 740 | function (x, value) 741 | rownames 742 | function (x) 743 | rownames<- 744 | function (x, value) 745 | rstudent 746 | function (z) 747 | runif 748 | function (n, min = 0, max = 1) 749 | rweibull 750 | function (n, shape, scale = 1) 751 | sample 752 | function (x, size, replace = FALSE) 753 | sapply 754 | function (X, FUN, ..., simplify = TRUE) 755 | save 756 | function (..., list = character(0), file = "", ascii = FALSE) 757 | scan 758 | function (file = "", what = 0, nmax = -1, sep = "", skip = 0, nlines = 0, na.strings = "NA", flush = FALSE, strip.white = FALSE, quiet = FALSE) 759 | sd 760 | function (x, na.rm = FALSE) 761 | segments 762 | function (x0, y0, x1, y1, col = par("fg"), lty = par("lty")) 763 | seq 764 | function (from = 1, to = 1, by = ((to - from)/(length.out - 1)), length.out = NULL, along.with = NULL) 765 | sequence 766 | function (nvec) 767 | solve 768 | function (a, b, tol = 1e-07) 769 | solve.qr 770 | function (a, b, tol = 1e-07) 771 | sort 772 | function (x, partial = NULL, na.last = NA) 773 | source 774 | function (file, local = FALSE) 775 | spline 776 | function (x, y, n = 3 * length(x), method = "fmm", xmin = min(x), xmax = max(x)) 777 | splinefun 778 | function (x, y, method = "fmm") 779 | split 780 | function (x, f) 781 | start 782 | function (x, ...) 
783 | start.ts 784 | function (x) 785 | stat.anova 786 | function (table, test, scale, df.scale, n) 787 | stem 788 | function (x, scale = 1, width = 80, atom = 1e-08) 789 | storage.mode 790 | function (x) 791 | storage.mode<- 792 | function (x, value) 793 | stripplot 794 | function (x, method = "overplot", jitter = 0.1, offset = 1/3, vertical = FALSE, group.names, xlim, ylim, main = "", ylab = "", xlab = "", pch = 0, col = par("fg"), cex = par("cex")) 795 | strsplit 796 | function (x, split) 797 | structure 798 | function (.Data, ...) 799 | strwidth 800 | function (s, units = "user", cex = NULL) 801 | sub 802 | function (pattern, replacement, x, ignore.case = FALSE, extended = TRUE) 803 | substr 804 | function (x, start, stop) 805 | substring 806 | function (text, first, last = 1000000) 807 | sum 808 | function (..., na.rm = FALSE) 809 | summary 810 | function (x, ...) 811 | summary.data.frame 812 | function (x, maxsum = 7, ...) 813 | summary.default 814 | function (object, ..., digits = max(options()$digits - 3, 3)) 815 | summary.factor 816 | function (x, maxsum = 100, ...) 817 | summary.glm 818 | function (object, dispersion = NULL, correlation = TRUE, na.action = na.omit) 819 | summary.lm 820 | function (z, correlation = FALSE) 821 | summary.matrix 822 | function (x, ...) 823 | svd 824 | function (x, nu = min(n, p), nv = min(n, p)) 825 | sweep 826 | function (x, MARGIN, STATS, FUN = "-", ...) 827 | switch 828 | function (EXPR, ...) 
829 | sys.call 830 | function (which = 0) 831 | sys.calls 832 | function () 833 | sys.frame 834 | function (which = 0) 835 | sys.frames 836 | function () 837 | sys.function 838 | function (which = 0) 839 | sys.nframe 840 | function () 841 | sys.on.exit 842 | function () 843 | sys.parent 844 | function (n = 1) 845 | sys.parents 846 | function () 847 | sys.source 848 | function (file) 849 | sys.status 850 | function () 851 | system 852 | function (call, intern = FALSE) 853 | system.file 854 | function (dir, name) 855 | t.test 856 | function (x, y = NULL, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95) 857 | table 858 | function (x, ...) 859 | tabulate 860 | function (bin, nbins = max(bin)) 861 | tapply 862 | function (x, INDEX, FUN, ...) 863 | terms 864 | function (x, ...) 865 | terms.default 866 | function (x) 867 | terms.formula 868 | function (x, specials = NULL, abb = NULL, data = NULL, keep.order = FALSE) 869 | terms.terms 870 | function (x) 871 | terrain.colors 872 | function (n) 873 | text 874 | function (x, y = NULL, text, ...) 875 | time 876 | function (x, ...) 877 | time.ts 878 | function (x) 879 | title 880 | function (main = NULL, sub = NULL, xlab = NULL, ylab = NULL, ...) 881 | topo.colors 882 | function (n) 883 | traceback 884 | function () 885 | trunc 886 | function (x) 887 | ts 888 | function (data = NA, start = 1, end = numeric(0), frequency = 1, deltat = 1) 889 | tsp 890 | function (x) 891 | tsp<- 892 | function (x, tsp) 893 | uniroot 894 | function (f, interval, lower = min(interval), upper = max(interval), tol = .Machine$double.eps^0.25, ...) 895 | unix.time 896 | function (expr) 897 | update 898 | function (x, ...) 
899 | update.lm 900 | function (lm.obj, formula, data, weights, subset, na.action) 901 | upper.tri 902 | function (x, diag = FALSE) 903 | var 904 | function (x, y = x, na.rm = FALSE) 905 | vi 906 | function (name = NULL, file = "") 907 | weighted.mean 908 | function (x, w, na.rm = FALSE) 909 | weights 910 | function (x, ...) 911 | weights.lm 912 | function (z) 913 | which 914 | function (x) 915 | window 916 | function (x, ...) 917 | window.ts 918 | function (x, start, end) 919 | windows 920 | function () 921 | write 922 | function (x, file = "data", ncolumns = if (is.character(x)) 1 else 5, append = FALSE) 923 | x11 924 | function (display = "", width = 7, height = 7, ps = 12, printcmd = options("printcmd")$printcmd, paper = options("papersize")$papersize, orientation = "flexible") 925 | xedit 926 | function (name = NULL, file = "") 927 | xemacs 928 | function (name = NULL, file = "") 929 | xinch 930 | function (x = 1) 931 | xy.coords 932 | function (x, y, xlab = NULL, ylab = NULL) 933 | yinch 934 | function (x = 1) 935 | -------------------------------------------------------------------------------- /eRum_keynote_18.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A practical history of R (where things came from)" 3 | author: "Roger Bivand" 4 | date: "16 May 2018" 5 | output: 6 | beamer_presentation: 7 | theme: m 8 | pandoc_args: [ 9 | "--latex-engine=xelatex" 10 | ] 11 | highlight: pygments 12 | includes: 13 | in_header: header.tex 14 | keep_tex: true 15 | classoption: "aspectratio=169" 16 | --- 17 | 18 | ```{r setup, include=FALSE} 19 | knitr::opts_chunk$set(echo = FALSE) 20 | ``` 21 | 22 | ```{r size, echo=FALSE, results='hide'} 23 | knitr::knit_hooks$set(mysize = function(before, options, envir) { 24 | if (before) 25 | return(options$size) 26 | }) 27 | knitr::opts_chunk$set(prompt=TRUE) 28 | suppressMessages(library(extrafont)) 29 | suppressMessages(loadfonts()) 30 | ``` 31 | 32 | ```{r set-options, 
echo=FALSE, results='hide'} 33 | options(width = 64) 34 | ``` 35 | 36 | ## Source and links 37 | 38 | The keynote presentation files are on github at: 39 | [\textcolor{mLightBrown}{rsbivand/eRum18}](https://github.com/rsbivand/eRum18) 40 | 41 | 42 | ## Introduction 43 | 44 | - Not infrequently, we wonder why choices such as `stringsAsFactors=TRUE` or `drop=TRUE` were made. 45 | 46 | - Understanding the original uses of S and R (in the 1980s and 1990s), and seeing how these uses affected the development of R lets us appreciate the robustness of R's ecosystem. 47 | 48 | - This keynote uses readings of the R sources and other information to explore R's history. The topics to be touched on include the "colour" books (brown, blue, white, green), interlinkages to SICP (Scheme) and LispStat 49 | 50 | - We'll also touch on the lives of R-core, the mailing lists and CRAN, and Ancients and Moderns (see [\textcolor{mLightBrown}{Exploring the CRAN social network}](http://www.pieceofk.fr/?p=431)). 51 | 52 | # History of R and its data structures 53 | 54 | ## Sources 55 | 56 | - [\textcolor{mLightBrown}{Rasmus Bååth}](http://www.sumsar.net/blog/2014/11/tidbits-from-books-that-defined-s-and-r/) has a useful blog piece on R's antecedents in the S language 57 | 58 | - Something similar is present in the second chapter of \citep{chambers:16}, from the viewpoint of one of those responsible for the development of the S language 59 | 60 | - In addition to S, we need to take [\textcolor{mLightBrown}{SICP and Scheme}](http://sarabander.github.io/sicp/html/index.xhtml) into account \citep[][second edition]{sicp2e}, as described by \citet{ihaka:1996} and \citet{wickham:14} 61 | 62 | - Finally, LispStat and its creators have played and continue to play a major role in developing R \citep{Tierney1990,Tierney1996,JSSv013i09} 63 | 64 | ## Early R was Scheme via SICP 65 | 66 | ![Ross Ihaka's description](ihaka10.png) 67 | 68 | ([\textcolor{mLightBrown}{JSM 
talk}](https://www.stat.auckland.ac.nz/%7Eihaka/downloads/JSM-2010.pdf)) 69 | 70 | 71 | ## From S to R: Brown Books 72 | 73 | \begincols 74 | \begincol{0.48\textwidth} 75 | 76 | \citet{becker+chambers:84}: S: An Interactive Environment for Data Analysis and Graphics, A.K.A. the Brown Book 77 | 78 | \citet{becker+chambers:85}: Extending the S System 79 | 80 | \endcol 81 | 82 | \begincol{0.48\textwidth} 83 | 84 | \includegraphics[width=0.95\textwidth]{../pix/S2_books.png} 85 | 86 | \endcol 87 | \endcols 88 | 89 | ## From S to R: Blue and White Books 90 | 91 | \begincols 92 | \begincol{0.48\textwidth} 93 | 94 | \citet{R:Becker+Chambers+Wilks:1988}: The New S Language: A Programming Environment for Data Analysis and Graphics, A.K.A. the Blue Book. 95 | 96 | \citet{R:Chambers+Hastie:1992}: Statistical Models in S, A.K.A. the White Book. 97 | 98 | \endcol 99 | 100 | \begincol{0.48\textwidth} 101 | 102 | \includegraphics[width=0.95\textwidth]{../pix/S3_books.png} 103 | 104 | \endcol 105 | \endcols 106 | 107 | ## From S to R: Green Book 108 | 109 | \begincols 110 | \begincol{0.48\textwidth} 111 | 112 | \citet{R:Chambers:1998}: Programming with Data: A Guide to the S Language, A.K.A. the Green Book. 
113 | 114 | \citet{R:Venables+Ripley:2000}: S Programming 115 | 116 | \endcol 117 | 118 | \begincol{0.48\textwidth} 119 | 120 | \includegraphics[width=0.95\textwidth]{../pix/S4_books.png} 121 | 122 | \endcol 123 | \endcols 124 | 125 | 126 | ## S2 to S3 to S4 127 | 128 | - The S2 system was described in the Brown Book, S3 in the Blue Book and completed in the White Book, finally S4 in the Green Book 129 | 130 | - The big advance from S2 to S3 was that users could write functions; that data.frame objects were defined; that formula objects were defined; and that S3 classes and method dispatch appeared 131 | 132 | - S4 brought connections and formal S4 classes, the latter seen in R in the **methods** package ([\textcolor{mLightBrown}{still controversial}](https://stat.ethz.ch/pipermail/r-devel/2017-December/075304.html)) 133 | 134 | - [\textcolor{mLightBrown}{S-PLUS}](https://en.wikipedia.org/wiki/S-PLUS) was/is the commercial implementation of [\textcolor{mLightBrown}{S}](https://en.wikipedia.org/wiki/S_(programming_language)) and its releases drove S3 and S4 changes 135 | 136 | ## S, Bell Labs, S-PLUS 137 | 138 | - S was a Bell Labs innovation, like Unix, C, C++, and many interpreted languages (like AWK); many of these share key understandings 139 | 140 | - Now owned by Nokia, previously Alcatel-Lucent, Lucent, and AT&T 141 | 142 | - Why would a telecoms major (AT&T) pay for fundamental research in computer science and data analysis (not to sell or market other products better)? 
143 | 144 | - Some Green Book examples are for quality control of telecoms components 145 | 146 | ## S-PLUS and R 147 | 148 | - S-PLUS was quickly adopted for teaching and research, and with S3, provided extensibility in the form of libraries 149 | 150 | - Most links have died by now, but see this [\textcolor{mLightBrown}{FAQ}](http://ftp.uni-bayreuth.de/math/statlib/S/FAQ) for a flavour - there was a lively community of applied statisticians during the 1990s 151 | 152 | - S built on a long tradition of documentation through examples, with use cases and data sets taken from the applied statistical literature; this let users compare output with methods descriptions 153 | 154 | - ... so we get to R 155 | 156 | 157 | ## and what about LispStat? 158 | 159 | - Luke Tierney was in R core in 1997, and has continued to exert clear influence over development 160 | 161 | - Because R uses a Scheme engine, similar to Lisp, under the hood, his insight into issues like the garbage collector, namespaces, byte-compilation, serialization, parallelization, and now [\textcolor{mLightBrown}{ALTREP}](http://blog.revolutionanalytics.com/2018/04/r-350.html) has been crucial ([\textcolor{mLightBrown}{see also the proposal by Luke Tierney, Gabe Becker and Tomas Kalibera}](https://svn.r-project.org/R/branches/ALTREP/ALTREP.html)) 162 | 163 | - Many of these issues involve the defensive copy on possible change policy involved in lazy evaluation, which may lead to multiple redundant copies of data being present in memory 164 | 165 | - Luke Tierney and Brian Ripley have fought hard to let R load fast, something that is crucial to ease the use of R on multicore systems or inside databases 166 | 167 | # Vintage R 168 | 169 | ## Use questions: left assign 170 | 171 | \begincols 172 | \begincol{0.48\textwidth} 173 | 174 | Why was `underscore_separated` not a permitted naming convention in R earlier (see 
[\textcolor{mLightBrown}{\citet{RJ-2012-018}}](https://journal.r-project.org/archive/2012/RJ-2012-018/index.html))? `_` was not a permitted character in names until it had lost its left assign role, the same as `<-`, in 1.9.0 in 2004. (Brown Book p. 256, Blue Book p. 387) 175 | \endcol 176 | 177 | \begincol{0.48\textwidth} 178 | 179 | \includegraphics[width=0.95\textwidth]{../pix/assign.png} 180 | 181 | \endcol 182 | \endcols 183 | 184 | 185 | 186 | ## Use questions: `strings`A`s`F`actors` 187 | 188 | \begincols 189 | \begincol{0.48\textwidth} 190 | 191 | Why is the `factor` storage mode still so central? `stringsAsFactors = TRUE` was the legacy `as.is = FALSE`; analysis of categorical variables was more important, and `factor` only needed to store `nlevels()` strings (White Book p. 55-56, 567) 192 | \endcol 193 | 194 | \begincol{0.48\textwidth} 195 | 196 | \includegraphics[width=0.95\textwidth]{../pix/asis1.png} 197 | 198 | \endcol 199 | \endcols 200 | 201 | ## Use questions: `drop` 202 | 203 | \begincols 204 | \begincol{0.48\textwidth} 205 | 206 | `drop = TRUE` for array-like objects; since matrices are vectors with a `dim` attribute, choosing (part of) a row or column made `dim` redundant (Blue Book p. 128, White Book p. 64) 207 | \endcol 208 | 209 | \begincol{0.48\textwidth} 210 | 211 | \includegraphics[width=0.95\textwidth]{../pix/drop.png} 212 | 213 | \endcol 214 | \endcols 215 | 216 | ## But scalars are also vectors ... 
217 | 218 | Treating scalars as vectors is not efficient: 219 | 220 | \includegraphics[width=0.95\textwidth]{../pix/Screenshot-2018-4-6 Ihaka Lecture Series 2017 Statistical computing in a (more) static environment - YouTube.png} 221 | 222 | ## Vintage R 223 | 224 | - An [\textcolor{mLightBrown}{R-0.49 source tarball}](https://cran.r-project.org/src/base/R-0/R-0.49.tgz) is available from CRAN 225 | 226 | - Diffs for Fedora 27 (gcc 7.3.1) include setting compilers and `-fPIC` in `config.site`, putting `./` before `config.site` in `configure`, and three corrections in `src/unix`: in `dataentry.h` add `#include ` and comment out `NeedFunctionPrototypes`; in `rotated.c` comment out `/*static*/ double round`; in `system.c` comment out `__setfpucw` twice; BLAS must be provided externally 227 | 228 | - Not (yet) working: prototypes are missing in the **eda** and **mva** packages so the shared objects fail to build 229 | 230 | - [\textcolor{mLightBrown}{R-0.49 video}](run:R-0.49-2018-05-09\_09.07.15.mp4) 231 | 232 | # R SVN logs 233 | 234 | ## R SVN logs 235 | 236 | - The command: `svn log --xml --verbose -r 6:74688 https://svn.r-project.org/R/trunk > trunk_verbose_log1.xml` provides a rich data source 237 | 238 | - Each log entry has a revision number, author and timestamp, message and paths to files indicating the action undertaken for each file 239 | 240 | - The XML version is somewhat easier to untangle than the plain-text version 241 | 242 | - I haven't tried possible similar approaches to Winston Chang's [\textcolor{mLightBrown}{github r-source repo}](https://github.com/wch/r-source) 243 | 244 | 245 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 246 | library(XML) 247 | tr <- try(xmlTreeParse("../code/trunk_verbose_log1.xml")) 248 | tr1 <- xmlChildren(xmlRoot(tr)) 249 | revs <- sapply(tr1, function(x) unname(xmlAttrs(x))) 250 | msgs <- sapply(tr1, function(x) xmlValue(xmlChildren(x)[["msg"]])) 251 | authors <- 
unname(sapply(tr1, function(x) xmlValue(xmlChildren(x)[["author"]]))) 252 | dates <- strptime(substring(unname(sapply(tr1, function(x) xmlValue(xmlChildren(x)[["date"]]))), 1, 19), format="%Y-%m-%dT%H:%M:%S", tz="UTC") # keep the 19 characters that the "%Y-%m-%dT%H:%M:%S" layout describes 253 | years <- format(dates, "%Y") 254 | ``` 255 | 256 | ## Commits 1998-2017 257 | 258 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 259 | n1 <- sub("thomas", "tlumley", authors) 260 | n2 <- sub("martyn", "plummer", n1) 261 | n3 <- sub("^r$", "rgentlem", n2) 262 | n4 <- sub("paul", "murrell", n3) 263 | n5 <- sub("root", "system", n4) 264 | n6 <- sub("apache", "system", n5) 265 | authors <- factor(n6) 266 | ad_tab <- table(authors, years) 267 | rs <- rowSums(ad_tab) 268 | ``` 269 | 270 | 271 | ```{r, fig1, fig.show='hide', fig.height=6, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")} 272 | pal <- scan("../colormap_hex.txt", "character", quiet=TRUE) 273 | set.seed(1) 274 | pal_s <- sample(sample(pal)) 275 | plot(1998:2017, colSums(ad_tab)[2:21], type="b", xlab="", ylab="SVN commits", ylim=c(0, 4000)) 276 | grid() 277 | abline(v=c(2000.1639344, 2004.7568306, 2013.2547945), col=pal_s[1:3], lwd=2) 278 | legend("topright", legend=c("1.0.0 2000-02-29", "2.0.0 2004-10-04", "3.0.0 2013-04-03"), col=pal_s[1:3], bty="n", cex=0.8, lty=1, lwd=2) 279 | ``` 280 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig1-1.pdf} 281 | 282 | ## Commits by author and year between r6 and r74688 283 | 284 | ```{r, fig2, fig.show='hide', fig.height=6, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")} 285 | barplot(ad_tab[order(rs)[5:27],], col=pal_s) 286 | legend("topright", legend=rev(rownames(ad_tab)[order(rs)[5:27]]), ncol=5, fill=rev(pal_s), cex=0.8, bty="n") 287 | ``` 288 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig2-1.pdf} 289 | 290 | ## XML logentry structure 291 | 292 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, 
size='\\small', cache=TRUE} 293 | tr1[[1]] 294 | ``` 295 | ## XML logentry structure 296 | 297 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 298 | tr1[[length(tr1)]] 299 | ``` 300 | 301 | 302 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 303 | res <- vector(mode="list", length=length(tr1)) 304 | for (i in seq_along(tr1)) res[[i]] <- {x <- xmlChildren(xmlChildren(tr1[[i]])[["paths"]]); list(paths=unname(sapply(x, xmlValue)), actions=unname(sapply(x, function(y) xmlAttrs(y)["action"])))} 305 | names(res) <- revs 306 | rl <- sapply(res, function(x) length(x$actions)) 307 | ``` 308 | 309 | 310 | ## Commit messages by number of files affected, year and revision 311 | 312 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 313 | cat(paste(years[order(rl, decreasing=TRUE)][1:10], revs[order(rl, decreasing=TRUE)][1:10], sort(unname(rl), decreasing=TRUE)[1:10], unname(unlist(sub("\\n", ", ", msgs[order(rl, decreasing=TRUE)])))[1:10]), sep="\n") 314 | ``` 315 | 316 | 317 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE} 318 | res50 <- res[rl < 50] 319 | rl_50 <- rl[rl < 50] 320 | f_50 <- unlist(sapply(res50, "[", 1)) 321 | a_50 <- unlist(sapply(res50, "[", 2)) 322 | a_50_actions <- unname(a_50) 323 | f_50_files <- unname(f_50) 324 | f_50_filesa <- substring(f_50_files, 2, nchar(f_50_files)) 325 | f_50_filesb <- strsplit(f_50_filesa, "/") 326 | f_50_filesc <- t(sapply(f_50_filesb, function(x) {out <- character(9); out[seq_along(x)] <- x; out})) # pad each split path to 9 components; seq_along() also copes with an empty path 327 | files_df <- data.frame(f_50_filesc) 328 | ``` 329 | 330 | ## Epoch file commits in trunk/ 331 | 332 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 333 | tx <- table(files_df$X2) 334 | sort(tx[tx>350]) 335 | ``` 336 | 337 | ## Epoch file commits in trunk/src 338 | 339 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 340 | tx <- 
table(files_df[files_df$X2=="src",]$X3) 341 | sort(tx[tx>50]) 342 | ``` 343 | 344 | ## Epoch file commits in trunk/src/library 345 | 346 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 347 | tx <- table(files_df[files_df$X2=="src" & files_df$X3=="library",]$X4) 348 | sort(tx[tx>200]) 349 | ``` 350 | 351 | ## Epoch file commits in trunk/src/library/base 352 | 353 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 354 | tx <- table(files_df[files_df$X2=="src" & files_df$X3=="library" & files_df$X4=="base",]$X5) 355 | sort(tx[tx>4]) 356 | ``` 357 | 358 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 359 | a_50_revs <- rep(names(rl_50), times=rl_50) 360 | o <- match(a_50_revs, revs) 361 | a_50_years <- years[o] 362 | t_acts <- table(a_50_years, factor(a_50_actions)) 363 | ``` 364 | 365 | ## Files by year and commit action 366 | 367 | ```{r, fig3, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")} 368 | barplot(t(t_acts), col=rev(pal_s)[1:4]) 369 | legend("topright", legend=c("A The item was added", "D The item was deleted", "M The item was changed", "R The item was replaced"), ncol=2, fill=rev(pal_s)[1:4], cex=0.8, bty="n") 370 | ``` 371 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig3-1.pdf} 372 | 373 | 374 | # CRAN and Bioconductor packages 375 | 376 | ## CRAN 377 | 378 | - Once S3 permitted extension by writing functions, and packaging functions in libraries, S and R ceased to be monolithic 379 | 380 | - In R, a library is where packages are kept, distinguishing between base and recommended packages distributed with R, and contributed packages 381 | 382 | - Contributed packages can be installed from CRAN (infrastructure built on CPAN and CTAN for Perl and Tex), Bioconductor, other package repositories, and other sources such as github 383 | 384 | - With over 12000 contributed packages, CRAN is central to the 
R community, but is stressed by dependency issues (CRAN is not run by R core) 385 | 386 | ## CRAN/Bioconductor package clusters 387 | 388 | - Andrie de Vries [\textcolor{mLightBrown}{Finding clusters of CRAN packages using \pkg{igraph}}](http://blog.revolutionanalytics.com/2014/12/finding-clusters-of-cran-packages-using-igraph.html) looked at CRAN package clusters from a page rank graph 389 | 390 | - We are over three years further on now, so updating may be informative 391 | 392 | - However, this is only CRAN, and there is the big Bioconductor repository to consider too 393 | 394 | - Adding in the Bioconductor (S4, curated) repo does alter the optics, as you'll see, over and above the cluster dominated by **Rcpp** 395 | 396 | 397 | 398 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"} 399 | suppressPackageStartupMessages(library(BiocInstaller)) 400 | bioc <- available.packages(repos = biocinstallRepos()[1]) 401 | bioc_ann <- available.packages(repos = biocinstallRepos()[2]) 402 | bioc_exp <- available.packages(repos = biocinstallRepos()[3]) 403 | cran <- available.packages() 404 | pdb <- rbind(cran, bioc, bioc_ann, bioc_exp) # stack the CRAN and Bioconductor availability matrices into one 405 | ``` 406 | 407 | 408 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"} 409 | suppressPackageStartupMessages(library(miniCRAN)) 410 | suppressPackageStartupMessages(library(igraph)) 411 | suppressPackageStartupMessages(library(magrittr)) 412 | pg <- makeDepGraph(pdb[, "Package"], availPkgs = pdb, suggests=FALSE, enhances=TRUE, includeBasePkgs = FALSE) 413 | ``` 414 | 415 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 416 | pr <- pg %>% 417 | page.rank(directed = FALSE) %>% 418 | use_series("vector") %>% 419 | sort(decreasing = TRUE) %>% 420 | as.matrix %>% 421 | set_colnames("page.rank") 422 | ``` 423 | 424 | ## CRAN/Bioconductor package page rank scores 425 | 426 | ```{r , echo = FALSE, 
eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 427 | print(pr[1:20,], digits=4) 428 | ``` 429 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 430 | cutoff <- quantile(pr[, "page.rank"], probs = 0.2) 431 | popular <- pr[pr[, "page.rank"] >= cutoff, ] 432 | toKeep <- names(popular) 433 | vids <- V(pg)[toKeep] 434 | gs <- induced.subgraph(pg, vids = toKeep) 435 | cl <- walktrap.community(gs, steps = 3) 436 | ``` 437 | 438 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 439 | topClusters <- table(cl$membership) %>% 440 | sort(decreasing = TRUE) %>% 441 | head(25) 442 | cluster <- function(i, clusters, pagerank, n=10){ 443 | group <- clusters$names[clusters$membership == i] 444 | pagerank[group, ] %>% sort(decreasing = TRUE) %>% head(n) 445 | } 446 | z <- lapply(names(topClusters)[1:15], cluster, clusters=cl, pagerank=pr, n=20) 447 | ``` 448 | 449 | ## First package cluster 450 | 451 | 452 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 453 | z[[1]][1:12] 454 | ``` 455 | 456 | ## Second package cluster 457 | 458 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 459 | z[[2]][1:12] 460 | ``` 461 | 462 | ## Third package cluster 463 | 464 | 465 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 466 | z[[3]][1:12] 467 | ``` 468 | 469 | ## Fourth package cluster 470 | 471 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 472 | z[[4]][1:12] 473 | ``` 474 | 475 | 476 | ## Fifth package cluster 477 | 478 | 479 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 480 | tmp <- z[[5]][1:12] 481 | names(tmp) <- substring(names(tmp), 1, 15) 482 | tmp 483 | ``` 484 | 485 | ## Sixth package cluster 486 | 487 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 488 | z[[6]][1:12] 489 | ``` 490 | 491 | 492 | 493 | 494 | ## CRAN/Bioconductor top two 
page rank clusters 495 | 496 | ```{r, fig4, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent"), warning=FALSE} 497 | library(RColorBrewer) 498 | library(wordcloud) 499 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 500 | for (i in 1:2) wordcloud(names(z[[i]]), freq=unname(z[[i]]), scale=rev(4*range(unname(z[[i]]))/max(unname(z[[5]])))) 501 | par(opar) 502 | ``` 503 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig4-1.pdf} 504 | 505 | ## CRAN/Bioconductor third and fourth page rank clusters 506 | 507 | ```{r, fig5a, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent"), warning=FALSE} 508 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) # two panels side by side with minimal margins; previous settings kept in opar 509 | for (i in 3:4) wordcloud(names(z[[i]]), freq=unname(z[[i]]), scale=rev(4*range(unname(z[[i]]))/max(unname(z[[5]])))) # word sizes scaled against the largest score in z[[5]] in every panel; NOTE(review): that shared denominator looks hand-tuned - confirm 510 | par(opar) # restore the graphics settings 511 | ``` 512 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig5a-1.pdf} 513 | 514 | ## CRAN/Bioconductor fifth and sixth page rank clusters 515 | 516 | ```{r, fig5b, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent"), warning=FALSE} 517 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 518 | for (i in 5:6) wordcloud(names(z[[i]]), freq=unname(z[[i]]), scale=rev(4*range(unname(z[[i]]))/max(unname(z[[5]])))) # same scaling as the other page rank wordcloud panels 519 | par(opar) 520 | ``` 521 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig5b-1.pdf} 522 | 523 | ## CRAN/Bioconductor package author clusters 524 | 525 | - Francois Keck explored CRAN package co-authorship in a more recent blog: [\textcolor{mLightBrown}{Exploring the CRAN social network}](http://www.pieceofk.fr/?p=431) 526 | 527 | - Once again, a little time has passed, so maybe things have shifted 528 | 529 | - Thanks to Martin Morgan, I've added listings corresponding in part to `tools::CRAN_package_db()` 530 | 531 | - It is refreshing to see that Bioconductor is
clearly present, and the people implicated are active in upgrading R internals 532 | 533 | 534 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE} 535 | pdb0 <- tools::CRAN_package_db() 536 | url_bioc <- url("https://Bioconductor.org/packages/release/bioc/VIEWS") # named url_bioc (like url_ann, url_exp) so the connection does not mask base::url() 537 | dcf <- as.data.frame(read.dcf(url_bioc), stringsAsFactors=FALSE) 538 | close(url_bioc) 539 | url_ann <- url("https://Bioconductor.org/packages/release/data/annotation/VIEWS") 540 | dcf_ann <- as.data.frame(read.dcf(url_ann), stringsAsFactors=FALSE) 541 | close(url_ann) 542 | url_exp <- url("https://Bioconductor.org/packages/release/data/experiment/VIEWS") 543 | dcf_exp <- as.data.frame(read.dcf(url_exp), stringsAsFactors=FALSE) 544 | close(url_exp) 545 | o1 <- intersect(names(dcf_exp), names(dcf_ann)) # fields common to both tables, so they can be stacked with rbind() 546 | dcf2 <- rbind(dcf_exp[,o1], dcf_ann[,o1]) 547 | o2 <- intersect(names(dcf), names(dcf2)) 548 | dcf3 <- rbind(dcf2[,o2], dcf[,o2]) 549 | o3 <- intersect(names(pdb0), names(dcf3)) 550 | pdb <- rbind(pdb0[o3], dcf3[o3]) # CRAN plus Bioconductor rows, restricted to the shared fields 551 | #o3 <- intersect(names(pdb0), names(dcf)) 552 | #pdb <- rbind(pdb0[o3], dcf[o3]) 553 | #pdb <- tools::CRAN_package_db() 554 | aut <- pdb$Author 555 | ``` 556 | 557 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, warning=FALSE, results="hide"} 558 | suppressPackageStartupMessages(library(tidyverse)) 559 | suppressPackageStartupMessages(library(stringr)) 560 | suppressPackageStartupMessages(library(igraph)) 561 | suppressPackageStartupMessages(library(tidygraph)) 562 | suppressPackageStartupMessages(library(ggraph)) 563 | suppressPackageStartupMessages(library(magrittr)) 564 | ``` 565 | 566 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 567 | aut <- aut %>% 568 | str_replace_all("\\(([^)]+)\\)", "") %>% # drop text in parentheses 569 | str_replace_all("\\[([^]]+)\\]", "") %>% # drop text in square brackets 570 | str_replace_all("<([^>]+)>", "") %>% # drop text in angle brackets 571 | str_replace_all("\n", " ") %>% 572 | str_replace_all("[Cc]ontribution.* from|[Cc]ontribution.*
by|[Cc]ontributors", " ") %>% # blank out "contribution(s) ... from/by" and "contributors" wording 573 | str_replace_all("\\(|\\)|\\[|\\]", " ") %>% # blank out any brackets still left 574 | iconv(to = "ASCII//TRANSLIT") %>% # transliterate to ASCII 575 | str_replace_all("'$|^'", "") %>% # strip a leading or trailing single quote 576 | gsub("([A-Z])([A-Z]{1,})", "\\1\\L\\2", ., perl = TRUE) %>% # in runs of capitals, lower-case everything after the first letter 577 | gsub("\\b([A-Z]{1}) \\b", "\\1\\. ", .) %>% # put a full stop after single-letter initials 578 | map(str_split, ",|;|&| \\. |--|(?<=[a-z])\\.| [Aa]nd | [Ww]ith | [Bb]y ", simplify = TRUE) %>% # split each Author string into names on the listed separators 579 | map(str_replace_all, "[[:space:]]+", " ") %>% # collapse runs of whitespace 580 | map(str_replace_all, " $|^ | \\.", "") %>% # trim edge spaces and remove " ." 581 | map(function(x) x[str_length(x) != 0]) %>% # drop empty strings 582 | set_names(pdb$Package) %>% # name each element by its package 583 | extract(map_lgl(., function(x) length(x) > 1)) # keep only packages with more than one author 584 | ``` 585 | 586 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 587 | aut_list <- aut %>% 588 | unlist() %>% 589 | dplyr::as_data_frame() %>% 590 | count(value) %>% # how many times each name occurs across the kept packages 591 | rename(Name = value, Package = n) 592 | ``` 593 | 594 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 595 | edge_list <- aut %>% 596 | map(combn, m = 2) %>% # every pair of authors within a package 597 | do.call("cbind", .)
%>% 598 | t() %>% # one row per pair 599 | dplyr::as_data_frame() %>% 600 | arrange(V1, V2) %>% 601 | count(V1, V2) # n = how often each (V1, V2) pair occurs 602 | ``` 603 | 604 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 605 | g <- edge_list %>% 606 | select(V1, V2) %>% 607 | as.matrix() %>% 608 | graph.edgelist(directed = FALSE) %>% # undirected co-authorship graph 609 | as_tbl_graph() %>% 610 | activate("edges") %>% 611 | mutate(Weight = edge_list$n) %>% # edge weight = count of that author pair 612 | activate("nodes") %>% 613 | rename(Name = name) %>% 614 | mutate(Component = group_components()) %>% # label connected components 615 | filter(Component == names(table(Component))[which.max(table(Component))]) # keep only the most populous component 616 | ``` 617 | 618 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 619 | suppressMessages(g <- g %>% 620 | left_join(aut_list) %>% # attach the per-name package counts 621 | filter(Package > 4) %>% # keep names with more than 4 packages 622 | mutate(Component = group_components()) %>% 623 | filter(Component == names(table(Component))[which.max(table(Component))])) # again keep only the most populous component 624 | ``` 625 | 626 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 627 | g <- mutate(g, Community = group_edge_betweenness(), 628 | Degree = centrality_degree()) 629 | ``` 630 | 631 | ```{r , echo = FALSE, eval=FALSE, mysize=TRUE, size='\\tiny', cache=TRUE, results="hide"} 632 | comms <- lapply(1:6, function(i) {filter(g, Community == names(sort(table(Community), decreasing = TRUE))[i]) %>% # members of the i-th most populous community 633 | select(Name, Package) %>% 634 | arrange(desc(Package)) %>% 635 | top_n(100, Package) %>% as.data.frame()}) # plain data frames: top 100 names by package count 636 | ``` 637 | 638 | 639 | ## First two package author clusters 640 | 641 | --- 642 | # ![extract_ hell](../pix/Screenshot from 2018-05-11 14-11-22.png) 643 | --- 644 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\tiny', cache=TRUE} 645 | comms <- readRDS("comms.rds") # load stored author communities; the chunks that compute comms above are eval=FALSE 646 | ``` 647 | \begincols 648 | \begincol{0.48\textwidth} 649 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 650 | comms[[1]][1:12,] 651 | ``` 652 | \endcol 653 | 654 | \begincol{0.48\textwidth} 655 |
```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 656 | comms[[2]][1:12,] 657 | ``` 658 | \endcol 659 | \endcols 660 | 661 | ## Third and fourth 662 | 663 | \begincols 664 | \begincol{0.48\textwidth} 665 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 666 | tmp <- comms[[3]][1:12,] # first 12 rows of the third community 667 | tmp[,1] <- substring(tmp[,1], 1, 20) # truncate the first column to 20 characters for display 668 | tmp 669 | ``` 670 | \endcol 671 | \begincol{0.48\textwidth} 672 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 673 | comms[[4]][1:12,] 674 | ``` 675 | \endcol 676 | \endcols 677 | 678 | ## Fifth and sixth 679 | 680 | \begincols 681 | \begincol{0.48\textwidth} 682 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 683 | comms[[5]][1:12,] 684 | ``` 685 | \endcol 686 | \begincol{0.48\textwidth} 687 | ```{r , echo = FALSE, eval=TRUE, mysize=TRUE, size='\\small', cache=TRUE} 688 | tmp <- comms[[6]][1:12,] 689 | tmp[,1] <- substring(tmp[,1], 1, 20) # truncate the first column to 20 characters for display 690 | tmp 691 | ``` 692 | \endcol 693 | \endcols 694 | 695 | 696 | 697 | ## First two package author clusters 698 | 699 | ```{r, fig6, eval=TRUE, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")} 700 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) # two panels side by side with minimal margins; previous settings kept in opar 701 | for (i in 1:2) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(3.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) # word sizes scaled against the largest Package count in comms[[2]]; NOTE(review): that shared denominator looks hand-tuned - confirm 702 | par(opar) # restore the graphics settings 703 | ``` 704 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig6-1.pdf} 705 | 706 | ## Third and fourth package author clusters 707 | 708 | ```{r, fig7a, eval=TRUE, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")} 709 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 710 | for (i in 3:4) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(3.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) # same scaling as the fig6 panels 711 | par(opar) 712 | ``` 713 |
\includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig7a-1.pdf} 714 | 715 | ## Fifth and sixth package author clusters 716 | 717 | ```{r, fig7b, eval=TRUE, fig.show='hide', fig.height=5, fig.width=12, dev.args=list(family="Fira Sans", bg="transparent")} 718 | opar <- par(mar=c(0,0,0,0)+0.1, mfrow=c(1,2)) 719 | for (i in 5:6) wordcloud(comms[[i]]$Name, freq=comms[[i]]$Package, scale=rev(3.75*range(comms[[i]]$Package)/max(comms[[2]]$Package))) 720 | par(opar) 721 | ``` 722 | \includegraphics[width=0.95\textwidth]{eRum_keynote_18_files/figure-beamer/fig7b-1.pdf} 723 | 724 | 725 | 726 | ## Roundup: history 727 | 728 | - Many sources in applied statistics with an S-like syntax but Lisp/Scheme-like internals, and sustained tensions between these 729 | 730 | - Many different opinions on preferred ways of structuring data and data handling, opening for adaptations to different settings 731 | 732 | - More recently larger commercial interest in handling large input long data sets, previously also present; simulations also generate large output data sets; bioinformatics both wide and long 733 | 734 | - Differing views of the world in terms of goals and approaches 735 | 736 | - Differences provide ecological robustness 737 | 738 | 739 | 740 | \nobibliography{bus463} 741 | 742 | 743 | --------------------------------------------------------------------------------