# useful_code: a collection of useful R functions and code snippets.
#
# Directory structure:
# ├── README.md
# ├── .gitignore
# ├── useful_code.Rproj
# ├── R
#     ├── filldown.r
#     ├── toDev.R
#     ├── googleSS.r
#     ├── plotShapeCodes.r
#     ├── dms_dd.r
#     ├── colClasses.r
#     ├── numbers2words.r
#     ├── utm_dd.r
#     ├── roxygenTemplate.R
#     ├── sunPosition.r
#     └── multiplot.r
# └── style.Rmd

# /README.md:
# ------------------------------------------------------------------------------
# This contains some useful R functions and code snippets.

# ------------------------------------------------------------------------------
# /.gitignore:
# ------------------------------------------------------------------------------
# .Rproj.user
# .Rhistory
# .RData
# style.html
# style.md

# ------------------------------------------------------------------------------
# /useful_code.Rproj:
# ------------------------------------------------------------------------------
# Version: 1.0
#
# RestoreWorkspace: Default
# SaveWorkspace: Default
# AlwaysSaveHistory: Default
#
# EnableCodeIndexing: Yes
# UseSpacesForTab: Yes
# NumSpacesForTab: 2
# Encoding: UTF-8
#
# RnwWeave: knitr
# LaTeX: pdfLaTeX

# ------------------------------------------------------------------------------
# /R/filldown.r:
# ------------------------------------------------------------------------------

# Fill missing values downwards (last observation carried forward).
#
# x: a vector, possibly containing NAs
#
# Returns: a vector the same length as x in which every NA is replaced by the
#          closest non-NA value before it. NAs that come before the first
#          non-NA value have nothing to copy and stay NA.
filldown <- function(x) {
  observed <- !is.na(x)
  # The running count of observed values is, at every position, the index
  # (within the observed values) of the most recent one; 0 means "none yet".
  last.seen <- cumsum(observed)
  last.seen[last.seen == 0] <- NA  # indexing with NA yields NA in the output
  x[observed][last.seen]
}

# ------------------------------------------------------------------------------
# /R/toDev.R:
# ------------------------------------------------------------------------------

# Send a figure (generated by an expression) to a plotting device.
# From here:
# http://nicercode.github.io/blog/2013-07-09-figure-functions/
#
# expr:     expression (typically a call to a plotting function) that draws
#           the figure; it is evaluated in the caller's frame
# dev:      the graphics device function (e.g. png, pdf)
# filename: file to create; passed as the first argument to 'dev'
# ...:      options passed to the graphics device
# verbose:  if TRUE, print the name of the file being created
#
# Returns: the value of 'expr'. The device is always closed on exit, even if
#          'expr' fails.
toDev <- function(expr, dev, filename, ..., verbose = TRUE) {
  if (verbose) {
    cat(sprintf("Creating %s\n", filename))
  }
  dev(filename, ...)
  # Registered only after the device opened successfully, so we never close a
  # device that belongs to somebody else.
  on.exit(grDevices::dev.off(), add = TRUE)
  eval.parent(substitute(expr))
}

# ------------------------------------------------------------------------------
# /R/googleSS.r:
# ------------------------------------------------------------------------------

# Read one sheet of a published Google spreadsheet into a data frame.
#
# This doesn't allow querying the spreadsheet, but seems to handle
# mixed datatype columns better than getGoogleSS
#
# key: the document key of the published spreadsheet (required)
# gid: the id of the sheet within the document (default 0)
#
# Depends: RCurl
#
# Returns: a data frame parsed from the sheet's CSV export.
googleSS <- function(key = NA, gid = 0) {
  if (is.na(key)) {
    stop("\nDocumentkey (key) is missing\n")
  }
  # library() (unlike require()) stops with an error if RCurl is missing.
  library(RCurl)
  csv.url <- paste0("https://docs.google.com/spreadsheet/pub?key=", key,
                    "&single=true&gid=", gid, "&output=csv")
  csv.text <- getURL(csv.url,
                     cainfo = system.file("CurlSSL", "cacert.pem",
                                          package = "RCurl"))
  # 'text =' lets read.csv manage (and close) its own connection.
  read.csv(text = csv.text, header = TRUE, sep = ",")
}

# ------------------------------------------------------------------------------
# /R/plotShapeCodes.r:
# ------------------------------------------------------------------------------

# Plot a visual guide to the point shape codes (1 to 25) used in R graphics.
# Run with no parameters: plotShapeCodes()
#
# Depends: ggplot2
#
# Returns: a ggplot object with one facet per shape code.
plotShapeCodes <- function() {
  library(ggplot2)
  myTitle <- paste0(
    "Guide to Point Shape codes in R\n",
    "(from http://www.win-vector.com/blog/2012/04/",
    "how-to-remember-point-shape-codes-in-r/)\n",
    "\n"
  )
  shape.guide <- ggplot() +
    ggtitle(myTitle) +
    theme(plot.title = element_text(hjust = 0))
  # One layer per shape; each layer's single point sits at x = y = code.
  for (i in 1:25) {
    shape.guide <- shape.guide +
      geom_point(data = data.frame(x = i), aes(x = x, y = x), shape = i,
                 size = 4)
  }
  # A plot has a single facet specification, so it is added once at the end.
  shape.guide + facet_wrap(~x, scales = "free")
}

# ------------------------------------------------------------------------------
# /R/dms_dd.r:
# ------------------------------------------------------------------------------

# Convert degrees-minutes-seconds to decimal degrees.
#
# x:   a character vector containing the lat or long, with degrees, minutes
#      and seconds separated by 'sep' (e.g. "45:30:00"). Missing minutes or
#      seconds are treated as 0.
# sep: the character separating the degrees, minutes, seconds (default ":").
#      A single character is matched literally; a longer string is treated as
#      a regular expression.
# hem: the hemisphere ("N","S","E","W"). Assumes all coords in the same
#      hemisphere.
#
# Returns: a numeric vector of decimal degrees: positive for "N"/"E" and
#          negative for "S"/"W", whatever the sign of the input.
dms_dd <- function(x, sep = ":", hem) {
  if (missing(hem) || length(hem) != 1 ||
      !hem %in% c("N", "S", "E", "W")) {
    stop("'hem' must be N,S,E, or W", call. = FALSE)
  }
  literal.sep <- nchar(sep) == 1
  parts <- lapply(strsplit(as.character(x), sep, fixed = literal.sep),
                  as.numeric)
  # Work with magnitudes so that a signed degree field ("-45:30:00") does not
  # have its minutes and seconds added in the wrong direction.
  magnitude <- vapply(parts, function(p) {
    p <- abs(c(p, 0, 0)[1:3])  # pad missing minutes/seconds with 0
    p[1] + p[2] / 60 + p[3] / 3600
  }, numeric(1))
  if (hem %in% c("N", "E")) magnitude else -magnitude
}

# ------------------------------------------------------------------------------
# /R/colClasses.r:
# ------------------------------------------------------------------------------

# Coerces data.frame columns to the specified classes.
#
# d:          a data frame
# colClasses: character vector of class names, recycled to the number of
#             columns in 'd'. "numeric", "character", "Date", "POSIXct" and
#             "factor" use the matching as.*() function (dates and times with
#             origin 1970-01-01); anything else goes through as().
#
# Note: "numeric" applied to a factor column returns the factor's integer
# codes (that is what as.numeric() does), not the values of its labels.
#
# Returns: 'd' with its columns coerced.
#
# Example usage
# DF <- as.data.frame(matrix(rnorm(25), 5, 5))
# DF2 <- colClasses(DF, c(rep("character", 3), rep("factor", 2)))
#
# DF3 <- colClasses(DF, 'Date')
# str(DF3)
colClasses <- function(d, colClasses) {
  colClasses <- rep(colClasses, length.out = length(d))
  d[] <- lapply(seq_along(d), function(i) {
    column <- d[[i]]
    switch(colClasses[i],
           numeric = as.numeric(column),
           character = as.character(column),
           Date = as.Date(column, origin = "1970-01-01"),
           POSIXct = as.POSIXct(column, origin = "1970-01-01"),
           factor = as.factor(column),
           methods::as(column, colClasses[i]))
  })
  d
}

# ------------------------------------------------------------------------------
# /R/numbers2words.r:
# ------------------------------------------------------------------------------

# Spell out whole numbers in English words.
#
# Function by John Fox found here:
# http://tolstoy.newcastle.edu.au/R/help/05/04/2715.html
#
# x: a numeric vector; values are rounded to the nearest whole number first
#
# Returns: a character vector the same length as x, e.g. 117 gives
#          "one hundred seventeen". Negative numbers are prefixed with
#          "minus", NA gives NA, and 0 gives "" (there is no word for zero in
#          the lookup tables). Stops with an error for numbers that need a
#          suffix beyond "trillion".
numbers2words <- function(x) {
  ones <- c("", "one", "two", "three", "four", "five", "six", "seven",
            "eight", "nine")
  names(ones) <- 0:9
  teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
             "sixteen", "seventeen", "eighteen", "nineteen")
  names(teens) <- 0:9
  tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty",
            "ninety")
  names(tens) <- 2:9
  suffixes <- c("thousand", "million", "billion", "trillion")

  # Glue a vector of digit characters back into a single number.
  makeNumber <- function(...) as.numeric(paste(..., collapse = ""))

  # Words for one non-negative whole number.
  helper <- function(x) {
    # format() with scientific = FALSE always yields plain digits, whereas
    # as.character() may produce scientific notation such as "1e+05".
    digits <- rev(strsplit(format(x, scientific = FALSE, trim = TRUE),
                           "")[[1]])
    nDigits <- length(digits)
    if (nDigits == 1) {
      as.vector(ones[digits])
    } else if (nDigits == 2) {
      if (x <= 19) {
        as.vector(teens[digits[1]])
      } else {
        trimws(paste(tens[digits[2]], helper(as.numeric(digits[1]))))
      }
    } else if (nDigits == 3) {
      trimws(paste(ones[digits[3]], "hundred",
                   helper(makeNumber(digits[2:1]))))
    } else {
      # Split into the leading group and the remaining 3 * nSuffix digits.
      nSuffix <- ((nDigits + 2) %/% 3) - 1
      if (nSuffix > length(suffixes)) stop(paste(x, "is too large!"))
      trimws(paste(helper(makeNumber(digits[nDigits:(3 * nSuffix + 1)])),
                   suffixes[nSuffix],
                   helper(makeNumber(digits[(3 * nSuffix):1]))))
    }
  }

  toWords <- function(value) {
    if (is.na(value)) return(NA_character_)
    if (value < 0) paste("minus", helper(-value)) else helper(value)
  }

  vapply(round(x), toWords, character(1), USE.NAMES = FALSE)
}

# ------------------------------------------------------------------------------
# /R/utm_dd.r:
# ------------------------------------------------------------------------------

## Convert zone+utm pairs to lat/long. Can take either a single zone + utm,
## or a data frame with many. Allows for datasets with different zones and
## datum for each utm.
#
# Depends: rgdal, plyr
# NOTE(review): rgdal has been retired from CRAN; this function needs an
# archived rgdal install until it is ported to another package.
#
# zone: a number, or column name in 'data'
# easting: a number, or column name in 'data'
# northing: a number, or column name in 'data'
# data: (optional) a data frame with zone + utms
# key: Name of column 'data' that contains a unique identifier for each row
# datum: string or column name in 'data'. Default 'NAD83'
#
# Returns: either a vector of length 2 with longitude and latitude
#          respectively, or a dataframe with the 'key' column and longitude
#          and latitude. Rows of 'data' with missing values are dropped.
utm_dd <- function(zone = NULL, easting = NULL, northing = NULL,
                   datum = "NAD83", data = NULL, key = NULL) {

  # library() (unlike require()) stops with an error if a package is missing.
  library(rgdal)
  library(plyr)

  get_dd <- function(d) {
    # requires a one-row dataframe with zone, easting, northing, datum
    # (in that order)

    # [[ ]] + as.character() extracts the values themselves; pasting a
    # one-column data frame would turn a factor column into its integer code.
    utm.crs <- CRS(paste0("+proj=utm +datum=", as.character(d[[4]]),
                          " +zone=", as.character(d[[1]])))
    utm <- SpatialPoints(d[2:3], proj4string = utm.crs)
    sp <- spTransform(utm, CRS("+proj=longlat"))
    coordinates(sp)
  }

  if (any(is.null(zone), is.null(easting), is.null(northing))) {

    stop("You must supply zone, easting, and northing")

  } else if (is.null(data)) {

    # Single point supplied directly as numbers.
    utms <- data.frame(zone, easting, northing, datum,
                       stringsAsFactors = FALSE)
    as.vector(get_dd(utms))

  } else if (is.null(key)) {

    stop("You must supply a column name for 'key'")

  } else {
    if (!datum %in% colnames(data)) {
      # 'datum' is a literal datum name: use it for every row.
      datum <- rep(datum, nrow(data))
      utms <- data.frame(data[c(key, zone, easting, northing)], datum,
                         stringsAsFactors = FALSE)
    } else {
      utms <- data[c(key, zone, easting, northing, datum)]
    }

    utms <- na.omit(utms)

    # Convert row by row (grouping on the key column), since each row may
    # have its own zone and datum.
    longlat <- ddply(.data = utms, .variables = 1,
                     .fun = function(x) get_dd(x[2:5]))
    names(longlat)[2:3] <- c("Longitude", "Latitude")
    longlat

  }

}

# ------------------------------------------------------------------------------
# /R/roxygenTemplate.R:
# ------------------------------------------------------------------------------

#' Populate the boilerplate roxygen template at the top of the function.
#'
#' Inspired by Karthik Ram's RTools Sublime Text 2 plugin:
#' https://github.com/karthik/Rtools
#'
#' The file is rewritten in place and then opened for editing. The function
#' stops without touching the file if roxygen comments (\code{#'}) are found
#' in the five lines before \code{params_start} (or in the first five lines of
#' the file when \code{params_start} is 1).
#'
#' @param funfile path to the .R file containing the function
#' @param params_start The (first) line that contains the parameters for your
#'   function (default 1)
#' @param params_end (optional) If your parameter definitions breaks across
#'   multiple lines, specify the ending line (default \code{NULL}).
#' @export
#' @return nothing, but adds the roxygen template to the top of the file
roxygen_template <- function(funfile, params_start = 1, params_end = NULL) {

  fun_text <- readLines(funfile, warn = FALSE)

  if (is.null(params_end)) params_end <- params_start

  if (params_start == 1) {
    checks <- 1:5
  } else {
    # Clamp at line 1: a zero or negative start would mix negative and
    # positive subscripts and fail.
    checks <- max(1, params_start - 5):params_start
  }

  if (any(grepl("^#'", fun_text[checks]))) {
    stop("It appears you already have roxygen documentation for your function!")
  }

  if (params_start == 1) {
    above <- NULL
  } else {
    above <- fun_text[1:(params_start - 1)]
  }
  the_rest <- fun_text[params_start:length(fun_text)]

  ## Combine multiple lines of parameters
  params_line <- paste(fun_text[params_start:params_end], collapse = "")

  # Pull out the text between "(" and the ")" that precedes the opening "{":
  matches <- regexpr("(?<=\\().+?(?=\\)\\s*?\\{)", params_line, perl = TRUE)

  if (matches == -1) {
    # No parameter list found (e.g. "function() {"): emit no @param lines.
    param_lines <- character(0)
  } else {
    # Parse out and clean the parameter names (drop whitespace and defaults):
    params <- strsplit(regmatches(params_line, matches)[1], ",")[[1]]
    params <- gsub("\\s+|=.+", "", params)
    param_lines <- paste0("#' @param ", params, " ")
  }

  # Put together the roxygen fields, one element per output line:
  top <- c("#'", "#'", "#'", "#'", "#' @import", "#' @importFrom ")
  end <- c("#' @export", "#' @keywords", "#' @seealso", "#' @return",
           "#' @aliases", "#' @examples \\dontrun{", "#'", "#'}")
  roxy <- c(top, param_lines, end)

  # Write to the top of the file (without asking... should be safe, i think)
  writeLines(c(above, roxy, the_rest), funfile)

  # Open the file to fill in documentation
  file.edit(funfile)
}

# ------------------------------------------------------------------------------
# /R/sunPosition.r:
# ------------------------------------------------------------------------------

# Compute the position of the sun (elevation and azimuth) for a given date,
# time and location.
#
# Function from here:
# http://stackoverflow.com/a/8764866
# First proposed by SpoonNZ and improved by Josh O'Brien
#
# year, month, day: the date
# hour, min, sec:   the time of day (defaults to 12:00:00).
#                   NOTE(review): presumably UTC, since no time-zone
#                   correction is applied -- confirm against the source.
# lat, long:        latitude and longitude in degrees (default 46.5, 6.5)
#
# Returns: a list with 'elevation' and 'azimuth', both in degrees.
sunPosition <- function(year, month, day, hour=12, min=0, sec=0,
                        lat=46.5, long=6.5) {

  twopi <- 2 * pi
  deg2rad <- pi / 180

  # Get day of the year, e.g. Feb 1 = 32, Mar 1 = 61 on leap years
  month.days <- c(0,31,28,31,30,31,30,31,31,30,31,30)
  day <- day + cumsum(month.days)[month]
  # In leap years every date after Feb 29 is one day later; Feb 29 itself
  # (day 60 in month 2) is already correct.
  leapdays <- year %% 4 == 0 & (year %% 400 == 0 | year %% 100 != 0) &
              day >= 60 & !(month==2 & day==60)
  day[leapdays] <- day[leapdays] + 1

  # Get Julian date - 2400000
  hour <- hour + min / 60 + sec / 3600 # hour plus fraction
  delta <- year - 1949
  leap <- trunc(delta / 4) # former leapyears
  jd <- 32916.5 + delta * 365 + leap + day + hour / 24

  # The input to the Astronomer's Almanac is the difference between
  # the Julian date and JD 2451545.0 (noon, 1 January 2000)
  time <- jd - 51545.

  # Ecliptic coordinates

  # Mean longitude
  mnlong <- 280.460 + .9856474 * time
  mnlong <- mnlong %% 360
  mnlong[mnlong < 0] <- mnlong[mnlong < 0] + 360

  # Mean anomaly
  mnanom <- 357.528 + .9856003 * time
  mnanom <- mnanom %% 360
  mnanom[mnanom < 0] <- mnanom[mnanom < 0] + 360
  mnanom <- mnanom * deg2rad

  # Ecliptic longitude and obliquity of ecliptic
  eclong <- mnlong + 1.915 * sin(mnanom) + 0.020 * sin(2 * mnanom)
  eclong <- eclong %% 360
  eclong[eclong < 0] <- eclong[eclong < 0] + 360
  oblqec <- 23.439 - 0.0000004 * time
  eclong <- eclong * deg2rad
  oblqec <- oblqec * deg2rad

  # Celestial coordinates
  # Right ascension and declination
  num <- cos(oblqec) * sin(eclong)
  den <- cos(eclong)
  ra <- atan(num / den)
  # atan() only covers (-pi/2, pi/2); move ra into the correct quadrant.
  ra[den < 0] <- ra[den < 0] + pi
  ra[den >= 0 & num < 0] <- ra[den >= 0 & num < 0] + twopi
  dec <- asin(sin(oblqec) * sin(eclong))

  # Local coordinates
  # Greenwich mean sidereal time
  gmst <- 6.697375 + .0657098242 * time + hour
  gmst <- gmst %% 24
  gmst[gmst < 0] <- gmst[gmst < 0] + 24.

  # Local mean sidereal time
  lmst <- gmst + long / 15.
  lmst <- lmst %% 24.
  lmst[lmst < 0] <- lmst[lmst < 0] + 24.
  lmst <- lmst * 15. * deg2rad

  # Hour angle
  ha <- lmst - ra
  ha[ha < -pi] <- ha[ha < -pi] + twopi
  ha[ha > pi] <- ha[ha > pi] - twopi

  # Latitude to radians
  lat <- lat * deg2rad

  # Azimuth and elevation
  el <- asin(sin(dec) * sin(lat) + cos(dec) * cos(lat) * cos(ha))
  az <- asin(-cos(dec) * sin(ha) / cos(el))

  # For logic and names, see Spencer, J.W. 1989. Solar Energy. 42(4):353
  # Vectorised form of:
  #   if (0 < sin(dec) - sin(el) * sin(lat)) {
  #     if (sin(az) < 0) az <- az + twopi
  #   } else {
  #     az <- pi - az
  #   }
  cosAzPos <- (0 <= sin(dec) - sin(el) * sin(lat))
  sinAzNeg <- (sin(az) < 0)
  az[cosAzPos & sinAzNeg] <- az[cosAzPos & sinAzNeg] + twopi
  az[!cosAzPos] <- pi - az[!cosAzPos]

  # Back to degrees
  el <- el / deg2rad
  az <- az / deg2rad

  return(list(elevation=el, azimuth=az))
}

# ------------------------------------------------------------------------------
# /R/multiplot.r:
# ------------------------------------------------------------------------------

#'Plot multiple plots in a single pane
#'
#'ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
#' @import grid ggplot2
#' @export
#'
#' @param ... Two or more ggplot2 objects
#' @param plotlist (optional) a list of ggplot2 objects
#' @param cols Number of columns in layout
#' @param layout A matrix specifying the layout. If present, 'cols' is ignored. See Details
#' @param title Optional title as a character string
#' @param widths a vector of relative column widths eg. c(3,2)
#' @param heights a vector of relative row heights eg. c(3,2)
#' @param titlefont The font of the title
#' @param titleface The font face (1 = normal, 2 = bold, 3 = italic, 4 = bold italic)
#' @param titlesize The size of the title font
#'
#' @details If plotting three plots and the layout is something like
#' matrix(c(1,2,3,3), nrow=2, byrow=TRUE), then plot 1 will go in the upper
#' left, 2 will go in the upper right, and 3 will go all the way across the
#' bottom. To save, you must use the desired device (eg \code{png()}), or
#' save from the RStudio Viewer.
#'
#' Borrowed and modified from http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
#'
#' @return NULL (invisibly) after drawing the plots on a new grid page. If
#'   only a single plot is supplied nothing is drawn; that plot is returned
#'   with \code{title} set as its title.
#' @examples \dontrun{
#' library("ggplot2")
#' plot1 <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
#'   geom_bar(stat = "identity")
#' plot2 <- ggplot(mtcars, aes(x = mpg, y = disp)) +
#'   geom_smooth()
#' multiplot(plot1, plot2, cols = 2, widths = c(3,2), title = "My two unrelated plots")
#' multiplot(plot1, plot2, cols = 1, heights = c(10,2), title = "My two unrelated plots")
#' myplots <- list(plot1, plot2, plot1)
#' multiplot(plotlist = myplots, layout =matrix(c(1,2,3,3), nrow=2),
#'   heights = c(1,3), widths = c(3,4), title = "My three unrelated plots")
#' ## Adjusting fonts
#' library(extrafont)
#' loadfonts()
#' multiplot(plotlist = myplots, layout =matrix(c(1,2,3,3), nrow=2),
#'   heights = c(1,3), widths = c(3,4), title = "My three unrelated plots",
#'   titlefont = "Wingdings", titleface = 4, titlesize = 20)
#'}
multiplot <- function(..., plotlist = NULL, cols = 1, layout = NULL,
                      widths = NULL, heights = NULL, title = NULL,
                      titlefont = "", titleface = 1, titlesize = 16) {

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  if (numPlots == 0) {
    stop("No plots supplied to multiplot()", call. = FALSE)
  }

  # A single plot needs no layout: just attach the title and hand it back.
  if (numPlots == 1) {
    return(plots[[1]] + ggplot2::labs(title = title))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    plot.rows <- ceiling(numPlots / cols)
    layout <- matrix(seq(1, cols * plot.rows), ncol = cols, nrow = plot.rows)
  }

  if (!is.null(title)) { # Add a narrow row at the top for the title
    layout <- rbind(rep(0, ncol(layout)), layout)
    if (is.null(heights)) {
      plotrows <- nrow(layout) - 1
      rowheights <- c(0.1, rep(1, plotrows) / plotrows)
    } else {
      rowheights <- c(0.1, heights / sum(heights))
    }
  } else {
    if (is.null(heights)) {
      rowheights <- rep(1, nrow(layout))
    } else {
      rowheights <- heights
    }
  }

  if (is.null(widths)) {
    # Use the layout's own column count: when 'layout' is supplied by the
    # caller, 'cols' is ignored and may not match it.
    colwidths <- rep(1, ncol(layout))
  } else {
    colwidths <- widths
  }

  # Set up the page
  grid::grid.newpage()
  grid::pushViewport(grid::viewport(
    layout = grid::grid.layout(nrow(layout), ncol(layout),
                               widths = colwidths, heights = rowheights)))

  # Make each plot, in the correct location
  for (i in seq_len(numPlots)) {
    # Get the i,j matrix positions of the regions that contain this subplot
    matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

    print(plots[[i]],
          vp = grid::viewport(layout.pos.row = matchidx$row,
                              layout.pos.col = matchidx$col))
  }

  if (!is.null(title)) {
    # The title spans every column of the extra first row.
    grid::grid.text(title,
                    vp = grid::viewport(layout.pos.row = 1,
                                        layout.pos.col = 1:ncol(layout)),
                    gp = grid::gpar(fontfamily = titlefont,
                                    fontface = titleface,
                                    fontsize = titlesize))
  }

  return(invisible(NULL))
}

# ------------------------------------------------------------------------------
# /style.Rmd:
-------------------------------------------------------------------------------- 1 | --- 2 | title: Style guide 3 | layout: default 4 | --- 5 | 6 | This style guide was taken from [Hadley Wickham's](http://adv-r.had.co.nz/Style.html), with a few tweaks of my own added. Not all of my work follows this (or any) style guide, especially my older stuff. 7 | 8 | # Style guide 9 | 10 | Good coding style is like using correct punctuation when writing: you can manage without it, but it sure makes things easier to read. As with punctuation, there are many possible variations, and the main thing is to be consistent. The following guide describes the style that I use - you don't have to use it, but you need to have some consistent style that you do follow. My style is based on Google's [R style guide][1], with a few tweaks. 11 | 12 | Good style is important because while your code only has one author, it will usually have multiple readers, and when you know you will be working with multiple people on the same code, it's a good idea to agree on a common style up-front. No style is uniformly better than any other style, and if you're working with a group of people, you may need to sacrifice some of your most favourite types of style. 13 | 14 | One package that can make adhering to a style guide easier is `formatR`, by Yihui Xie. It can't do everything, but if you're starting with very poorly formatted R code, it will get you to a good place much more quickly than doing everything by hand. Make sure to read [the notes on the wiki](https://github.com/yihui/formatR/wiki) before using it. 15 | 16 | ## Notation and naming 17 | 18 | ### File names 19 | 20 | File names should end in `.r` and be meaningful. 21 | 22 | # Good 23 | explore-diamonds.r 24 | hadley-wickham-hw-1.r 25 | # Bad 26 | foo.r 27 | my-homework.R 28 | 29 | ### Identifiers 30 | 31 | "There are only two hard things in Computer Science: cache invalidation and naming things." 
-- Phil Karlton 32 | 33 | Variable names should be lowercase and use `.` to separate words within a name. Function names should be camelCase. Column names in dataframes and matrices should be lowercase, with words within a name separated by `_`. Generally, variable names should be nouns and function names should be verbs. Strive for concise but meaningful names (this is not easy!) 34 | 35 | * Variable names: 36 | # Good 37 | day.one 38 | day.1 39 | # Bad 40 | first_day_of_the_month 41 | DayOne 42 | dayone 43 | djm1 44 | 45 | * Function names: 46 | # Good 47 | getStuff 48 | getMoreStuff 49 | writeStuff 50 | # Bad 51 | func1 52 | func_3 53 | 54 | ## Syntax 55 | 56 | ### Spacing 57 | 58 | Place spaces around all infix operators (`=`, `+`, `-`, `<-`, etc.). Do not place a space before a comma, but always place one after a comma (just like in regular English). 59 | 60 | # Good 61 | average <- mean(feet / 12 + inches, na.rm = T) 62 | # Bad 63 | average<-mean(feet/12+inches,na.rm=T) 64 | 65 | Place a space before left parentheses, except in a function call. 66 | 67 | # Good 68 | `if (debug)` 69 | `plot(x, y)` 70 | 71 | # Bad 72 | `if(debug)` 73 | `plot (x, y)` 74 | 75 | Extra spacing (i.e., more than one space in a row) is okay if it improves alignment of equals signs or arrows (`<-`). 76 | 77 | list( 78 | x = call_this_long_function(a, b), 79 | y = a * e / d ^ f) 80 | 81 | list( 82 | total = a + b + c, 83 | mean = (a + b + c) / n) 84 | 85 | Do not place spaces around code in parentheses or square brackets. (Except if there's a trailing comma: always place a space after a comma, just like in ordinary English.) 
86 | 87 | # Good 88 | if (debug) 89 | diamonds[5, ] 90 | 91 | # Bad 92 | if ( debug ) # No spaces around debug 93 | x[1,] # Needs a space after the comma 94 | x[1 ,] # Space goes after, not before 95 | 96 | ### Curly braces 97 | 98 | An opening curly brace should never go on its own line and should always be followed by a new line; a closing curly brace should always go on its own line, unless followed by `else`. 99 | 100 | Always indent the code inside the curly braces. 101 | 102 | # Good 103 | 104 | if (y < 0 && debug) { 105 | message("Y is negative") 106 | } 107 | 108 | if (y == 0) { 109 | log(x) 110 | } else { 111 | y ^ x 112 | } 113 | 114 | # Bad 115 | 116 | if (y < 0 && debug) 117 | message("Y is negative") 118 | 119 | if (y == 0) { 120 | log(x) 121 | } 122 | else { 123 | y ^ x 124 | } 125 | 126 | It's ok to leave very short statements on the same line: 127 | 128 | if (y < 0 && debug) message("Y is negative") 129 | 130 | ### Line length 131 | 132 | Keep your lines less than 80 characters. This is the amount that will fit comfortably on a printed page at a reasonable size. If you find you are running out of room, this is probably an indication that you should encapsulate some of the work in a separate function. 133 | 134 | ### Indentation 135 | 136 | When indenting your code, use two spaces. Never use tabs or mix tabs and spaces. 137 | 138 | The only exception is if a function definition runs over multiple lines: indent the second line to line up with where the definition starts: 139 | 140 | ```R 141 | long_function_name <- function(a = "a long argument", b = "another argument", 142 | c = "another long argument") { 143 | # As usual code is indented by two spaces. 144 | } 145 | ``` 146 | 147 | ### Assignment 148 | 149 | Use `<-`, not `=`, for assignment. 150 | 151 | # Good 152 | x <- 5 153 | # Bad 154 | x = 5 155 | 156 | ## Organisation 157 | 158 | ### Commenting guidelines 159 | 160 | Comment your code. 
Entire commented lines should begin with `#` and one space. Comments should explain the why, not the what. 161 | 162 | Use commented lines of `-` and `=` to break up your files into scannable chunks. 163 | 164 | [1]: https://google.github.io/styleguide/Rguide.html 165 | 166 | --------------------------------------------------------------------------------