├── .Rbuildignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── cacheQuery.r ├── execQuery.r ├── getCacheFilename.r ├── getParameters.r ├── getQueries.r ├── getSQL.r ├── getSQLRaw.r ├── interactiveSQL.r ├── recodeColumns.r ├── sqlFile.r ├── sqlPaths.r ├── sqldoc.r ├── sqlexec.R └── sqlutils-package.r ├── demo ├── 00Index ├── isql.r └── sqlutils.r ├── inst ├── db │ └── students.db └── sql │ ├── StudentSummary.sql │ └── StudentsInRange.sql ├── man ├── cacheQuery.Rd ├── execQuery.Rd ├── getCacheFilename.Rd ├── getParameters.Rd ├── getQueries.Rd ├── getSQL.Rd ├── getSQLRaw.Rd ├── is.null.string.Rd ├── isql.Rd ├── parse.element.Rd ├── parse.introduction.Rd ├── print.Rd ├── recodeColumns.Rd ├── sqlFile.Rd ├── sqlPaths.Rd ├── sqldoc.Rd ├── sqlexec.JDBCConnection.Rd ├── sqlexec.PostgreSQLConnection.Rd ├── sqlexec.RMySQL.Rd ├── sqlexec.RODBC.Rd ├── sqlexec.Rd ├── sqlexec.SQLiteConnection.Rd ├── sqlutils-package.Rd └── sqlutils.envir.Rd ├── readme.md ├── sqlutils-dev.r └── vignettes └── DataDictionary.Rnw /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | readme.md 4 | sqlutils-dev.r 5 | cran-comments.md 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: sqlutils 2 | Type: Package 3 | Title: Utilities for working with SQL files. 4 | Version: 1.2 5 | Date: 2014-11-19 6 | Author: Jason Bryer 7 | Maintainer: Jason Bryer 8 | URL: http://jason.bryer.org/sqlutils, http://github.com/jbryer/sqlutils 9 | BugReports: https://github.com/jbryer/sqlutils/issues 10 | Description: This package provides utilities for working with a library of SQL 11 | files. 
12 | License: GPL 13 | Depends: 14 | roxygen2, 15 | stringr, 16 | DBI 17 | Suggests: 18 | tcltk, 19 | sqldf, 20 | xtable 21 | Enhances: RPostgreSQL, 22 | RODBC, 23 | RMySQL, 24 | RSQLite, 25 | RJDBC 26 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.0.1): do not edit by hand 2 | 3 | S3method(print,sqldoc) 4 | export(cacheQuery) 5 | export(execQuery) 6 | export(getCacheFilename) 7 | export(getParameters) 8 | export(getQueries) 9 | export(getSQL) 10 | export(isql) 11 | export(sqlPaths) 12 | export(sqldoc) 13 | export(sqlexec) 14 | import(DBI) 15 | import(roxygen2) 16 | import(stringr) 17 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | CHANGES IN VERSION 1.2 2 | o Support SQL style comments (thanks to Andy Choens for the pull request). SQL files can have -- and --'. The latter are used to generate the Roxygen style documentation. 3 | o Added support for RJDBC database connections. 4 | 5 | CHANGES IN VERSION 1.1.2 6 | o Updates for compatibility with roxygen2 version 4.0 7 | 8 | CHANGES IN VERSION 1.1.1 9 | o Fixes to pass new R CMD CHECK errors and changes to the roxygen2 package. 10 | o Changed the default file format for cacheQuery to rda. 11 | 12 | CHANGES IN VERSION 1.1 13 | NEW FEATURES: 14 | o If the maxLevels parameter on execQuery is NULL, then the recode function will not be called. 15 | 16 | BUG FIXES: 17 | o Fixed recoding of columns to not fail if the query returned no columns. 18 | 19 | CHANGES IN VERSION 1.0 20 | NEW FEATURES: 21 | o Initial version! 22 | o Provides a framework for managing a library of SQL files. 23 | o Supports RSQLite, RODBC, RMySQL, and RPostgreSQL. 24 | o Interactive SQL mode to enter SQL commands and execute them against a database without leaving R.
25 | o Data dictionary template. 26 | -------------------------------------------------------------------------------- /R/cacheQuery.r: -------------------------------------------------------------------------------- 1 | #' Function for working with cached queries. 2 | #' 3 | #' This will first look in the given directory for a CSV or Rda version of the file, if 4 | #' it exists, that will be read and returned. Otherwise it will execute the query 5 | #' and then saves a CSV or Rda file. 6 | #' 7 | #' @param dir the directory to save and load cached data files. Defaults to the 8 | #' current working directory (i.e. \code{\link{getwd}}. 9 | #' @param filename the filename of the cached data file. 10 | #' @param query the query to execute. 11 | #' @param maxLevels the maximum number of levels a factor can have before being 12 | #' converted to a character vector. 13 | #' @param ... other parameters passed to the \code{\link{execQuery}} function including 14 | #' query parameters. 15 | #' @param format either \code{csv} for comma separated value files or \code{rda} for R data files. 16 | #' @return a data frame. 17 | #' @export 18 | cacheQuery <- function(query=NULL, dir=getwd(), 19 | filename=getCacheFilename(query=query, dir=dir, ext=format, ...), 20 | format='rda', 21 | maxLevels=20, 22 | ...) { 23 | if(file.exists(filename)) { 24 | message(paste("Reading from cached query file: ", filename, sep='')) 25 | if(tolower(format) == 'rda') { 26 | load(filename) 27 | } else if(tolower(format) == 'csv') { 28 | df = read.csv(filename) 29 | } else { 30 | stop('Unsupported format type.') 31 | } 32 | df = recodeColumns(df, maxLevels) 33 | } else { 34 | message(paste("Executing ", query, " and saving to ", filename, sep='')) 35 | df = execQuery(query=query, maxLevels=maxLevels, ...) 
36 | if(tolower(format) == 'rda') { 37 | save(df, file=filename) 38 | } else if(tolower(format) == 'csv') { 39 | write.csv(df, filename, row.names=FALSE) 40 | } else { 41 | stop('Unsupported format type.') 42 | } 43 | } 44 | return(df) 45 | } 46 | -------------------------------------------------------------------------------- /R/execQuery.r: -------------------------------------------------------------------------------- 1 | #' Executes the specified query and returns a data frame. The SQL is run by the 2 | #' \code{\link{sqlexec}} method for the connection's class. For other databases, use 3 | #' \code{\link{getSQL}} and execute the SQL statement using the appropriate database connection. 4 | #' 5 | #' @param query the query to execute. 6 | #' @param connection the database connection. 7 | #' @param maxLevels the maximum number of levels a factor can have before being 8 | #' converted to a character. Set to \code{NULL} to not recode. 9 | #' @param ... other parameters passed to \code{\link{getSQL}} and \code{\link{sqlexec}}. 10 | #' @seealso sqlexec, cacheQuery 11 | #' @export 12 | execQuery <- function(query=NULL, connection=NULL, maxLevels=20, ...) { 13 | sql = getSQL(query=query, ...) 14 | df <- sqlexec(connection, sql=sql, ...) 15 | if(!is.null(maxLevels)) { 16 | df <- recodeColumns(df, maxLevels) 17 | } 18 | return(df) 19 | } 20 | -------------------------------------------------------------------------------- /R/getCacheFilename.r: -------------------------------------------------------------------------------- 1 | #' Returns the complete filepath to the cache file. 2 | #' 3 | #' @param query the query name. 4 | #' @param dir the directory to save the cache file to. 5 | #' @param ext file extension. 6 | #' @param ... query parameters. 7 | #' @return full filepath to the cached file. 8 | #' @export 9 | getCacheFilename <- function(query, dir=getwd(), ext='csv', ...)
{ 10 | parms = getParameters(query) 11 | parmvals = unlist(list(...)) 12 | filename = paste(dir, '/', query, sep='') 13 | if(length(parms) > 0) { 14 | for(i in 1:length(parms)) { 15 | filename = paste(filename, parms[i], parmvals[parms[i]], sep='.') 16 | } 17 | } 18 | if(nchar(filename) >= 251) { 19 | warning(paste0('The cached filename is longer than 255 characters. ', 20 | 'This will cause an error on some operating systems. Consider ', 21 | 'specifying your own filename parameter. The filename will be ', 22 | 'truncated to 255 characters.')) 23 | filename <- substr(filename, 1, 251) 24 | } 25 | filename = paste(filename, ext, sep='.') 26 | return(filename) 27 | } 28 | -------------------------------------------------------------------------------- /R/getParameters.r: -------------------------------------------------------------------------------- 1 | #' Returns the parameters that must be set for the given query. 2 | #' 3 | #' Parameters are \code{:name:} tokens in the SQL text. Only well-formed tokens are 4 | #' matched, so an unpaired colon (e.g. in a time literal) is ignored rather than 5 | #' being paired with a later colon. 6 | #' 7 | #' @param query the query name. 8 | #' @return character vector of unique parameter names (empty if there are none). 9 | #' @export 10 | getParameters <- function(query) { 11 | sql <- getSQLRaw(query) 12 | # A name starts with a letter, dot or underscore, followed by letters, digits, dots or underscores. 13 | tokens <- regmatches(sql, gregexpr(":[[:alpha:]._][[:alnum:]._]*:", sql))[[1]] 14 | # Drop the leading and trailing colon of each token. 15 | return(unique(substr(tokens, 2, nchar(tokens) - 1))) 16 | } 17 | -------------------------------------------------------------------------------- /R/getQueries.r: -------------------------------------------------------------------------------- 1 | #' Returns a list of available queries in the current repository.
2 | #' 3 | #' @return character vector of query names (file names without the \code{.sql} extension). 4 | #' @export 5 | getQueries <- function() { 6 | paths <- sqlPaths() 7 | files <- character() 8 | for(p in paths) { 9 | # list.files() takes a regular expression, not a glob. 10 | files = c(files, list.files(path=p, pattern="\\.sql$")) 11 | } 12 | return( sub("\\.sql$", "", files) ) 13 | } 14 | -------------------------------------------------------------------------------- /R/getSQL.r: -------------------------------------------------------------------------------- 1 | #' Returns the query as a string with the parameters set. 2 | #' 3 | #' Each \code{:name:} token in the SQL is replaced with the value supplied in 4 | #' \code{...}. A parameter that is not supplied falls back to the default 5 | #' documented in the SQL file (with a warning); if none exists an error is raised. 6 | #' Values are substituted into the SQL text verbatim, so never pass untrusted input. 7 | #' 8 | #' @param query the query name. 9 | #' @param ... SQL parameters as \code{name=value} pairs. 10 | #' @return the SQL string with parameters replaced. 11 | #' @export 12 | getSQL <- function(query=NULL, ...) { 13 | sql <- getSQLRaw(query) 14 | parmvals <- unlist(list(...)) 15 | parms <- getParameters(query) 16 | notset <- parms[!parms %in% names(parmvals)] 17 | doc <- sqldoc(query) 18 | if(length(notset) > 0) { 19 | params <- doc$params 20 | for(v in notset) { 21 | # params is NULL when the SQL file documents no parameters. 22 | default <- if(is.null(params)) character() else params[params$param == v, 'default'] 23 | if(length(default) > 0 && !is.na(default[1])) { 24 | # Defaults are R expressions stored in the SQL file's documentation. 25 | val <- eval(parse(text=default[1])) 26 | parmvals = c(parmvals, val) 27 | names(parmvals)[length(parmvals)] <- v 28 | warning(paste("The ", v, ' parameter has not been set. Using the default value of ', 29 | val, sep='')) 30 | } else { 31 | stop(paste("The ", v, 32 | " parameter has not been set and no default value exists", sep='')) 33 | } 34 | } 35 | } 36 | nms <- names(parmvals) 37 | # Skip unnamed values: an empty name would turn the token into "::". 38 | for(i in which(!is.na(nms) & nzchar(nms))) { 39 | # fixed=TRUE: names and values are literal text, not regular expressions. 40 | sql <- gsub(paste(":", nms[i], ":", sep=''), parmvals[i], sql, fixed=TRUE) 41 | } 42 | return(sql) 43 | } 44 | -------------------------------------------------------------------------------- /R/getSQLRaw.r: -------------------------------------------------------------------------------- 1 | #' Returns the SQL from the file without the parameters replaced. 2 | #' 3 | #' @param query the query name. 4 | #' @return the unedited SQL statement.
5 | getSQLRaw <- function(query) { 6 | f <- sqlFile(query) 7 | 8 | if(is.null(f)) { stop(paste("Cannot find query file for ", query, sep='')) } 9 | 10 | sql <- scan(f, what="character", sep=';', multi.line=FALSE, 11 | comment.char=c("#"), quiet=TRUE, quote=NULL) 12 | 13 | sql <- ifelse( grepl("--", sql) 14 | ,substr(sql, 0, regexpr("--", sql)-1 ) 15 | ,sql 16 | ) 17 | 18 | sql <- paste(sql, collapse=" ") 19 | 20 | sql <- gsub(" ", " ", sql) 21 | 22 | return(sql) 23 | } 24 | -------------------------------------------------------------------------------- /R/interactiveSQL.r: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c('tclvalue','tkget','tkdestroy','tktoplevel','tkpack', 2 | 'tklabel','tklabel','tktext','tkmark.set','tkinsert', 3 | 'tkfocus','tkbind','tkgrab.release','tkwait.window', 4 | 'tkbutton')) 5 | 6 | #' Interactive SQL session. 7 | #' 8 | #' This function will start an interactive SQL session. The user can enter SQL 9 | #' statements and execute them against the given database connection. This was 10 | #' initially developed as a teaching tool for learning SQL. 11 | #' 12 | #' @param conn a database connection. 13 | #' @param sql initial SQL statement. 14 | #' @param envir the environment to save data frames when executing \code{save}. 15 | #' @param ... other parameters passed to \code{\link{sqlexec}}. 16 | #' @return returns a list containing two character vectors, one with a history of 17 | #' commands and another with a history of SQL statements. 18 | #' @export 19 | isql <- function(conn, sql = character(), envir=baseenv(), ...) 
{ 20 | library(tcltk) 21 | 22 | cat('Interactive SQL mode (type quit to exit, help for available commands)...\n') 23 | 24 | df <- NULL 25 | history <- list() 26 | history[['sql']] <- character() 27 | history[['commands']] <- character() 28 | 29 | cat("SQL>"); line <- readLines(n=1) 30 | while(line != 'quit' & line != 'exit') { 31 | history[['commands']] <- c(history[['commands']], line) 32 | if(line == 'exec') { 33 | if(length(sql) == 0) { 34 | cat('No SQL to execute\n') 35 | } else if(missing(conn)) { 36 | cat('No database connection available\n') 37 | } else { 38 | cat('Executing SQL...\n') 39 | df <- sqlexec(conn, sql=gsub("\n", " ", sql), ...) 40 | cat(paste(nrow(df), ' rows of ', ncol(df), ' variables returned\n', sep='')) 41 | } 42 | } else if(line == 'print') { 43 | cat(sql) 44 | cat('\n') 45 | } else if(line == 'sql') { 46 | cat("Enter SQL statement ending with semicolon:\n") 47 | sql <- character() 48 | line <- readLines(n=1) 49 | while(substr(line, nchar(line), nchar(line)) != ';') { 50 | sql <- paste(sql, line, sep='\n') 51 | line <- readLines(n=1) 52 | } 53 | sql <- paste(sql, substr(line, 1, nchar(line)-1), sep='\n') 54 | history[['sql']] <- c(history[['sql']], sql) 55 | } else if(substr(line, 1, 4) == 'save') { 56 | if(is.null(df)) { 57 | cat('No data frame to save. 
Try exec first.\n') 58 | } else { 59 | dfname <- 'results' 60 | if(nchar(line) > 5) { # 'save ' is 5 characters, so the name starts at position 6 61 | dfname <- substr(line, 6, nchar(line)) 62 | } 63 | assign(paste(dfname, '.sql', sep=''), sql, envir=envir) 64 | assign(dfname, df, envir=envir) 65 | cat(paste('Data frame ', dfname, ' saved to global environment\n', sep='')) 66 | } 67 | } else if(line == 'result') { 68 | print(df) 69 | } else if(line == 'edit') { 70 | if(require(tcltk)) { 71 | OnOK <- function() { 72 | sql <<- tclvalue(tkget(txt,"0.0","end")) 73 | tkdestroy(tt) 74 | } 75 | OnCancel <- function() { 76 | tkdestroy(tt) 77 | } 78 | tt <- tktoplevel() 79 | tkpack(tklabel(tt,text="SQL Entry")) 80 | txt <- tktext(tt) 81 | tkmark.set(txt,"insert","0.0") 82 | tkinsert(txt, "end", sql) 83 | OK.button <- tkbutton(tt, text="OK", command=OnOK) 84 | Cancel.button <- tkbutton(tt, text="Cancel", command=OnCancel) 85 | tkpack(txt) 86 | tkpack(OK.button, Cancel.button) 87 | tkfocus(txt) 88 | tkbind(tt, "<Destroy>", function() { tkgrab.release(tt) }) # release the grab when the window is destroyed 89 | tkwait.window(tt) 90 | history[['sql']] <- c(history[['sql']], sql) 91 | } else { 92 | cat("tcltk package did not load\n") 93 | } 94 | } else if(line == 'help') { 95 | cat(' Command Description\n') 96 | cat(' ___________ ______________________________________________________\n') 97 | cat(' quit quit interactive mode\n') 98 | cat(' help display this message\n') 99 | cat(' sql enter SQL statement\n') 100 | cat(' edit edit SQL in a separate text window\n') 101 | cat(' print print the last entered SQL statement\n') 102 | cat(' exec execute that last entered SQL statement\n') 103 | cat(' result prints the last results\n') 104 | cat(' save [name] save the last executed query to the global environment\n') 105 | } 106 | cat("SQL>"); line <- readLines(n=1) 107 | } 108 | 109 | invisible(history) 110 | } 111 | 112 | -------------------------------------------------------------------------------- /R/recodeColumns.r: -------------------------------------------------------------------------------- 1 |
#' Recodes factors with more than \code{maxLevels} to characters. 2 | #' @param df the data frame to recode. 3 | #' @param maxLevels the maximum number of levels a factor can have before being 4 | #' converted to a character. If \code{NULL}, \code{df} is returned unchanged. 5 | #' @return the data frame with high-cardinality factor columns converted to character. 6 | recodeColumns <- function(df, maxLevels=20) { 7 | # cacheQuery passes maxLevels through unchecked, so NULL must mean "do not recode". 8 | if(is.null(maxLevels)) { return(df) } 9 | for(c in seq_len(ncol(df))) { 10 | if(class(df[[c]])[1] == 'factor' && nlevels(df[[c]]) > maxLevels) { 11 | df[[c]] = as.character(df[[c]]) 12 | } 13 | } 14 | return(df) 15 | } 16 | -------------------------------------------------------------------------------- /R/sqlFile.r: -------------------------------------------------------------------------------- 1 | #' Returns the full path to the query or NULL if not found. 2 | #' 3 | #' @param query the query to find. 4 | #' @return path to the query file. 5 | sqlFile <- function(query) { 6 | paths <- sqlPaths() 7 | for(p in paths) { 8 | f <- paste(p, '/', query, '.sql', sep='') 9 | if(file.exists(f)) { 10 | return(f) 11 | } 12 | } 13 | warning(paste(query, ' not found.', sep='')) 14 | invisible(NULL) 15 | } 16 | -------------------------------------------------------------------------------- /R/sqlPaths.r: -------------------------------------------------------------------------------- 1 | #' Search paths for SQL repositories. 2 | #' 3 | #' @param path new path to add. This can be a character vector of length greater than 1. 4 | #' @param replace if FALSE, the path(s) will be added to the already existing list.
5 | #' @return the current search paths as an unnamed character vector. 6 | #' @export 7 | sqlPaths <- function(path, replace=TRUE) { 8 | # Fall back to the bundled sql directory (the same default .onAttach sets) when sqlrepos is not set yet. 9 | paths <- unlist(mget("sqlrepos", envir=sqlutils.envir, 10 | ifnotfound=list(paste(system.file(package='sqlutils'), '/sql', sep='')))) 11 | if(!missing(path)) { 12 | path <- normalizePath(path.expand(path), mustWork=FALSE) 13 | if(replace) { 14 | paths <- unique(c(path)) 15 | } else { 16 | paths <- unique(c(path, paths)) 17 | } 18 | assign("sqlrepos", value=paths, envir=sqlutils.envir) 19 | } 20 | return(unname(paths)) 21 | } 22 | -------------------------------------------------------------------------------- /R/sqldoc.r: -------------------------------------------------------------------------------- 1 | #' Parses the query and returns a list with all the elements of the comment. 2 | #' 3 | #' @param query the query name. 4 | #' @return a list with documentation including \code{introduction}, \code{return}, 5 | #' and \code{params} (as a data frame). 6 | #' @export 7 | sqldoc <- function(query) { 8 | f <- sqlFile(query) 9 | if(is.null(f)) { stop(paste("Cannot find query file for ", query, sep='')) } 10 | 11 | sql = scan(f, what="character", 12 | sep=';', multi.line=FALSE, comment.char=c(""), quiet=TRUE, quote=NULL) 13 | for(i in seq_along(sql)){ 14 | sql[i] <- gsub("--'", "#'", sql[i]) 15 | } 16 | l <- c() 17 | for(i in seq_along(sql)) { 18 | if(grepl("^[[:space:]]*#'", sql[i])) { # test every line (not only the first); leading whitespace allowed 19 | l = c(l, i) 20 | } 21 | } 22 | if(length(l) == 0) return(list()) 23 | lines <- sql[l] 24 | 25 | #Borrowed heavily from roxygen2 26 | #https://github.com/yihui/roxygen2/blob/master/R/parse-preref.R 27 | LINE.DELIMITER <- "\\s*#+' ?" 28 | delimited.lines <- lines[str_detect(lines, LINE.DELIMITER)] 29 | trimmed.lines <- str_trim(str_replace(delimited.lines, LINE.DELIMITER, ""), "right") 30 | if (length(trimmed.lines) == 0) return(list()) 31 | joined.lines <- str_c(trimmed.lines, collapse = '\n') 32 | elements <- strsplit(joined.lines, '(?
0 & !is.na(getParameters(query)[1])) { 39 | params <- data.frame(param=getParameters(query), desc=NA, default=NA, default.val=NA, 40 | stringsAsFactors=FALSE) 41 | for(l in sqldoc[names(sqldoc) == 'param']) { 42 | params[params$param == l$name,]$desc <- l$description 43 | } 44 | for(l in sqldoc[names(sqldoc) == 'default']) { 45 | params[params$param == l$name,]$default <- l$description 46 | params[params$param == l$name,]$default.val <- eval(parse(text=l$description)) 47 | } 48 | sqldoc$params <- params 49 | } 50 | returns <- data.frame(variable=character(), desc=character(), stringsAsFactors=FALSE) 51 | for(l in sqldoc[names(sqldoc) == 'return']) { 52 | returns <- rbind(returns, data.frame( 53 | variable=l$name, desc=l$description, stringsAsFactors=FALSE)) 54 | } 55 | 56 | sqldoc <- sqldoc[!(names(sqldoc) %in% c('param', 'default', 'return'))] 57 | sqldoc$returns <- returns 58 | 59 | class(sqldoc) <- c('sqldoc') 60 | return(sqldoc) 61 | } 62 | 63 | #' Prints the SQL documentation. 64 | #' @param x sqldoc object. 65 | #' @param ... currently unused. 66 | #' @method print sqldoc 67 | #' @rdname print 68 | #' @export 69 | print.sqldoc <- function(x, ...) { 70 | cat(x$introduction) 71 | cat('\n') 72 | if(!is.null(x$params)) { 73 | cat('Parameters:\n') 74 | print(x$params, row.names=FALSE) 75 | } 76 | if(!is.null(x$returns)) { 77 | cat('Returns (note that this list may not be complete):\n') 78 | print(x$returns, row.names=FALSE) 79 | } 80 | } 81 | 82 | #' Parse a raw string containing key and expressions. 83 | #' 84 | #' Copied from roxygen2: https://github.com/yihui/roxygen2/blob/master/R/parse-preref.R 85 | #' 86 | #' @param element the string containing key and expressions 87 | #' @param srcref source reference. 
88 | #' @return A list containing the parsed constituents 89 | #' @author yihui 90 | parse.element <- function(element, srcref) { 91 | # From an old version of roxygen2 92 | parse.name.description <- function(key, rest, srcref) { 93 | pieces <- str_split_fixed(rest, "[[:space:]]+", 2) 94 | name <- pieces[, 1] 95 | rest <- str_trim(pieces[, 2]) 96 | if(is.null.string(name)) { 97 | stop(paste0(key, " requires a name and description: ", srcref)) 98 | } 99 | list(name = name, description = rest) 100 | } 101 | 102 | #TODO: This should only be done once when the package loads 103 | preref.parsers <- new.env(parent=emptyenv()) 104 | preref.parsers[['default']] <- parse.name.description 105 | preref.parsers[['return']] <- parse.name.description 106 | preref.parsers[['param']] <- parse.name.description 107 | 108 | pieces <- str_split_fixed(element, "[[:space:]]+", 2) 109 | 110 | tag <- pieces[, 1] 111 | rest <- pieces[, 2] 112 | 113 | #tag_parser <- preref.parsers[[tag]] %||% parse.unknown 114 | tag_parser <- preref.parsers[[tag]] 115 | res <- list(tag_parser(tag, rest, NULL)) 116 | names(res) <- tag 117 | return(res) 118 | } 119 | 120 | #' Parse introduction: the premier part of a roxygen block 121 | #' containing description and option details separated by 122 | #' a blank roxygen line. 123 | #' 124 | #' Copied from roxygen2: https://github.com/yihui/roxygen2/blob/master/R/parse-preref.R 125 | #' 126 | #' @param expression the description to be parsed 127 | #' @return A list containing the parsed description 128 | #' @author yihui 129 | parse.introduction <- function(expression) { 130 | if (is.null.string(expression)) return(NULL) 131 | list(introduction = str_trim(expression)) 132 | } 133 | 134 | #' Does the string contain no matter, but very well [:space:]? 
135 | #' @param string the string to check 136 | #' @return TRUE if the string is empty or contains only whitespace, otherwise FALSE 137 | is.null.string <- function(string) { 138 | str_length(str_trim(string)) == 0 139 | } 140 | 141 | "%||%" <- function(a, b) { 142 | if (!is.null(a)) a else b 143 | } 144 | -------------------------------------------------------------------------------- /R/sqlexec.R: -------------------------------------------------------------------------------- 1 | #' Generic function for executing a query. 2 | #' 3 | #' @param connection the database connection. 4 | #' @param sql the query to execute. 5 | #' @param ... other parameters passed to the appropriate \code{sqlexec} function. 6 | #' @return a data frame. 7 | #' @export sqlexec 8 | sqlexec <- function(connection, sql, ...) { UseMethod("sqlexec") } 9 | 10 | #' Executes queries for RODBC package. 11 | #' @inheritParams sqlexec 12 | sqlexec.RODBC <- function(connection, sql, ...) { 13 | library(RODBC) 14 | RODBC::sqlQuery(connection, sql) #TODO: Why doesn't this work with ... passed through 15 | } 16 | 17 | #' Executes queries for RSQLite package. 18 | #' @inheritParams sqlexec 19 | sqlexec.SQLiteConnection <- function(connection, sql, ...) { 20 | library(RSQLite) 21 | RSQLite::dbGetQuery(connection, sql, ...) 22 | } 23 | 24 | #' Executes queries for RMySQL package. 25 | #' @inheritParams sqlexec 26 | sqlexec.RMySQL <- function(connection, sql, ...) { 27 | library(RMySQL) 28 | DBI::dbGetQuery(connection, sql, ...) # dbSendQuery only returns a result handle; dbGetQuery fetches the rows 29 | } 30 | 31 | #' Executes queries for RPostgreSQL 32 | #' @inheritParams sqlexec 33 | sqlexec.PostgreSQLConnection <- function(connection, sql, ...) { 34 | library(RPostgreSQL) 35 | rs <- RPostgreSQL::dbSendQuery(connection, sql) 36 | RPostgreSQL::fetch(rs, n=-1) 37 | } 38 | 39 | #' Executes queries for RJDBC 40 | #' @inheritParams sqlexec 41 | sqlexec.JDBCConnection <- function(connection, sql, ...)
{ 42 | library(RJDBC) 43 | RJDBC::dbGetQuery(connection, sql) 44 | } 45 | -------------------------------------------------------------------------------- /R/sqlutils-package.r: -------------------------------------------------------------------------------- 1 | #' Utilities for managing a library of SQL files. 2 | #' 3 | #' @name sqlutils-package 4 | #' @aliases sqlutils 5 | #' @docType package 6 | #' @title Utilities for working with SQL files. 7 | #' @author Jason Bryer \email{jason@@bryer.org} 8 | #' @keywords package database sql 9 | #' @import DBI 10 | #' @import roxygen2 11 | #' @import stringr 12 | NULL 13 | 14 | #' The locations of SQL files 15 | sqlutils.envir <- new.env() 16 | 17 | .onAttach <- function(libname, pkgname) { 18 | assign("sqlrepos", value=c(paste(system.file(package='sqlutils'), '/sql', sep='')), 19 | envir=sqlutils.envir) 20 | } 21 | -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | sqlutils Creates a database with two tables and demonstrates how to use the included queries. 2 | isql This demo will set up a database for using the interactive SQL mode.
3 | -------------------------------------------------------------------------------- /demo/isql.r: -------------------------------------------------------------------------------- 1 | require(sqlutils) 2 | require(RSQLite) 3 | 4 | sqlfile <- paste(system.file(package='sqlutils'), '/db/students.db', sep='') 5 | m <- dbDriver("SQLite") 6 | conn <- dbConnect(m, dbname=sqlfile) 7 | 8 | hist <- isql(conn=conn, sql=getSQL('StudentSummary')) 9 | names(hist) 10 | hist[['commands']] 11 | hist[['sql']] 12 | -------------------------------------------------------------------------------- /demo/sqlutils.r: -------------------------------------------------------------------------------- 1 | require(sqlutils) 2 | require(RSQLite) 3 | 4 | sqlfile <- paste(system.file(package='sqlutils'), '/db/students.db', sep='') 5 | m <- dbDriver("SQLite") 6 | conn <- dbConnect(m, dbname=sqlfile) 7 | 8 | #This will return the path(s) where query files will be loaded from 9 | sqlPaths() 10 | 11 | #List of available queries 12 | getQueries() 13 | 14 | #Return documentation of the queries 15 | sqldoc('StudentSummary') 16 | sqldoc('StudentsInRange') 17 | 18 | #Execute the query 19 | q1 <- execQuery('StudentSummary', connection=conn) 20 | head(q1) 21 | #Can always get the SQL statement to examine 22 | getSQL('StudentSummary') 23 | 24 | q2 <- execQuery('StudentsInRange', connection=conn) 25 | head(q2) 26 | #This query that has parameters will have their values replaced. 27 | getSQL('StudentsInRange') 28 | 29 | #Cache query 30 | fn <- tempfile(fileext='.rda') 31 | q3 <- cacheQuery('StudentSummary', filename=fn, connection=conn) 32 | names(q3); nrow(q3) 33 | 34 | #Since this will read from the cache, we don't need to specify the connection. 
35 | q4 <- cacheQuery('StudentSummary', filename=fn) 36 | names(q4); nrow(q4) 37 | 38 | #Clean-up our session 39 | dbDisconnect(conn) 40 | -------------------------------------------------------------------------------- /inst/db/students.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jbryer/sqlutils/ae9e9f118ce29d0d239f9abf5858dabc0eecc030/inst/db/students.db -------------------------------------------------------------------------------- /inst/sql/StudentSummary.sql: -------------------------------------------------------------------------------- 1 | --' Provides counts of all records by month. 2 | --' @return CreatedDate the warehouse date. 3 | --' @return count the number of students enrolled as of the corresponding CreatedDate 4 | SELECT CreatedDate, count(StudentId) AS count 5 | FROM students 6 | GROUP BY CreatedDate 7 | ORDER BY CreatedDate 8 | -------------------------------------------------------------------------------- /inst/sql/StudentsInRange.sql: -------------------------------------------------------------------------------- 1 | --' Students enrolled within the given date range. 2 | --' 3 | --' @param startDate the start of the date range to return students. 4 | --' @default startDate '2012-01-01' 5 | --' @param endDate the end of the date range to return students. 6 | --' @default endDate format(Sys.Date(), '%Y-%m-%d') 7 | --' @return CreatedDate the date the row was added to the warehouse data. 8 | --' @return StudentId the student id. 
9 | SELECT * 10 | FROM students 11 | WHERE CreatedDate >= ':startDate:' AND CreatedDate <= ':endDate:' 12 | -------------------------------------------------------------------------------- /man/cacheQuery.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{cacheQuery} 3 | \alias{cacheQuery} 4 | \title{Function for working with cached queries.} 5 | \usage{ 6 | cacheQuery(query = NULL, dir = getwd(), filename = getCacheFilename(query 7 | = query, dir = dir, ext = format, ...), format = "rda", maxLevels = 20, 8 | ...) 9 | } 10 | \arguments{ 11 | \item{dir}{the directory to save and load cached data files. Defaults to the 12 | current working directory (i.e. \code{\link{getwd}}.} 13 | 14 | \item{filename}{the filename of the cached data file.} 15 | 16 | \item{query}{the query to execute.} 17 | 18 | \item{maxLevels}{the maximum number of levels a factor can have before being 19 | converted to a character vector.} 20 | 21 | \item{...}{other parameters passed to the \code{\link{execQuery}} function including 22 | query parameters.} 23 | 24 | \item{format}{either \code{csv} for comma separated value files or \code{rda} for R data files.} 25 | } 26 | \value{ 27 | a data frame. 28 | } 29 | \description{ 30 | This will first look in the given directory for a CSV or Rda version of the file, if 31 | it exists, that will be read and returned. Otherwise it will execute the query 32 | and then saves a CSV or Rda file. 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/execQuery.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{execQuery} 3 | \alias{execQuery} 4 | \title{Executes the specified query and returns a data frame. This function currently 5 | supports RODBC, RSQLite, and RMySQL. 
For other databases, use getQuery() and 6 | execute the SQL statement using the appropriate database connection.} 7 | \usage{ 8 | execQuery(query = NULL, connection = NULL, maxLevels = 20, ...) 9 | } 10 | \arguments{ 11 | \item{query}{the query to execute.} 12 | 13 | \item{connection}{the database connection.} 14 | 15 | \item{maxLevels}{the maximum number of levels a factor can have before being 16 | converted to a character. Set to \code{NULL} to not recode.} 17 | 18 | \item{...}{other parameters passed to \code{\link{getSQL}} and \code{\link{sqlexec}}.} 19 | } 20 | \description{ 21 | Executes the specified query and returns a data frame. This function currently 22 | supports RODBC, RSQLite, and RMySQL. For other databases, use getQuery() and 23 | execute the SQL statement using the appropriate database connection. 24 | } 25 | \seealso{ 26 | sqlexec, cacheQuery 27 | } 28 | 29 | -------------------------------------------------------------------------------- /man/getCacheFilename.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{getCacheFilename} 3 | \alias{getCacheFilename} 4 | \title{Returns the complete filepath to the cache file.} 5 | \usage{ 6 | getCacheFilename(query, dir = getwd(), ext = "csv", ...) 7 | } 8 | \arguments{ 9 | \item{query}{the query name.} 10 | 11 | \item{dir}{the directory to save the cache file to.} 12 | 13 | \item{ext}{file extension.} 14 | 15 | \item{...}{query parameters.} 16 | } 17 | \value{ 18 | full filepath to the cached file. 19 | } 20 | \description{ 21 | Returns the complete filepath to the cache file. 
22 | } 23 | 24 | -------------------------------------------------------------------------------- /man/getParameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{getParameters} 3 | \alias{getParameters} 4 | \title{Returns the parameters that must be set for the given query.} 5 | \usage{ 6 | getParameters(query) 7 | } 8 | \arguments{ 9 | \item{query}{the query name.} 10 | } 11 | \value{ 12 | list of parameter names. 13 | } 14 | \description{ 15 | Returns the parameters that must be set for the given query. 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/getQueries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{getQueries} 3 | \alias{getQueries} 4 | \title{Returns a list of available queries in the current repository.} 5 | \usage{ 6 | getQueries() 7 | } 8 | \description{ 9 | Returns a list of available queries in the current repository. 10 | } 11 | 12 | -------------------------------------------------------------------------------- /man/getSQL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{getSQL} 3 | \alias{getSQL} 4 | \title{Returns the query as a string with the parameters set.} 5 | \usage{ 6 | getSQL(query = NULL, ...) 7 | } 8 | \arguments{ 9 | \item{query}{the query name.} 10 | 11 | \item{...}{SQL parameters.} 12 | } 13 | \value{ 14 | the SQL string with parameters replaced. 15 | } 16 | \description{ 17 | Returns the query as a string with the parameters set. 
18 | } 19 | 20 | -------------------------------------------------------------------------------- /man/getSQLRaw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{getSQLRaw} 3 | \alias{getSQLRaw} 4 | \title{Returns the SQL from the file without the parameters replaced.} 5 | \usage{ 6 | getSQLRaw(query) 7 | } 8 | \arguments{ 9 | \item{query}{the query name.} 10 | } 11 | \value{ 12 | the unedited SQL statement. 13 | } 14 | \description{ 15 | Returns the SQL from the file without the parameters replaced. 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/is.null.string.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{is.null.string} 3 | \alias{is.null.string} 4 | \title{Does the string contain no matter, but very well [:space:]?} 5 | \usage{ 6 | is.null.string(string) 7 | } 8 | \arguments{ 9 | \item{string}{the string to check} 10 | } 11 | \value{ 12 | TRUE if the string contains words, otherwise FALSE 13 | } 14 | \description{ 15 | Does the string contain no matter, but very well [:space:]? 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/isql.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{isql} 3 | \alias{isql} 4 | \title{Interactive SQL session.} 5 | \usage{ 6 | isql(conn, sql = character(), envir = baseenv(), ...) 
7 | } 8 | \arguments{ 9 | \item{conn}{a database connection.} 10 | 11 | \item{sql}{initial SQL statement.} 12 | 13 | \item{envir}{the environment to save data frames when executing \code{save}.} 14 | 15 | \item{...}{other parameters passed to \code{\link{sqlexec}}.} 16 | } 17 | \value{ 18 | returns a list containing two character vectors, one with a history of 19 | commands and another with a history of SQL statements. 20 | } 21 | \description{ 22 | This function will start an interactive SQL session. The user can enter SQL 23 | statements and execute them against the given database connection. This was 24 | initially developed as a teaching tool for learning SQL. 25 | } 26 | 27 | -------------------------------------------------------------------------------- /man/parse.element.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{parse.element} 3 | \alias{parse.element} 4 | \title{Parse a raw string containing key and expressions.} 5 | \usage{ 6 | parse.element(element, srcref) 7 | } 8 | \arguments{ 9 | \item{element}{the string containing key and expressions} 10 | 11 | \item{srcref}{source reference.} 12 | } 13 | \value{ 14 | A list containing the parsed constituents 15 | } 16 | \description{ 17 | Copied from roxygen2: https://github.com/yihui/roxygen2/blob/master/R/parse-preref.R 18 | } 19 | \author{ 20 | yihui 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/parse.introduction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{parse.introduction} 3 | \alias{parse.introduction} 4 | \title{Parse introduction: the premier part of a roxygen block 5 | containing description and option details separated by 6 | a blank roxygen line.} 7 | \usage{ 8 | parse.introduction(expression) 9 | } 10 | \arguments{ 11 | 
\item{expression}{the description to be parsed} 12 | } 13 | \value{ 14 | A list containing the parsed description 15 | } 16 | \description{ 17 | Copied from roxygen2: https://github.com/yihui/roxygen2/blob/master/R/parse-preref.R 18 | } 19 | \author{ 20 | yihui 21 | } 22 | 23 | -------------------------------------------------------------------------------- /man/print.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{print.sqldoc} 3 | \alias{print.sqldoc} 4 | \title{Prints the SQL documentation.} 5 | \usage{ 6 | \method{print}{sqldoc}(x, ...) 7 | } 8 | \arguments{ 9 | \item{x}{sqldoc object.} 10 | 11 | \item{...}{currently unused.} 12 | } 13 | \description{ 14 | Prints the SQL documentation. 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/recodeColumns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{recodeColumns} 3 | \alias{recodeColumns} 4 | \title{Recodes factors with more than \code{maxLevels} to characters.} 5 | \usage{ 6 | recodeColumns(df, maxLevels = 20) 7 | } 8 | \arguments{ 9 | \item{df}{the data frame to recode.} 10 | 11 | \item{maxLevels}{the maximum number of levels a factor can have before being 12 | converted to a character.} 13 | } 14 | \description{ 15 | Recodes factors with more than \code{maxLevels} to characters. 
16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/sqlFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlFile} 3 | \alias{sqlFile} 4 | \title{Returns the full path to the query or NULL if not found.} 5 | \usage{ 6 | sqlFile(query) 7 | } 8 | \arguments{ 9 | \item{query}{the query to find.} 10 | } 11 | \value{ 12 | path to the query file. 13 | } 14 | \description{ 15 | Returns the full path to the query or NULL if not found. 16 | } 17 | 18 | -------------------------------------------------------------------------------- /man/sqlPaths.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlPaths} 3 | \alias{sqlPaths} 4 | \title{Search paths for SQL repositories.} 5 | \usage{ 6 | sqlPaths(path, replace = TRUE) 7 | } 8 | \arguments{ 9 | \item{path}{new path to add. This can be a character vector of length greater than 1.} 10 | 11 | \item{replace}{if FALSE, the path(s) will be added to the already existing list.} 12 | } 13 | \description{ 14 | Search paths for SQL repositories. 15 | } 16 | 17 | -------------------------------------------------------------------------------- /man/sqldoc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqldoc} 3 | \alias{sqldoc} 4 | \title{Parses the query and returns a list with all the elements of the comment.} 5 | \usage{ 6 | sqldoc(query) 7 | } 8 | \arguments{ 9 | \item{query}{the query name.} 10 | } 11 | \value{ 12 | a list with documentation including \code{introduction}, \code{return}, 13 | and \code{params} (as a data frame). 14 | } 15 | \description{ 16 | Parses the query and returns a list with all the elements of the comment. 
17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/sqlexec.JDBCConnection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlexec.JDBCConnection} 3 | \alias{sqlexec.JDBCConnection} 4 | \title{Executes queries for RJDBC} 5 | \usage{ 6 | \method{sqlexec}{JDBCConnection}(connection, sql, ...) 7 | } 8 | \arguments{ 9 | \item{connection}{the database connection.} 10 | 11 | \item{sql}{the query to execute.} 12 | 13 | \item{...}{other parameters passed to the appropriate \code{sqlexec} function.} 14 | } 15 | \description{ 16 | Executes queries for RJDBC 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/sqlexec.PostgreSQLConnection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlexec.PostgreSQLConnection} 3 | \alias{sqlexec.PostgreSQLConnection} 4 | \title{Executes queries for RPostgreSQL} 5 | \usage{ 6 | \method{sqlexec}{PostgreSQLConnection}(connection, sql, ...) 7 | } 8 | \arguments{ 9 | \item{connection}{the database connection.} 10 | 11 | \item{sql}{the query to execute.} 12 | 13 | \item{...}{other parameters passed to the appropriate \code{sqlexec} function.} 14 | } 15 | \description{ 16 | Executes queries for RPostgreSQL 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/sqlexec.RMySQL.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlexec.RMySQL} 3 | \alias{sqlexec.RMySQL} 4 | \title{Executes queries for RMySQL package.} 5 | \usage{ 6 | \method{sqlexec}{RMySQL}(connection, sql, ...) 
7 | } 8 | \arguments{ 9 | \item{connection}{the database connection.} 10 | 11 | \item{sql}{the query to execute.} 12 | 13 | \item{...}{other parameters passed to the appropriate \code{sqlexec} function.} 14 | } 15 | \description{ 16 | Executes queries for RMySQL package. 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/sqlexec.RODBC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlexec.RODBC} 3 | \alias{sqlexec.RODBC} 4 | \title{Executes queries for RODBC package.} 5 | \usage{ 6 | \method{sqlexec}{RODBC}(connection, sql, ...) 7 | } 8 | \arguments{ 9 | \item{connection}{the database connection.} 10 | 11 | \item{sql}{the query to execute.} 12 | 13 | \item{...}{other parameters passed to the appropriate \code{sqlexec} function.} 14 | } 15 | \description{ 16 | Executes queries for RODBC package. 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/sqlexec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlexec} 3 | \alias{sqlexec} 4 | \title{Generic function for executing a query.} 5 | \usage{ 6 | sqlexec(connection, sql, ...) 7 | } 8 | \arguments{ 9 | \item{connection}{the database connection.} 10 | 11 | \item{sql}{the query to execute.} 12 | 13 | \item{...}{other parameters passed to the appropriate \code{sqlexec} function.} 14 | } 15 | \value{ 16 | a data frame. 17 | } 18 | \description{ 19 | Generic function for executing a query. 
20 | } 21 | 22 | -------------------------------------------------------------------------------- /man/sqlexec.SQLiteConnection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{sqlexec.SQLiteConnection} 3 | \alias{sqlexec.SQLiteConnection} 4 | \title{Executes queries for RSQLite package.} 5 | \usage{ 6 | \method{sqlexec}{SQLiteConnection}(connection, sql, ...) 7 | } 8 | \arguments{ 9 | \item{connection}{the database connection.} 10 | 11 | \item{sql}{the query to execute.} 12 | 13 | \item{...}{other parameters passed to the appropriate \code{sqlexec} function.} 14 | } 15 | \description{ 16 | Executes queries for RSQLite package. 17 | } 18 | 19 | -------------------------------------------------------------------------------- /man/sqlutils-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \docType{package} 3 | \name{sqlutils-package} 4 | \alias{sqlutils} 5 | \alias{sqlutils-package} 6 | \title{Utilities for working with SQL files.} 7 | \description{ 8 | Utilities for managing a library of SQL files. 
9 | } 10 | \author{ 11 | Jason Bryer \email{jason@bryer.org} 12 | } 13 | \keyword{database} 14 | \keyword{package} 15 | \keyword{sql} 16 | 17 | -------------------------------------------------------------------------------- /man/sqlutils.envir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \docType{data} 3 | \name{sqlutils.envir} 4 | \alias{sqlutils.envir} 5 | \title{The locations of SQL files} 6 | \format{\preformatted{ 7 | }} 8 | \usage{ 9 | sqlutils.envir 10 | } 11 | \description{ 12 | The locations of SQL files 13 | } 14 | \keyword{datasets} 15 | 16 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # The sqlutils Package 2 | 3 | The `sqlutils` package provides a set of utility functions to help manage a library of structured query language (SQL) files. The package can be installed from GitHub using the `devtools` package. 4 | 5 | ```R 6 | devtools::install_github('jbryer/sqlutils') 7 | ``` 8 | 9 | The `sqlutils` package provides functions to document, cache, and execute SQL queries. The location of the SQL files is determined by the `sqlPaths()` function. This function behaves in a manner consistent with the `.libPaths()` function. 10 | By default, a single path will be defined being the `data` directory where the `sqlutils` package is installed. 11 | 12 | > sqlPaths() 13 | [1] "/Users/jbryer/R/sqlutils/data" 14 | 15 | Additional search paths can be added using `sqlPaths('/Path/To/SQL/Files')`. By convention, `sqlutils` will work with any plain text files with a `.sql` file extension in any of the directories returned from `sqlPaths()`. In the case of multiple files with the same name, the first one found wins. 16 | 17 | In addition to working with a library (directory) of SQL files, `sqlutils` recognizes `roxygen2` style documentation. 
The `StudentsInRange` script (located in the `data` directory of the installed package), exemplifies how to create a SQL query with two parameters as well as how to define those parameters and provide default values. Default values are used when the user fails to supply values within the `execQuery` or `cacheQuery` functions (described in detail below). The available documentation tags are: 18 | 19 | * @param *paramName* - This provides a description of the parameter. 20 | * @default *paramName* - This defines the default value. This can be any valid R statement. 21 | * @return *columnName* - Provides documentation for any returned columns. 22 | 23 | The contents of the `StudentsInRange` query follows: 24 | 25 | #' Students enrolled within the given date range. 26 | #' 27 | #' @param startDate the start of the date range to return students. 28 | #' @default startDate format(Sys.Date(), '%Y-01-01') 29 | #' @param endDate the end of the date range to return students. 30 | #' @default endDate format(Sys.Date(), '%Y-%m-%d') 31 | #' @return CreatedDate the date the row was added to the warehouse data. 32 | #' @return StudentId the student id. 33 | SELECT * 34 | FROM students 35 | WHERE CreatedDate >= ':startDate:' AND CreatedDate <= ':endDate:' 36 | 37 | It should be noted that parameters are replaced just before executing the query and must be enclosed within a pair of colons (:) and be valid R object names (i.e. not start with a number, contain spaces, or special characters). 38 | 39 | We can now retrieve the documentation from within R using the `sqldoc` command. 40 | 41 | > sqldoc('StudentsInRange') 42 | Students enrolled within the given date range. 43 | Parameters: 44 | param desc default default.val 45 | startDate the start of the date range to return students. format(Sys.Date(), '%Y-01-01') 2012-01-01 46 | endDate the end of the date range to return students. 
format(Sys.Date(), '%Y-%m-%d') 2012-11-19 47 | Returns (note that this list may not be complete): 48 | variable desc 49 | CreatedDate the date the row was added to the warehouse data. 50 | StudentId the student id. 51 | 52 | The required parameters can also be retrieved using the `getParameters` function. 53 | 54 | > getParameters('StudentsInRange') 55 | [1] "startDate" "endDate" 56 | 57 | In the case there are no parameters, an empty character vector is returned. 58 | 59 | > getParameters('StudentSummary') 60 | character(0) 61 | 62 | A list of all available queries is returned using the `getQueries()` function. 63 | 64 | > getQueries() 65 | [1] "StudentsInRange" "StudentSummary" 66 | 67 | There are two functions available to execute queries, `execQuery` and `cacheQuery`. The former will send the SQL query to the database upon every execution. The latter, however, maintains a local cached version (as a CSV or Rda file) of the resulting data frame. Specifically, the function creates a unique filename based upon the query name and parameters (see `getCacheFilename` function; this can also be overwritten using the `filename` parameter). If that file exists in the specified directory (the current working directory by default), then it reads the file from disk and returns that. If the file does not exist, then `execQuery` is called, the result data frame saved to disk, and then the data frame is returned. The following complete example loads the `students` data frame from the `retention` package, saves it to a SQLite database, and executes the two included queries. 
68 | 69 | > require(RSQLite) 70 | > sqlfile <- paste(system.file(package='sqlutils'), '/db/students.db', sep='') 71 | > m <- dbDriver("SQLite") 72 | > conn <- dbConnect(m, dbname=sqlfile) 73 | > q1 <- execQuery('StudentSummary', connection=conn) 74 | > head(q1) 75 | CreatedDate count 76 | 1 2002-07-15 8365 77 | 2 2002-08-15 8251 78 | 3 2002-09-15 8259 79 | 4 2002-10-15 8258 80 | 5 2002-11-15 8151 81 | 6 2002-12-15 8415 82 | 83 | ### Supported databases 84 | 85 | The `sqlutils` package supports database access using the [`RODBC`](http://cran.r-project.org/web/packages/RODBC/index.html), [`RSQLite`](http://cran.r-project.org/web/packages/RSQLite/index.html), [`RPostgreSQL`](http://cran.r-project.org/web/packages/RPostgreSQL/index.html), and [`RMySQL`](http://cran.r-project.org/web/packages/RMySQL/index.html) packages using an S3 generic function called `sqlexec` that dispatches based upon the class of the `connection` parameter. For example, to add support for database connections of class `foo`, the following provides the skeleton of the function to implement: 86 | 87 | ```R 88 | sqlexec.foo <- function(connection, sql, ...) { 89 | #Database implementation here. 90 | #The ... will be passed through from the execQuery call. 
91 | } 92 | ``` 93 | -------------------------------------------------------------------------------- /sqlutils-dev.r: -------------------------------------------------------------------------------- 1 | require(devtools) 2 | setwd("~/Dropbox/Projects/") 3 | 4 | ## Build functions 5 | document('sqlutils') 6 | check_doc('sqlutils') 7 | install('sqlutils', build_vignettes=FALSE) 8 | build_vignettes('sqlutils') 9 | build('sqlutils') 10 | check('sqlutils', cran=TRUE) 11 | 12 | release('sqlutils') 13 | 14 | require(sqlutils) 15 | vignette('DataDictionary') 16 | 17 | 18 | ##### Data setup ############################################################### 19 | # Get a subset of the students from the retention package 20 | require(RSQLite) 21 | data(students) 22 | students <- students[!is.na(students$CreatedDate),] 23 | students$CreatedDate = as.character(students$CreatedDate) 24 | students <- students[students$CreatedDate > '2011-07-01',] 25 | students <- students[students$Level == 'Associate',] 26 | sqlfile <- 'sqlutils/data/students.db' 27 | if(file.exists(sqlfile)) { unlink(sqlfile) } 28 | m <- dbDriver("SQLite") 29 | conn <- dbConnect(m, dbname=sqlfile) 30 | dbWriteTable(conn, "students", students[!is.na(students$CreatedDate),]) 31 | dbDisconnect(conn) 32 | 33 | ##### RPostgreSQL test using Postgres.app ##################################### 34 | require(RPostgreSQL) 35 | drv <- dbDriver('PostgreSQL') 36 | con <- dbConnect(drv, dbname='jbryer', user='', password='', host='localhost', port=5432) 37 | class(con) 38 | -------------------------------------------------------------------------------- /vignettes/DataDictionary.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Example data dictionary using queries from the SQL repository} 2 | \documentclass[letterpaper,11pt,nogin]{article} 3 | 4 | \usepackage[top=.75in,right=.75in,left=.75in,bottom=.75in]{geometry} 5 | \usepackage{hyperref} 6 | \usepackage{listings} 7 
| \lstset{breaklines=true} 8 | 9 | <>= 10 | library(sqlutils) 11 | library(RSQLite) 12 | library(xtable) 13 | 14 | sqlfile <- paste(system.file(package='sqlutils'), '/db/students.db', sep='') 15 | m <- dbDriver("SQLite") 16 | conn <- dbConnect(m, dbname=sqlfile) 17 | sqlPaths(paste(system.file(package='sqlutils'), '/sql', sep=''), replace=TRUE) 18 | queries <- getQueries() 19 | nlevels <- 10 20 | 21 | sanitizeLatex <- function(str) { 22 | gsub('([#$%&~_\\^\\\\{}])', '\\\\\\1', str, perl=TRUE) 23 | } 24 | @ 25 | 26 | \begin{document} 27 | \SweaveOpts{concordance=TRUE} 28 | 29 | \label{sec:toc} 30 | \renewcommand{\contentsname}{Table of Contents} 31 | \tableofcontents 32 | 33 | \ \\ \ \\ 34 | \section{Introduction} 35 | 36 | This ``vignette'' is a template for building a data dictionary based upon all the queries returned by \texttt{getQueries}. This Sweave file can be run using your own library of queries as defined in \texttt{sqlPaths}. 37 | 38 | <>= 39 | for(q in seq_along(queries)) { 40 | doc <- sqldoc(queries[q]) 41 | 42 | cat('\\clearpage\n') 43 | 44 | cat(paste('\\section{', queries[q], '}\n', sep='')) 45 | cat(paste('\\label{', queries[q], '}\n\n', sep='')) 46 | 47 | if(!is.null(doc$introduction)) { 48 | cat(doc$introduction) 49 | cat("\n\n") 50 | } 51 | 52 | tryCatch( { 53 | 54 | sql <- suppressWarnings(getSQL(queries[q])) 55 | cat('\\subsection{Parameters}\n') 56 | if(length(getParameters(queries[q])) > 0) { 57 | cat('\\begin{description}\n') 58 | for(j in 1:nrow(doc$params)) { 59 | desc <- doc$params[j,]$desc 60 | desc <- ifelse(is.null(desc) | desc=='', ' not specified', desc) 61 | default <- doc$params[j,]$default 62 | default <- ifelse(is.null(default) | default=='', ' not specified', default) 63 | cat(paste('\\item[', doc$params[j,]$param, '] ', 64 | desc, 65 | '\n\n\\begin{lstlisting}\n', 66 | default, 67 | '\n\\end{lstlisting}\n', sep='')) 68 | } 69 | cat('\\end{description}\n\n') 70 | } else { 71 | cat('None\n\n') 72 | } 73 | 74 | start <- 
proc.time() 75 | results <- suppressWarnings(execQuery(queries[q], connection=conn)) 76 | time <- proc.time() - start 77 | df <- data.frame(Variable=character(), Type=character(), Missing=numeric(), Levels=character(), stringsAsFactors=FALSE) 78 | 79 | for(i in 1:ncol(results)) { 80 | v = names(results)[i] 81 | if(class(results[,i])[1] == 'factor') { 82 | t = paste('Factor with ', length(levels(results[,i])), ' levels', sep='') 83 | if(length(levels(results[,i])) > nlevels) { 84 | l = paste(levels(results[,i])[1:nlevels], collapse='; ') 85 | } else { 86 | l = paste(levels(results[,i]), collapse='; ') 87 | } 88 | } else { 89 | t = paste(class(results[,i]), collapse=", ") 90 | l = '' 91 | } 92 | m = length(which(is.na(results[,i]))) / nrow(results) * 100 93 | df = rbind(df, data.frame(Variable=v, Type=t, Missing=m, Levels=l)) 94 | } 95 | 96 | cat('\\subsection{Results}\n') 97 | cat(paste('Returned ', nrow(results), ' rows and ', ncol(results), ' columns. Took ', format(time[1], digits=1), ' seconds to execute query.\n\n', sep='')) 98 | #x = xtable(df, caption=NULL, label=paste(queries[q], '-results', sep=''), 99 | # align=c('l','l','r','r','p{3.0in}'), digits=2) 100 | #print(x, include.rownames=FALSE) 101 | cat('\\begin{description}\n') 102 | for(r in 1:nrow(df)) { 103 | cat(paste('\n\n\\item[', sanitizeLatex(df[r,]$Variable), '] ', 104 | df[r,]$Type, 105 | ' (', format(df[r,]$Missing, digits=2), '\\% missing)\n', 106 | sanitizeLatex(df[r,]$Levels), 107 | sep='')) 108 | } 109 | cat('\n\n\\end{description}\n\n') 110 | 111 | cat('\\subsection{SQL}\n\\begin{lstlisting}\n') 112 | cat(sql) 113 | cat('\n\\end{lstlisting}\n') 114 | 115 | }, error=function(e) { print(e) } ) 116 | } 117 | 118 | @ 119 | 120 | \end{document} 121 | --------------------------------------------------------------------------------