├── DESCRIPTION ├── NAMESPACE ├── R ├── addFile.R ├── backupFiles.R ├── copyCommonMk.R ├── copyGitIgnore.R ├── copyTemplates.R ├── createFromTemplate.R ├── createProjectSkeleton.R ├── createSyntaxR.R ├── dryworkflow.R ├── getProjectConfig.R ├── readCodeBook.R ├── setUpDirectoryStructure.R ├── setupFileAndDataNames.R └── zzz.R ├── README.md ├── inst ├── demoFiles │ ├── CodeBook-small2.csv │ ├── data1-birth.csv │ ├── data1-yr21.csv │ ├── data1_codebook.csv │ ├── setupProject.R │ ├── small2.csv │ └── small2_codebook.csv ├── git │ └── DOTgitignore ├── makefile.common │ └── common.mk └── templates │ ├── Makefile_base.txt │ ├── template_analyseR.txt │ ├── template_beamerRmd.txt │ ├── template_beamerRnw.txt │ ├── template_cleanR.txt │ ├── template_codebookR.txt │ ├── template_mergeAllR.txt │ ├── template_presentRmd.txt │ ├── template_readR.txt │ ├── template_reportRmd.txt │ ├── template_reportRnw.txt │ └── template_summaryR.txt └── man ├── addFile.Rd ├── backupFiles.Rd ├── copyCommonMk.Rd ├── copyGitIgnore.Rd ├── copyTemplates.Rd ├── createFromTemplate.Rd ├── createProjectSkeleton.Rd ├── createSyntaxR.Rd ├── dryworkflow.Rd ├── extractSubstStrings.Rd ├── getProjectConfig.Rd ├── readCodeBook.Rd ├── readTemplate.Rd ├── setUpDirectoryStructure.Rd ├── setupFileAndDataNames.Rd └── whichReadCommand.Rd /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dryworkflow 2 | Title: DRY (Don't Repeat Yourself) Workflow for more efficient data analysis 3 | Version: 0.1.9019 4 | Date: 29/6/2015 5 | Authors@R: person("Peter", "Baker", email = "pete@petebaker.id.au", 6 | role = c("aut", "cre")) 7 | License: GPL-3 8 | Depends: R (>= 3.1.3) 9 | Imports: Hmisc (>= 3.14-6), 10 | lubridate (>= 0.6.2), 11 | stringr (>= 1.0.0), 12 | rmarkdown (>= 0.5.1), 13 | knitr (>= 1.9), 14 | tools (>= 3.1.3), 15 | compare (>= 0.2-5), 16 | zoo (>= 1.7-12) 17 | Suggests: ggplot2, 18 | dplyr, 19 | sas7bdat (>= 0.5), 20 | reshape, 21 | reshape2, 22 | readxl 
(>= 0.1.0) 23 | Description: Given data files and documents, a project skeleton is 24 | generated with initial directories, template log files, 25 | template R syntax for data checking and initial analysis, 26 | makefiles and a git repository is initialised. 27 | LazyData: true 28 | URL: http://www.petebaker.id.au 29 | BugReports: http://github.com/petebaker/dryworkflow/issues 30 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.1.1): do not edit by hand 2 | 3 | export(copyCommonMk) 4 | export(copyGitIgnore) 5 | export(copyTemplates) 6 | export(createProjectSkeleton) 7 | export(readCodeBook) 8 | -------------------------------------------------------------------------------- /R/addFile.R: -------------------------------------------------------------------------------- 1 | ##' add a new code book, data file, document or R file to project 2 | ##' 3 | ##' add a code book, data or document file to a \code{dryworkflow} 4 | ##' project. If appropriate, relevant R files will be created and 5 | ##' Makefiles, log files and git repository updated 6 | ##' 7 | ##' @param file.name name of file to add to project as a string 8 | ##' @param move whether to move instead of copy file Default: TRUE 9 | ##' @param projectDir base directory of \code{dryworkflow} project 10 | ##' directory. Default: \dQuote{myProject} 11 | ##' @param replace logical, if file exists then replace. 
##' Default: FALSE
##' @return logical TRUE/FALSE indicating success of adding file
addFile <- function(
  file.name,
  move = TRUE,
  projectDir = "myProject",
  replace = FALSE)
{

  ## is there a configuration file here If so use that, else exit
  ## but in future could make it more flexible

  ## check options ---------------------------------------------
  if (!file.exists(file.name)) stop("File not found: ", file.name)
  if (!dir.exists(projectDir)) stop("Directory not found: ", projectDir)
  if (!is.logical(move) || length(move) != 1 || is.na(move))
    stop("move must be TRUE or FALSE")
  if (replace)
    stop("Option not yet implemented for safety reasons\n Please remove file manually")

  ## Whatever happens below (including errors), leave the session as
  ## we found it: close every sink opened by this function and return
  ## to the starting directory
  start.dir <- getwd()
  sink.depth <- sink.number()
  on.exit({
    while (sink.number() > sink.depth) sink()
    setwd(start.dir)
  }, add = TRUE)

  ## local helpers ------------------------------------------------------

  ## run a git command and echo any output so that it ends up in the log
  runGit <- function(args){
    out <- system2("git", args, stdout = TRUE, stderr = TRUE)
    if (length(out) > 0) print(out)
    invisible(out)
  }

  ## append 'targets' to the 'all:' line of a Makefile and add the new
  ## rules ('extraLines') at the end of the file
  appendToMakefile <- function(makefile, targets, extraLines){
    mf <- readLines(makefile)
    allLine <- grep("^all:", mf)
    if (length(allLine) > 0 && length(targets) > 0)
      mf[allLine] <- paste(mf[allLine], paste(targets, collapse = " "))
    mf <- c(mf, "",
            paste0("## Extra R syntax for file: '", file.name,
                   "' added at ", date()),
            extraLines)
    writeLines(mf, makefile)
    cat("Wrote file:", makefile, "\n")
  }

  ## could modify this later rewrite of not in top level directory
  baseDir <- projectDir

  ## the file is stored under its base name even when 'file.name'
  ## includes a directory component
  base.name <- basename(file.name)

  ## determine project config -------------------------------------------
  ## use project configuration if found - otherwise use global options??
  ## needs some more thought
  projectConfig <- getProjectConfig(projectDir = projectDir)

  cat("++++ Project Configuration Details:\n")
  print(comment.pc <- comment(projectConfig))

  ## determine log file to add details of file added -------------------
  setup.log <- stringr::str_c("setup_", projectConfig$settings$dir.project,
                              projectConfig$settings$log.ext)
  if (!file.exists(setup.log)){
    stop("Please start 'addFile' in directory containing Project Directory: ",
         projectConfig$settings$dir.project)
  }
  ## absolute path, so the same log can be reopened after setwd()
  setup.log <- normalizePath(setup.log)

  ## determine type of file --------------------------------------
  file.ext <- tolower(tools::file_ext(file.name))
  file.ext1 <- stringr::str_c(".", file.ext)

  ## NB: later matches override earlier ones (doc < codebook < data);
  ## 'inital.files' is the (misspelt) name used in the project settings
  file.type <- NULL
  if (file.ext1 %in% projectConfig$settings$inital.files$doc$ext)
    file.type <- "doc"
  if (file.ext1 %in% projectConfig$settings$inital.files$codebook$ext)
    file.type <- "codebook"
  if (file.ext1 %in% projectConfig$settings$inital.files$data$ext)
    file.type <- "data"

  if (is.null(file.type)){
    stop("Error: file must be a data, code book or document file")
  } else {
    cat("File of type:", file.type, "\n\n")
  }

  ## determine destination ----------------------------------------
  destinationSub <-
    switch(file.type,
           doc = projectConfig$projectDirs$destinations$doc,
           data = projectConfig$projectDirs$working.dirs$dataOrig,
           codebook = projectConfig$projectDirs$working.dirs$codebook)
  destination <- file.path(baseDir, destinationSub)

  ## refuse to overwrite - checked before logging starts so that a
  ## failure does not leave a half written entry in the log
  dest.file <- file.path(destination, base.name)
  if (file.exists(dest.file)){
    stop("Destination file: '", dest.file, "' exists. Remove first?")
  }

  ## add to setup logfile
  sink(setup.log, append = TRUE, split = TRUE)
  ## NB: should make this more flexible in future
  cat("\n++++++++++++++++++++++++++++++++++++++++++++++++\n")
  cat(paste0("Details of adding file: '", file.name, "'\n"))
  cat("++++++++++++++++++++++++++++++++++++++++++++++++\n\n")

  ## move or copy file ------------------------------------------------
  MV <- if (move) "moved" else "copied"
  cat(paste0("File: '", file.name, "' will be ", MV, " to '",
             destination, "'\n"))

  if (move){
    moved.success <- file.rename(file.name, dest.file)
  } else {
    moved.success <- file.copy(file.name, dest.file, copy.date = TRUE)
  }
  if (!moved.success)
    stop("File: '", file.name, "' could not be ", MV, " to '", dest.file, "'")
  cat(stringr::str_c("File: '", file.name, "' ", MV, " successfully\n\n"))

  ## Construct R files, makefiles, .Rmd files etc -------------------
  ## (not needed for documents, which are only stored and committed)
  RsyntaxFiles <- NULL
  syntaxDirs <- NULL
  if (file.type != "doc"){
    cat("\n+++ Creating R Syntax templates for reading/analysing data ...\n")

    ## Use template files for make, .R syntax, .Rmd etc etc etc etc
    template.choices <-
      list(data = c("readR", "cleanR", "summaryR", "analyseR"),
           codebook = "codebookR")
    template.dir <- projectConfig$settings$template.dir
    template.files <- projectConfig$settings$templates

    Rsyntax.types <- gsub("R$", "", template.choices[[file.type]]) # drop last R

    MakefileLines <- vector(mode = "list", length = length(Rsyntax.types))
    names(MakefileLines) <- Rsyntax.types

    ## set up file names and data frame names
    newFilesAndDFs <- list(base.name)
    names(newFilesAndDFs) <-
      if (file.type == "data") "dataFiles" else "codebookFiles"
    newFilesAndDFs$dataFrames <-
      lapply(Rsyntax.types,
             function(x)
               stringr::str_c(x, "_",
                              stringr::str_replace(base.name, "\\.", "_")))
    names(newFilesAndDFs$dataFrames) <- Rsyntax.types
    newFilesAndDFs$RsyntaxFiles <-
      lapply(newFilesAndDFs$dataFrames, function(x) stringr::str_c(x, ".R"))
    newFilesAndDFs$RoutFiles <-
      lapply(newFilesAndDFs$dataFrames, function(x) stringr::str_c(x, ".Rout"))
    newFilesAndDFs$RDataFiles <-
      lapply(newFilesAndDFs$dataFrames, function(x) stringr::str_c(x, ".RData"))
    newFilesAndDFs$availableCodeBooks <- projectConfig$availableCodeBooks
    newFilesAndDFs$directories <- projectConfig$filesAndDFs$directories
    class(newFilesAndDFs) <- "fileAndDataNames"  ## cheating

    ## loop through R syntax files
    cat("R syntax files:\n")
    for (RS in Rsyntax.types){ # RS <- "read"
      if (length(newFilesAndDFs$RsyntaxFiles[[RS]]) > 0) {
        MakefileLines[[RS]] <-
          createSyntaxR(dir.project = projectDir, filesAndDFs = newFilesAndDFs,
                        project.steps = RS, template.dir = template.dir,
                        print.mismatches =
                          projectConfig$settings$print.mismatches,
                        template = template.files[paste0(RS, "R")])
      }
    }

    ## add new lines to Makefiles ---------------------------------------
    cat("\n+++ Appending new files to Makefiles ...\n")

    makeTypes <- c("readMerge", "work")
    makeDirs <-
      unlist(projectConfig$projectDirs$working.dirs)[makeTypes]
    makeFiles <- file.path(baseDir, makeDirs, "Makefile")
    names(makeFiles) <- makeTypes

    ## read/clean: only the 'clean' targets go on the 'all:' line
    ## NOTE(review): rules generated for a code book are not written to
    ## either Makefile (as in the original code) - confirm intended
    appendToMakefile(makeFiles[["readMerge"]],
                     targets = MakefileLines[["clean"]]$targets,
                     extraLines = c(MakefileLines[["read"]]$makefileLines,
                                    MakefileLines[["clean"]]$makefileLines))

    ## summary/analysis
    appendToMakefile(makeFiles[["work"]],
                     targets = c(MakefileLines[["summary"]]$targets,
                                 MakefileLines[["analyse"]]$targets),
                     extraLines = c(MakefileLines[["summary"]]$makefileLines,
                                    MakefileLines[["analyse"]]$makefileLines))

    RsyntaxFiles <- unlist(newFilesAndDFs$RsyntaxFiles)
    syntaxDirs <-
      unique(unlist(newFilesAndDFs$directories[c("read", "clean", "analyse",
                                                 "summary")]))
  }

  ## update project configuration and git repository -------------------

  ## seems that writing Makefiles messes up sink() so stop sink'ing
  ## until after change to project directory
  sink()
  setwd(projectDir)
  sink(setup.log, append = TRUE, split = TRUE)

  ## update project config
  comment(projectConfig) <-
    c(comment.pc, stringr::str_c("File: '", file.name, "' added at ", date()))
  saveRDS(projectConfig, file = projectConfig$configFile)
  cat("+++ Project configuration updated\n\n")

  ## update git - paths are quoted so names containing spaces work
  cat("+++ Updating git repository\n")
  runGit(c("add", shQuote(file.path(destinationSub, base.name))))

  if (length(RsyntaxFiles) > 0 && length(syntaxDirs) > 0){
    ## Look for each new syntax file in every step directory and add the
    ## ones that exist, rather than relying on directories and files
    ## being listed in the same order. Directories appear to be stored
    ## relative to a working sub directory ('..' prefix), hence the
    ## substitution now that we are in the project directory itself.
    rsFiles <- as.vector(outer(syntaxDirs, RsyntaxFiles, file.path))
    rsFiles <- unique(gsub("\\.\\.", "\\.", rsFiles))
    rsFiles <- rsFiles[file.exists(rsFiles)]
    if (length(rsFiles) > 0) runGit(c("add", shQuote(rsFiles)))
  }

  runGit(c("commit", "-m",
           shQuote(stringr::str_c("Added file '", file.name, "'")), "-a"))

  ## return to start dir and close log file: handled by on.exit() above
  TRUE

}

## attr(addFile,"ex") <- function(){
## internal function only
##CUT##\dontrun{addFile("myNewDat.dta")}

--------------------------------------------------------------------------------
/R/backupFiles.R:
--------------------------------------------------------------------------------
##' Backup original, R files, R report and posted files to allow regenerating
##'
##' Zips all relevant files to allow work to be reproduced although,
##' for safety, it is best to make your own backup.
##'
##' While \code{backupFiles} provides a way to backup all relevant
##' files to enable work to be reproduced, it is best to have a more
##' comprehensive personalised strategy tailored to your own
##' circumstances. In particular, it may be more prudent to modify
##' \code{Makefile} to backup off site by using make. See Makefile and
##' type \code{make help rsync} at the command or shell prompt to help
##' automate remote backup. Note that this function should be called
##' from the main project directory not a work/reporting sub directory.
##' The zip file is written to the parent of the project directory.
##'
##' @param zipFile string containing name for zip file. Default:
##' \code{NULL} whereby name is derived from today's date and the
##' project directory name
##' @return None
##' @author Peter Baker \email{pete@@petebaker.id.au}
backupFiles <- function(zipFile = NULL){

  ## somehow put this in somewhere - testhat??
  ## @examples
  ## backupFiles()

  ## check to see if project configuration is available ----------------
  if (!file.exists("configFile.rds")){
    stop("backupFiles can only be called from the main project directory")
  }
  projectConfig <- readRDS("configFile.rds")
  dir.project <- projectConfig$settings$dir.project

  cat("Important files for", dir.project, "will be zipped\n",
      "NB: Do not rely on this. Always make your own backup!\n")

  ## name of zip file: only derived when not supplied by the caller ----
  if (is.null(zipFile)){
    zipFile <-
      stringr::str_c(dir.project, "_", as.character(lubridate::today()), ".zip")
  } else if (!is.character(zipFile) || length(zipFile) != 1){
    stop("'zipFile' must be a single character string or NULL")
  }
  cat("+++ Files will be zipped to", zipFile, "\n\n")

  ## files to backup -------------------------------------------------
  ## whole directories: original data, code books, library, posted, docs
  pc <- projectConfig
  full.backed.up <- c(pc$projectDirs$working.dirs$dataOrig,
                      pc$projectDirs$working.dirs$codebook,
                      pc$projectDirs$destinations$lib,
                      pc$projectDirs$directories$posted,
                      pc$projectDirs$directories$doc)
  ## working directories: only syntax/report sources and Makefiles
  dirs.syntax.R <- c(pc$projectDirs$working.dirs$readMerge,
                     pc$projectDirs$working.dirs$work)
  dirs.syntax.Rmd <- c(pc$projectDirs$working.dirs$reports)

  ## dots are escaped so that only real '.R', '.Rmd' and '.Rnw'
  ## extensions match
  allFiles <- c(list.dirs(full.backed.up),
                list.files(dirs.syntax.R, pattern = "\\.R$|Makefile",
                           full.names = TRUE),
                list.files(dirs.syntax.Rmd,
                           pattern = "\\.Rmd$|\\.Rnw$|Makefile",
                           full.names = TRUE), "Makefile", "configFile.rds")

  cat("+++ Files to be backed up:\n")
  print(allFiles)

  ## zip returns the exit status of the external zip program invisibly
  zipTest <- utils::zip(file.path("..", zipFile), allFiles)
  if (zipTest == 0) {
    cat("All files successfully zipped\n")
  } else {
    warning("zip finished with non-zero status ", zipTest,
            ": backup may be incomplete")
  }

  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/copyCommonMk.R:
--------------------------------------------------------------------------------
##' Copy dryworkflow package common.mk file to specified destination
##'
##' The file \code{common.mk} contains pattern rules to process
##' \code{R}, \code{Rmd} and \code{Rnw} files to output a range of
##' output files including \code{Rout}, \code{pdf}, \code{html} and
##' \code{docx} files
##'
##' The \code{common.mk} file is supplied with the \code{dryworkflow}
##' package. Ideally, this file should be placed in a specific
##' directory used for all data analysis projects.
##' In \code{linux}
##' this would usually be something like \code{~/lib}. The definitions
##' and rules can then be used for any project by including the
##' contents with an \code{include} command placed near the end of a
##' \code{Makefile}. Individual definitions or rules can be overridden
##' by redefining them after the \code{include} statement. The latest
##' version of \code{common.mk} is always available at
##' \url{https://github.com/petebaker/r-makefile-definitions}. Once a
##' basic Makefile is set up (usually by
##' \code{\link{createProjectSkeleton}}) then type \code{make help}
##' for more details.
##'
##' @param destination string containing directory name for copying
##' \code{common.mk}. Default: "~/lib" for unix style set ups and
##' $HOME/Library for windows style set ups
##' @param overwriteFile logical indicating whether to overwrite
##' existing \code{common.mk} file: Default: FALSE
##' @param createDir whether to create destination directory if it
##' doesn't exist: Default = FALSE
##' @return None
##' @author Peter Baker \email{pete@@petebaker.id.au}
##' @examples
##' copyCommonMk("testMake", createDir = TRUE)
##' @export
copyCommonMk <- function(
  destination = NULL, overwriteFile = FALSE, createDir = FALSE){

  ## filename - perhaps could be a argument to function
  common.mk <- "common.mk"

  ## find common.mk in dryworkflow package -----------------------------
  source.dir <- file.path(system.file(package="dryworkflow"), "makefile.common")
  if (!file.exists(source.dir)) stop(paste("Directory", source.dir,
                                           "not found"))
  source.file <- file.path(source.dir, common.mk)

  ## process destination -------------------------------------------------
  if (is.null(destination)) {
    HOME <- Sys.getenv("HOME")

    ## is there a configuration file here (or in the directory above).
    ## If so use that, otherwise use global dryworkflow options to set
    ## destination directory. As before, a file in the parent directory
    ## takes precedence when both are present.
    projectConfig <- NULL
    if (file.exists("configFile.rds"))
      projectConfig <- readRDS("configFile.rds")
    if (file.exists(fp <- file.path("..", "configFile.rds")))
      projectConfig <- readRDS(fp)

    style <- NULL
    if (!is.null(projectConfig)){
      if (inherits(projectConfig, "drywProjectConfig")){
        style <- projectConfig$style
        cat("Using options for project:", projectConfig$name.project, "\n")
      } else {
        warning("'projectConfig' not of class 'drywProjectConfig'")
      }
    }
    ## no usable project configuration: fall back to the global options
    ## and finally to the operating system type, so that 'style' is
    ## always defined
    if (is.null(style)){
      style <- getOption("dryworkflow")$style
      cat("Using global 'dryworkflow' options\n")
    }
    if (is.null(style))
      style <- if (.Platform$OS.type == "unix") "unix" else "windows"

    destination <-
      file.path(HOME, if (identical(style, "unix")) "lib" else "Library")
    cat("Destination set to:", destination, "\n")
  }

  ## checked once the destination is known (make dislikes such paths)
  if (any(grepl(" ", destination, fixed = TRUE)))
    warning(paste0("Directory '", destination, "' should not contain spaces"))

  ## copy file --------------------------------------------------
  if (!dir.exists(destination)){
    if (!createDir)
      stop("Directory '", destination,
           "' not found, specify 'createDir = TRUE' to create it")
    if (!dir.create(destination, recursive = TRUE))
      stop("Directory '", destination, "' could not be created")
  }
  commonFile <- file.path(destination, common.mk)

  if (file.exists(commonFile) && !overwriteFile)
    stop("File '", commonFile, "' exists, specify 'overwriteFile' if needs be")

  ## only report success if the copy really worked
  copied <- file.copy(source.file, commonFile, overwrite = overwriteFile,
                      copy.date = TRUE)
  if (!copied)
    stop("File '", source.file, "' could not be copied to '", destination, "'")
  cat(stringr::str_c("+++ File successfully copied to '", destination,"'\n"))

  cat("NB: The newest version of 'common.mk' is always available at\n",
      " 'https://github.com/petebaker/r-makefile-definitions'\n")

  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/copyGitIgnore.R:
--------------------------------------------------------------------------------
##' Copy dryworkflow package file .gitignore to specified destination
##'
##' The file \code{.gitignore} contains patterns and
##' file names
##' indicating which files are not to be tracked by \code{git}. This
##' is copied to a destination directory or the current directory if
##' not specified.
##'
##' The file \code{.gitignore} is supplied with the \code{dryworkflow}
##' package. The file is called \code{DOTgitignore} and, by default,
##' renamed automatically to \code{.gitignore} and copied to the
##' current directory. Various output from \code{R}, intermediate
##' files from \code{latex} and \code{knitr} are specified as files
##' which \code{git} does not track. Note that on operating systems
##' like \code{linux}, files beginning with a dot (.) are hidden and so
##' to unhide \code{.gitignore} the file name is modified to start
##' with the letters \code{DOT}. However, to work effectively with
##' \code{git}, the file must be named \code{.gitignore}.
##'
##' @param destination string containing directory name for copying
##' \code{gitignore}. Default: current directory
##' @param overwriteFile logical indicating whether to overwrite
##' existing \code{.gitignore} file: Default: FALSE
##' @param createDir whether to create destination directory if it
##' doesn't exist: Default = FALSE
##' @param renameDotGitignore logical, whether to rename
##' \code{DOTgitignore} to \code{.gitignore}. Default: TRUE
##' @return None
##' @author Peter Baker \email{pete@@petebaker.id.au}
##' @examples
##' copyGitIgnore("testGit", createDir = TRUE, renameDotGitignore = FALSE)
##' @export
copyGitIgnore <- function(
  destination = NULL, overwriteFile = FALSE, createDir = FALSE,
  renameDotGitignore = TRUE){

  if (is.null(destination)) destination <- "."

  ## filename - perhaps could be a argument to function
  ## source is always 'DOTgitignore'; the copy is renamed on request
  gitignoreSrc <- "DOTgitignore"
  gitignore <- if (renameDotGitignore) ".gitignore" else gitignoreSrc

  ## find gitignore in dryworkflow package -----------------------------
  source.dir <- file.path(system.file(package="dryworkflow"), "git")
  if (!file.exists(source.dir)) stop(paste("Directory", source.dir,
                                           "not found"))
  source.file <- file.path(source.dir, gitignoreSrc)
  if (!file.exists(source.file)) stop(paste("File:", source.file,
                                            "not found"))

  ## process destination -------------------------------------------------
  if (any(grepl(" ", destination, fixed = TRUE))){
    warning(paste0("Directory '", destination, "' should not contain spaces"))
    cat("Destination is:", destination, "\n")
  }

  ## copy file --------------------------------------------------
  if (!dir.exists(destination)){
    if (!createDir)
      stop("Directory '", destination,
           "' not found, specify 'createDir = TRUE' to create it")
    if (!dir.create(destination, recursive = TRUE))
      stop("Directory '", destination, "' could not be created")
  }
  gitFile <- file.path(destination, gitignore)

  if (file.exists(gitFile) && !overwriteFile)
    stop("File '", gitFile, "' exists, specify 'overwriteFile' if needs be")

  ## only report success if the copy really worked
  copied <- file.copy(source.file, gitFile, overwrite = overwriteFile,
                      copy.date = TRUE)
  if (!copied)
    stop("File '", source.file, "' could not be copied to '", gitFile, "'")
  cat(stringr::str_c("+++ File '", gitignoreSrc, "' successfully copied to '",
                     destination, "' as '", gitignore, "'\n"))

  ## cat("NB: The newest version of 'gitignore' is always available at\n",
  ##     " 'https://github.com/petebaker/r-makefile-definitions'\n")

  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/copyTemplates.R:
--------------------------------------------------------------------------------
##' Copy dryworkflow package template files to specified destination
##'
##' Copies all template \code{.txt} files from package
##' \code{dryworkflow} to a directory so that they can be modified and
##' reused.
##' Files can be customised and then used by
##' \code{\link{createProjectSkeleton}} by specifying an alternative
##' \code{template.dir}. Note that if the directory specified already
##' exists then an error will be produced.
##'
##' Files have names like \code{template_cleanR.txt},
##' \code{template_readR.txt}, \code{template_analyseR.txt} and so
##' on. Their use should be obvious from the file name. Strings which
##' may be changed are described at the top of the template file and
##' the description will be removed from the syntax file which is
##' produced using \code{\link{createProjectSkeleton}}.
##'
##' @param destination string containing directory name for copying
##' template files. Default: a new directory "templates" in the
##' current directory
##' @return None
##' @author Peter Baker \email{pete@@petebaker.id.au}
##' @examples
##' copyTemplates()
##' @export
copyTemplates <- function(destination = "templates"){

  ## find template directory in dryworkflow package ---------------------
  source.dir <- file.path(system.file(package="dryworkflow"), "templates")
  if (!file.exists(source.dir)) stop(paste("Directory", source.dir,
                                           "not found"))

  ## dot is escaped so that only a real '.txt' extension matches
  source.files <- list.files(path = source.dir,
                             pattern = "\\.txt$", all.files = TRUE)
  cat("Template files:\n")
  print(source.files)

  ## process destination -------------------------------------------------

  ## create directory but exit if present so that customised templates
  ## are never overwritten
  if (dir.exists(destination))
    stop(stringr::str_c("Directory: '", destination, "' exists. Remove first"))
  if (!dir.create(destination))
    stop(stringr::str_c("Directory: '", destination, "' could not be created"))

  ## copy files to destination
  cat("\n+++++ Template files being copied to '", destination, "'\n", sep="")
  successful.logical <-
    file.copy(file.path(source.dir, source.files), destination,
              overwrite = FALSE)

  ## file.copy returns one logical per file: report any that failed
  if (all(successful.logical)) {
    cat("All files successfully copied\n")
  } else {
    warning("Template files not copied: ",
            paste(source.files[!successful.logical], collapse = ", "))
  }

  invisible(NULL)
}

--------------------------------------------------------------------------------
/R/createFromTemplate.R:
--------------------------------------------------------------------------------
## Filename: createFromTemplate.R
## Purpose: Create an R, Rmd, Rnw, ... file from a template
##
## To run in terminal use: R CMD BATCH --vanilla createFromTemplate.R

## Created at: Tue Apr 14 12:25:20 2015
## Author: Peter Baker
## Hostname: sph-ph-428-04p.sph.uq.edu.au
## Directory: /home/pete/Data/R.workflow/Rpackage-201501/src/R/
## Licence: GPLv3 see
##
## Change Log:
##

## First like could be put in as AT title

##' Create an R, Rmd or Rnw file from a template
##'
##' Creates and writes an \code{R} syntax or report file from a
##' template by substituting for specified strings. By default, these
##' templates are provided as part of the \code{\link{dryworkflow}}
##' package. In order to customise these files to a particular project
##' or style of working, alternate templates and string formats can be
##' provided.
##'
##' By default, strings in the template file that look like
##' @@@@SYNTAX.FILE@@@@ and @@@@DATA.FILE@@@@ are substituted with
##' strings provided as elements in a list with named components
##' SYNTAX.FILE, DATA.FILE and so on provided as the \code{subst.strings}
##' argument. The string prefix and suffix can be changed but must be
##' the same throughout the template.
32 | ##' 33 | ##' @param file.name full filename of file to be written provided as a 34 | ##' string and including directory if necessary 35 | ##' @param subst.strings named list of string substitutions 36 | ##' @param template name of template text file as string. Default: 37 | ##' \code{NULL} for predefined template from \code{\link{dryworkflow}} 38 | ##' package. 39 | ##' @param template.dir directory containing template. Default: 40 | ##' \code{\link{dryworkflow}} package directory 41 | ##' @param overwrite.file logical whether or not to allow overwrite of 42 | ##' existing file. Default: \code{FALSE} 43 | ##' @param string.prefix string of characters for start of strings to 44 | ##' be substituted from template file. Default \sQuote{@@@@} 45 | ##' @param string.suffix string of characters for end of strings to be 46 | ##' substituted from template file. Default: same as 47 | ##' \code{string.prefix} 48 | ##' @param delete.start lines between and including those containing 49 | ##' the \code{delete.start} and \code{delete.end} patterns will be 50 | ##' removed. Default: \dQuote{---- START: DELETE THIS SECTION ----} 51 | ##' @param delete.end Default: \dQuote{---- END: DELETE THIS SECTION ----} 52 | ##' @param print.mismatches logical to declare wther to print warnings 53 | ##' about unused and undefined strings. 
##'   Default: \code{FALSE}
##' @return None. Called for its side effect of writing \code{file.name};
##'   a confirmation line naming the written file is printed to the console.
##' @author Peter Baker  \email{pete@@petebaker.id.au}
createFromTemplate <- function(
  file.name, subst.strings, template, template.dir,
  overwrite.file = FALSE,
  string.prefix = "@@", string.suffix = string.prefix,
  delete.start = "-- START: DELETE THIS SECTION --",
  delete.end = "-- END: DELETE THIS SECTION --",
  print.mismatches = FALSE){

  ## -------------------------------------------------------------
  ## process function arguments ----------------------------------
  ## -------------------------------------------------------------

  ## file.name has to be validated *before* basename()/dirname() are
  ## called because they fail on non-character input themselves
  if (!is.character(file.name) || length(file.name) != 1)
    stop("'file.name' should be a single character string")

  ## extract filename and directory
  x <- basename(file.name)
  dir.x <- dirname(file.name)

  ## x - filename for writing -------------------------------------------
  ## it is the name of the file being written (not the template name)
  ## that must be free of spaces
  if (grepl(" ", x, fixed = TRUE))
    stop(paste0("Filename '", x, "' should not contain spaces"))

  ## dir.x - for writing file x -----------------------------------------
  if (!file.exists(dir.x))
    stop(paste0("destination directory '", dir.x, "' for '", x,
                "' not found"))

  ## subst.strings ------------------------------------------------------
  if (!is.list(subst.strings)) stop("'subst.strings' must be a list")
  if (!all(vapply(subst.strings, is.character, logical(1))))
    stop("All components of 'subst.strings' must be character strings")

  ## overwrite.file -----------------------------------------------------
  ## Refuse to clobber an existing file unless explicitly allowed.  When
  ## overwriting is allowed the old file is only replaced at write time
  ## (opening in "wt" mode truncates it), so a failure while reading or
  ## processing the template leaves the existing file untouched.
  if (!overwrite.file && file.exists(file.name))
    stop("file '", x, "' found in '", dir.x, "'. Remove first?")

  ## ------------------------------------------------------------------
  ## process template file --------------------------------------------
  ## ------------------------------------------------------------------

  ## read template file -------------------------------------------------
  template.txt <-
    readTemplate(template = template, template.dir = template.dir,
                 delete.start = delete.start, delete.end = delete.end)

  ## find strings in file -----------------------------------------------
  strings4change <-
    extractSubstStrings(template.txt,
                        string.prefix = string.prefix,
                        string.suffix = string.suffix)

  ## Warn about mismatches ----------------------------------------------
  if (print.mismatches){
    missing.strings <- setdiff(strings4change, names(subst.strings))
    if (length(missing.strings) > 0){
      warning("These strings missing and so will need to be set manually: ",
              paste(missing.strings, collapse = ", "))
    }
    extra.strings <- setdiff(names(subst.strings), strings4change)
    if (length(extra.strings) > 0){
      warning("Extra strings defined but will not be used: ",
              paste(extra.strings, collapse = ", "))
    }
  }

  ## substitute each key --------------------------------------------------
  ## (the file name itself is no longer special: it is just another entry
  ## of 'subst.strings')
  for (y in names(subst.strings)){
    sub.name <- stringr::str_c(string.prefix, y, string.suffix)
    replacement <- subst.strings[[y]]
    if (length(replacement) == 1){
      ## single string: in-line replacement wherever the key occurs
      template.txt <-
        stringr::str_replace_all(template.txt, sub.name, replacement)
    } else {
      ## multi-line replacement: every template line containing the
      ## delimited key is replaced, as a whole line, by all replacement
      ## lines.  Hits are located once, on the delimited key rather than
      ## the bare name, so matches on the first/last line, several
      ## matches, or replacement text that itself mentions the key are
      ## all handled and cannot loop forever.
      hits <- grep(sub.name, template.txt, fixed = TRUE)
      if (length(hits) > 0){
        pieces <- as.list(template.txt)
        pieces[hits] <- list(replacement)
        template.txt <- unlist(pieces, use.names = FALSE)
      }
    }
  }

  ## ------------------------------------------------------------------
  ## write new file ---------------------------------------------------
  ## ------------------------------------------------------------------

  file2write <- file(file.name, "wt")
  ## make sure the connection is released even if writing fails
  on.exit(close(file2write), add = TRUE)
  writeLines(template.txt, file2write)

  cat(stringr::str_c("File: '", file.name, "' written\n"))
}

## debugging
## x <- "read-data1_csv.R"
## dir.x <- "myRproject/readMergeData"
## lib.dir <- "../lib"
## my.lib.files <- c("prestend.R", "tttt.R")
## my.libs <- stringr::str_c("source(file.path('", lib.dir,
##                           "', '", my.lib.files, "')")
## libs <- c("require(dryworkflow) # Hmisc etc will be auto loaded",
##           "require(foreign)", "require(Hmisc)")
## subst.strings <- list(DATE.CREATED = date(), ## LICENCE MISSING!
186 | ## DATAFILE = "data1.csv", 187 | ## DIR.DATAFILE = "../data/original", 188 | ## RDATA.FILE = "../data/derived/data1_csv.RData", 189 | ## LIBRARIES = libs, 190 | ## DATAFRAME = "data1", 191 | ## READ.DATA.COMMAND = "read.csv", 192 | ## READ.CODEBOOK = "### Code book not used", 193 | ## MYLIB.DIR = "../lib", 194 | ## MYLIB.FILES= my.libs) 195 | ## ## type <- "readR" # not used in this function but outside 196 | ## template <- "template_readR.txt" 197 | ## template.dir <- "templates" 198 | ## file.string <- "FILENAME" 199 | ## delete.start <- "-- START: DELETE THIS SECTION --" 200 | ## delete.end <- "-- END: DELETE THIS SECTION --" 201 | ## overwrite.file <- FALSE 202 | 203 | ## createFromTemplate("read_data1_csv.R", dir.x = "myRproject/readMergeData", 204 | ## subst.strings = subst.strings, 205 | ## template = "template_readR.txt", template.dir = template.dir) 206 | -------------------------------------------------------------------------------- /R/createSyntaxR.R: -------------------------------------------------------------------------------- 1 | ## Filename: createSyntaxR.R 2 | ## Purpose: Create an R syntax file from a template file to read data 3 | ## 4 | ## To run in terminal use: R CMD BATCH --vanilla createSyntaxR.R 5 | 6 | ## Created at: Sat 2015-04-25 at 01:57:17 7 | ## Author: Peter Baker 8 | ## Hostname: sph-ph-428-04p.sph.uq.edu.au 9 | ## Directory: /home/pete/Data/R.workflow/Rpackage-201501/src/R/ 10 | ## Licence: GPLv3 see 11 | ## 12 | ## Change Log: 2015-05-01 at 17:47:14 13 | ## Added dependencies and generalised to several process.types 14 | ## 15 | 16 | ##' Create \code{R} syntax for reading, cleaning, summarising and analysing 17 | ##' 18 | ##' Function is used internally by \code{\link{createProjectSkeleton}} and 19 | ##' \code{\link{addFile}} to produce individual \code{.R} template 20 | ##' syntax files for cleaning, summarising and analysing data files in 21 | ##' a data analysis project 22 | ##' 23 | ##' @param dir.project directory 
name for project 24 | ##' @param filesAndDFs object of S3 class \code{fileAndDataNames} 25 | ##' containing relevant filenames, directories for setting up syntax 26 | ##' and Makefiles 27 | ##' @param template name of template text file as string. Default: 28 | ##' \code{NULL} for predefined template from \code{\link{dryworkflow}} 29 | ##' package. 30 | ##' @param project.steps steps to be carried out in project, specified 31 | ##' as a vector of strings. Options are \dQuote{read} to read data 32 | ##' (always assumed present), \dQuote{clean} clean data, 33 | ##' \dQuote{summary} summary statistics and basic plots, 34 | ##' \dQuote{analyse} perform statistical analysis, \dQuote{compare} 35 | ##' compare datasets and in particular different versions of the same 36 | ##' data set, \dQuote{mergeAll} merge data sets if more than one; and 37 | ##' \dQuote{reportRmd} or \dQuote{reportRnw} produce reports using 38 | ##' \code{\link{rmarkdown}} and/or \code{\link{Sweave}} and 39 | ##' \dQuote{presentRmd} or \dQuote{beamerRnw} produce presentations 40 | ##' using \code{\link{rmarkdown}} and/or \code{\link{Sweave}} 41 | ##' @param makefile.depends files to be used as dependencies in 42 | ##' addition to the syntax file for targets. Default: worked out from 43 | ##' project structure (\code{NULL}) 44 | ##' @param makefile.targets strings with file extensions for targets 45 | ##' in makefiles Default: c(\dQuote{Rout}, \dQuote{pdf}) using 46 | ##' \code{R BATCH} and \code{stitch} via \code{rmarkdown}, respectively 47 | ##' @param myFunction.files character vector of own homegrown function 48 | ##' file names to be sourced not currently put in to a package 49 | ##' @param libraries character vector of library statements to be 50 | ##' added near top of \code{R} syntax file. Default: load 51 | ##' \code{dryworkflow} \code{Hmisc}, \code{foreign} 52 | ##' @param template.dir directory containing template. 
Default: 53 | ##' \code{\link{dryworkflow}} package directory 54 | ##' @param print.mismatches print mismatches when creating syntax or 55 | ##' markdown files from templates. Default: \code{FALSE} 56 | ##' @param overwrite.file logical whether or not to allow overwrite of 57 | ##' existing file. Default: FALSE 58 | ##' @param string.prefix string of characters for start of strings to 59 | ##' be substituted from template file. Default \sQuote{@@@@} 60 | ##' @param string.suffix string of characters for end of strings to be 61 | ##' substituted from template file. Default: same as 62 | ##' \code{string.prefix} 63 | ##' @param delete.start lines between and including those containing 64 | ##' the \code{delete.start} and \code{delete.end} patterns will be 65 | ##' removed. Default: \dQuote{---- START: DELETE THIS SECTION ----} 66 | ##' @param delete.end Default: \dQuote{---- END: DELETE THIS SECTION ----} 67 | ##' @param extras list of extra options to be passed to function for 68 | ##' substitution in template. 
##'   Default: licence, author and
##'   title.project obtained from global options
##' @param AUTHOR string containing Author's name for \code{R} and
##'   markdown syntax files
##' @param TITLE.PROJECT string containing title of project for log
##'   files and reports
##' @return list with components \code{targets} (file names of the
##'   Makefile targets) and \code{makefileLines} (lines to be included in
##'   the Makefile), both character vectors
##' @author Peter Baker  \email{pete@@petebaker.id.au}
createSyntaxR <-
  function(
    dir.project, filesAndDFs, template,
    project.steps = c("read", "clean", "summary", "analyse", "mergeAll",
      "codebook", "reportRmd", "reportRnw", "presentRmd", "beamerRmd",
      "beamerRnw"),
    makefile.depends = NULL, makefile.targets = c("Rout", "pdf"),
    myFunction.files = NULL, libraries = NULL,
    template.dir = NULL, print.mismatches = FALSE,
    overwrite.file = FALSE,
    string.prefix = "@@", string.suffix = string.prefix,
    delete.start = "-- START: DELETE THIS SECTION --",
    delete.end = "-- END: DELETE THIS SECTION --",
    extras = list(LICENCE = options()$dryworkflow$licence,
      AUTHOR = options()$dryworkflow$author,
      TITLE.PROJECT = options()$dryworkflow$title.project)
    )
{

  ## project.steps / makefile.targets arguments ---------------------------
  project.steps <- match.arg(project.steps)  # there can only be one
  makefile.targets <-
    match.arg(makefile.targets, c("Rout", "pdf", "docx", "html"),
              several.ok = TRUE)

  ## report/presentation steps produce .Rmd/.Rnw files and never have an
  ## .Rout target; all other steps produce plain .R syntax
  reportFile <- project.steps %in% c("reportRmd", "reportRnw", "presentRmd",
                                     "beamerRmd", "beamerRnw")
  if (reportFile){
    makefile.targets <- setdiff(makefile.targets, "Rout")
    if (project.steps %in% c("reportRmd", "presentRmd", "beamerRmd")){
      syntax.ext <- "Rmd"
    } else {
      syntax.ext <- "Rnw"
    }
  } else {
    syntax.ext <- "R"
  }

  ## argument checks --------------------------------------------------------
  if (!is.character(dir.project)) stop("dir.project wrong")
  ## inherits() copes with objects carrying more than one class
  if (!inherits(filesAndDFs, "fileAndDataNames"))
    stop("'filesAndDFs' not of class 'fileAndDataNames'")
  if (mode(extras) != "list")
    stop("'extras' not of mode 'list'")

  ## libraries --------------------------------------------------------------
  ## only fall back on the default set when the caller supplied nothing;
  ## user supplied library statements are used as given
  if (is.null(libraries)){
    libraries <-
      c("library(dryworkflow) # Some of these libraries load others too",
        "library(plyr)", "library(reshape2)",
        "library(lubridate)", "library(stringr)",
        "library(Hmisc)", "library(car)", "library(compare)")
  }

  if (is.null(myFunction.files)) myFunction.files <- ""

  ## dependencies -----------------------------------------------------------
  ## map of each project step to the step(s) (or raw inputs) it depends on
  reportDeps <- c("clean", "summary", "analyse")
  makefileDepends <- list(read = "data", clean = "read", mergeAll = "clean",
                          summary = "clean", analyse = "clean",
                          codebook = "codebook",
                          reportRmd = reportDeps, reportRnw = reportDeps,
                          presentRmd = reportDeps,
                          beamerRmd = reportDeps, beamerRnw = reportDeps)
  if (!is.null(makefile.depends)){
    ## NOTE(review): the code below uses this object as a step -> previous
    ## step map, so a user supplied value is accepted in that form and
    ## overrides the defaults entry by entry -- confirm this is the
    ## intended meaning of 'makefile.depends'
    if (!is.list(makefile.depends) || is.null(names(makefile.depends)))
      stop("'makefile.depends' must be a named list mapping project ",
           "steps to the step(s) they depend on")
    makefileDepends <- utils::modifyList(makefileDepends, makefile.depends)
  }

  ## specific read in data commands -----------------------------------------
  readDataCommands <-
    switch(project.steps,
           read = whichReadCommand(filesAndDFs[["dataFiles"]]),
           codebook = "readCodeBook",
           "load")

  ## simple substitutions ---------------------------------------------------
  ## input/output directories and reading data in
  dataInputDir <- switch(project.steps,
                         read = filesAndDFs$directories["dataOrig"],
                         codebook = filesAndDFs$directories["dataCodebook"],
                         filesAndDFs$directories["dataDeriv"])
  ## "<data frame> <- " prefix; NULL (no assignment) when data are load()ed
  dataframe.read <-
    switch(project.steps,
           read = paste(filesAndDFs$dataFrames[["read"]], "<- "),
           codebook = paste(filesAndDFs$dataFrames[["codebook"]], "<- "),
           NULL)
  if (reportFile){
    dataFileInput <- filesAndDFs$RDataFiles[["clean"]]
  } else {
    dataFileInput <-
      switch(project.steps,
             read = filesAndDFs$dataFiles,
             codebook = filesAndDFs$codebookFiles,
             filesAndDFs$RDataFiles[[makefileDepends[[project.steps]]]])
  }
  inputFileCommands <-
    paste0(dataframe.read,
           stringr::str_c(readDataCommands, '("', dataInputDir, '/',
                          dataFileInput, '")'))

  ## data frames - input/output ---------------------------------------------
  if (reportFile){
    dataFrameIn <- filesAndDFs$dataFrames[["clean"]]
    dataFrameSum <- filesAndDFs$dataFrames[["summary"]]
    dataFrameAna <- filesAndDFs$dataFrames[["analyse"]]
    rdataSum <- filesAndDFs$RDataFiles[["summary"]]
    rdataAna <- filesAndDFs$RDataFiles[["analyse"]]
    ## reports do not save any derived data
    dataFrameSaved <- dataOutputDir <- dataFileOutput <- NULL
  } else {
    dataFrameIn <-
      switch(project.steps,
             read = filesAndDFs$dataFrames[["read"]],
             codebook = filesAndDFs$dataFrames[["codebook"]],
             filesAndDFs$dataFrames[[makefileDepends[[project.steps]]]])
    dataFrameSaved <- filesAndDFs$dataFrames[[project.steps]]
    ## always put derived data in this directory
    dataOutputDir <- filesAndDFs$directories["dataDeriv"]
    dataFileOutput <- filesAndDFs$RDataFiles[[project.steps]]
  }

  ## strings that are the same for every syntax file of this step
  ps2 <- if (reportFile) NULL else project.steps
  common.strings <- c(list(
    PROJECT.STEP = ps2,
    DATE.CREATED = date(),
    DIR.DATA.INPUT = dataInputDir,
    DIR.DATA.SAVED = dataOutputDir,
    MYLIB.FILES = myFunction.files,
    LIBRARIES = libraries), extras)

  ## destination directories for R syntax -----------------------------------
  ## every directory except the data directories, with a leading "../"
  ## stripped
  dataDirs <- names(filesAndDFs$directories) %in%
    c("dataDeriv", "dataOrig", "dataCodebook")
  rSyntaxDirs <- filesAndDFs$directories[!dataDirs]
  rSyntaxDirs <- lapply(rSyntaxDirs, stringr::str_replace,
                        pattern = stringr::str_c("^..", .Platform$file.sep),
                        replacement = "")

  ## mergeAll - need inputs as a string instead of separate strings
  if (project.steps == "mergeAll"){
    dataFrameIn <-
      paste0("c('", paste(dataFrameIn, collapse = "', '"), "')")
    dataFileInput <-
      paste0("c('", paste(dataFileInput, collapse = "', '"), "')")
    dataFiles4merge <-
      paste0("c('", paste(filesAndDFs$dataFiles, collapse = "', '"), "')")
  }

  ## syntax file ------------------------------------------------------------
  if (reportFile){
    RsyntaxFiles <- filesAndDFs$reportFiles[[project.steps]]
  } else {
    RsyntaxFiles <- filesAndDFs$RsyntaxFiles[[project.steps]]
  }
  dir.x <- rSyntaxDirs[[project.steps]]
  syntax.files <- file.path(dir.project, dir.x, RsyntaxFiles)

  ## specific strings to substitute - one for each R syntax file ------------
  subst.strings1 <-
    list(DATAFILE = switch(project.steps,
           codebook = filesAndDFs$codebookFiles,
           mergeAll = dataFiles4merge, filesAndDFs$dataFiles),
         INPUT.COMMANDS = inputFileCommands,
         RDATA.INPUT = dataFileInput,
         RDATA.SAVED = dataFileOutput,
         DATAFRAME.INPUT = dataFrameIn,
         DATAFRAME.SAVED = dataFrameSaved)
  if (!reportFile)
    subst.strings1 <- c(subst.strings1, list(SYNTAX.FILE = RsyntaxFiles))

  if (project.steps == "mergeAll"){ # want loading files on separate
                                    # lines not in loop for each file
    common.strings <-
      c(common.strings,
        list(INPUT.COMMANDS = subst.strings1$INPUT.COMMANDS))
    subst.strings1$INPUT.COMMANDS <- NULL
  }

  if (reportFile){
    common.strings <- c(common.strings, list(SYNTAX.FILE = RsyntaxFiles))
    names(subst.strings1)[names(subst.strings1) == "DATAFRAME.INPUT"] <-
      "DATAFRAME.CLEAN"
    names(subst.strings1)[names(subst.strings1) == "RDATA.INPUT"] <-
      "RDATA.CLEAN.SAVED"
    subst.strings1 <- c(subst.strings1,
                        list(RDATA.SUMMARY.SAVED = rdataSum,
                             RDATA.ANALYSIS.SAVED = rdataAna,
                             DATAFRAME.SUMMARY = dataFrameSum,
                             DATAFRAME.ANALYSIS = dataFrameAna))
    ## drop NULLs
    subst.strings1 <-
      subst.strings1[!vapply(subst.strings1, is.null, logical(1))]
    common.strings <-
      common.strings[!vapply(common.strings, is.null, logical(1))]
  }

  ## Check for consistency - all same length for writing in loop
  if (project.steps != "compare"){
    if (length(unique(vapply(subst.strings1, length, integer(1)))) != 1){
      print(subst.strings1)
      stop("not all elements of 'subst.string1' have same length")
    }
  }

  makefileLines <-
    c("",
      switch(project.steps,
             read = "## Read data and store for cleaning and analysis",
             summary = "## Summaries and analyses", NULL))

  ## where do the dependencies live? ----------------------------------------
  previous.step <- makefileDepends[[project.steps]]
  if (length(previous.step) == 1) {
    whichDirs <- switch(previous.step, data = "dataOrig",
                        codebook = "dataCodebook", previous.step)
  } else {
    whichDirs <- previous.step
  }
  previousDirectory <- filesAndDFs$directories[whichDirs]

  ## dependencies only get a directory prefix when all of them live in a
  ## directory other than the one of the current step
  needRecursive <-
    (previousDirectory != filesAndDFs$directories[[project.steps]])
  if (all(needRecursive)){
    make.dep.dir <- paste0(previousDirectory, "/")
  } else {
    make.dep.dir <- NULL
  }

  ## apply the subst list - one syntax file at a time -----------------------
  targetExt <- stringr::str_c("\\.", syntax.ext, "$")
  targets <- NULL

  for (J in seq_along(RsyntaxFiles)){
    subst.Str <- c(lapply(subst.strings1, function(x) x[J]),
                   common.strings)
    createFromTemplate(
      syntax.files[J], subst.strings = subst.Str,
      template = template, template.dir = template.dir,
      print.mismatches = print.mismatches,
      overwrite.file = overwrite.file,
      string.prefix = string.prefix, string.suffix = string.suffix,
      delete.start = delete.start, delete.end = delete.end)

    ## dependency for Makefile
    if (!reportFile){ # R syntax needs previous step .Rout as a dependency
      if (project.steps != "mergeAll"){
        make.dep.file <-
          switch(previous.step,
                 data = dataFileInput[J],
                 codebook = dataFileInput[J],
                 stringr::str_replace(
                   filesAndDFs$RsyntaxFiles[[previous.step]][J],
                   "\\.R$", ".Rout"))
      } else {
        make.dep.file <-
          paste(stringr::str_replace(
            filesAndDFs$RsyntaxFiles[[previous.step]],
            "\\.R$", ".Rout"), collapse = " ")
      }
      make.dep <-
        stringr::str_c(make.dep.dir, make.dep.file)
    } else { # reports/presentations depend on clean, summary and analysis
      ## NOTE(review): 'make.dep' is only defined when report.which is
      ## "first"; any other value fails below with "object 'make.dep' not
      ## found" as soon as a target line is written -- confirm which
      ## dependencies the other settings should get
      if (filesAndDFs$report.which == "first"){
        make.dep.file <-
          paste(stringr::str_replace(
            sapply(filesAndDFs$RsyntaxFiles[previous.step],
                   function(x) x[J]), "\\.R$", ".Rout"))
        make.dep <- paste(paste0(make.dep.dir, make.dep.file), collapse =" ")
      }
    }

    ## write make file lines ------------------------------------------------
    for (EXT in makefile.targets){
      t1 <- stringr::str_replace(basename(syntax.files[J]),
                                 targetExt, paste0(".", EXT))
      targets <- c(targets, t1)
      makefileLines <-
        c(makefileLines,
          stringr::str_c(t1, ": ${@:.", EXT, "=.", syntax.ext,"} ", make.dep))
    }
  }

  ## for makefiles
  list(targets = targets, makefileLines = makefileLines)

}



## function(x, dir.project = dir.project, mnemonic = MNEMONIC,
##          readMerge = file.path(dir.project,
##            projectDirs$working.dirs$readMerge),
##          data.orig = file.path(projectDirs$working.dirs$dataOrig),
##          data.deriv = file.path(projectDirs$working.dirs$dataDeriv),
##          data.codebook = file.path(projectDirs$working.dirs$codebook),
##          codebook = NULL)

## ## for debugging
## dir.project
## filesAndDFs
##
myFunction.files 434 | ## template = template.files["reportRmd"] 435 | ## template.dir 436 | ## makefile.depends = NULL 437 | ## overwrite.file = FALSE 438 | ## file.string = "FILENAME" 439 | ## string.prefix = "@@" 440 | ## string.suffix = string.prefix 441 | ## delete.start = "-- START: DELETE THIS SECTION --" 442 | ## delete.end = "-- END: DELETE THIS SECTION --" 443 | ## extras = list(LICENCE = licence) 444 | ## type.project = c("normal", "simple") 445 | ## libraries = NULL 446 | 447 | ## ##makefile.targets <- c("Rout", "pdf") 448 | ## ##project.steps <- "read" 449 | ## makefile.targets <- c("pdf", "html") 450 | ## project.steps <- "reportRmd" 451 | 452 | ## ## how to choose read/clean etc dests - only a couple but 453 | ## rSyntaxDirs <- list(read = projectDirs$working.dirs$readMerge, 454 | ## clean = projectDirs$working.dirs$readMerge, 455 | ## codebook = projectDirs$working.dirs$readMerge, 456 | ## summary = projectDirs$working.dirs$work, 457 | ## analyse = projectDirs$working.dirs$work) 458 | 459 | ## syntax.file <- file.path(dir.project, rSyntaxDirs[["clean"]], 460 | ## filesAndDFs$RsyntaxFiles[["clean"]][1]) 461 | ## ## if dir.project NULL then use a "." 
462 | ## ##syntax.file <- file.path(".", rSyntaxDirs[["clean"]], 463 | ## ## filesAndDFs$RsyntaxFiles[["clean"]][1]) 464 | ## ## syntax.file 465 | ## ## > dirname(syntax.file) 466 | ## ## [1] "myRproject/readMergeData" 467 | ## ## > basename(syntax.file) 468 | ## ## [1] "clean_data1_csv.R" 469 | 470 | ## createSyntaxR(dir.project = dir.project, 471 | ## syntax.file = syntax.file, 472 | 473 | ## projectDirs = projectDirs, 474 | ## filesAndDFs = filesAndDFs, 475 | ## rdata.dir = data.dir$DIR.DERIVED, 476 | ## myFunction.files = myFunction.files, 477 | ## template = template.files[["readR"]], 478 | ## template.dir = template.dir) 479 | ## } 480 | -------------------------------------------------------------------------------- /R/dryworkflow.R: -------------------------------------------------------------------------------- 1 | ##' dryworkflow: don't repeat yourself workflow for more efficient data analysis 2 | ##' 3 | ##' The \code{dryworkflow} package produces a project skeleton for 4 | ##' data analysis including \code{R} syntax files, report and 5 | ##' Makefiles. Given data files and documents, the skeleton is 6 | ##' generated with initial directories, template log files, template 7 | ##' \code{R} syntax for data checking and initial analysis, makefiles 8 | ##' and a \code{git} repository is initialised. 9 | ##' 10 | ##' @section Templates: \code{R} syntax templates for reading, 11 | ##' cleaning, merging, summarising and analysing data and 12 | ##' \code{Rmarkdown} and \code{Sweave} templates for reports. The 13 | ##' function \code{\link{copyTemplates}} may be used to get copies 14 | ##' of these templates which can then be modified for use when 15 | ##' creating a project skeleton. 16 | ##' 17 | ##' @section Make and definitions: Makefiles are generated. 
The file 18 | ##' \code{common.mk} provides pattern rules to produce 19 | ##' \code{.Rout} and \code{.pdf} files from \code{R} syntax files 20 | ##' and \code{.html}, \code{.pdf} and \code{.docx} files from 21 | ##' \code{.Rmd} R markdown and \code{.Rnw} files. The function 22 | ##' \code{\link{copyCommonMk}} may be used to get a copy of the 23 | ##' \code{common.mk} file used by the installed version of the 24 | ##' \code{dryworkflow} package. The latest version of 25 | ##' \code{common.mk} can always be found at 26 | ##' \url{https://github.com/petebaker/r-makefile-definitions}. 27 | ##' 28 | ##' @section .gitignore: A \code{.gitignore} file is created in the 29 | ##' base project directory to indicate files not to be tracked by 30 | ##' \code{git}. The function \code{\link{copyGitIgnore}} may be 31 | ##' used to get a copy of the \code{.gitignore} file used by the 32 | ##' installed version of the \code{dryworkflow} package. The 33 | ##' latest version of \code{.gitignore} can always be found at 34 | ##' \url{https://github.com/petebaker/r-gitignore}. 35 | ##' 36 | ##' @section Project Options: Note that option parameters are either 37 | ##' set as an argument to the function 38 | ##' \code{\link{createProjectSkeleton}} or automatically via 39 | ##' global options using 40 | ##' \code{getOption("dryworkflow")}. Customised options may be set 41 | ##' in \code{.Rprofile} using global options and these will be set 42 | ##' automatically when \code{dryworkflow} is loaded. 
43 | ##' 44 | ##' @examples 45 | ##' 46 | ##' ## setting global options or put these in .Rprofile 47 | ##' 48 | ##'\dontrun{ 49 | ##'current.opts <- options() 50 | ##'options("dryworkflow" = list(git = list(user.name = "My Name", user.email = "myname@@email.com"))) 51 | ##'library(dryworkflow) 52 | ##'options("dryworkflow") 53 | ##'} 54 | ##' 55 | ##' ## A project with all default settings 56 | ##' 57 | ##' ## copy .csv file and codebook from dryWorkflow package 58 | ##' ## noting that normally you just place files in current directory 59 | ##' ## and then run 'createProjectSkeleton' 60 | ##' file.copy(system.file('demoFiles', 'small2.csv', package='dryworkflow'), 61 | ##' 'small2.csv') 62 | ##' file.copy(system.file('demoFiles', 'small2_codebook.csv', 63 | ##' package='dryworkflow'), 'small2_codebook.csv') 64 | ##' 65 | ##' ## NB: In practice, always check directories, R syntax etc 66 | ##' ## before using 'make' 67 | ##' createProjectSkeleton(dir.proj = "testProject2", 68 | ##' name.project = "Experiment 1", 69 | ##' dontmove = "dryworkflow-Ex.R") 70 | ##' 71 | ##' @docType package 72 | ##' @name dryworkflow 73 | ##' @aliases dryworkflow dryworkflow-package 74 | NULL 75 | -------------------------------------------------------------------------------- /R/getProjectConfig.R: -------------------------------------------------------------------------------- 1 | ##' Get dryworkflow configuration from configFile.rds file 2 | ##' 3 | ##' Configuration file \code{configFile.rds} is created with 4 | ##' \code{\link{createProjectSkeleton}} when a project is created. It 5 | ##' contains details like directory structures and various parameters 6 | ##' for a project. 7 | ##' 8 | ##' @param projectDir base directory of dryWorkflow project 9 | ##' @param checkAbove check directory above current directory for 10 | ##' presence of configuration file 11 | ##' @param checkSubDirs check directories below current directory for 12 | ##' presence of configuration file(s). 
##'   If there is more than one then
##'   print locations. If there is only one, print a message indicating
##'   which project directory is present and use that.
##'
##' @return object of class \dQuote{drywProjectConfig} else FALSE if
##'   file \code{configFile.rds} is not found or if object not of
##'   correct class
getProjectConfig <- function(
  projectDir = ".",
  checkAbove = FALSE,
  checkSubDirs = FALSE)
{

  if (!(dir.exists(projectDir)))
    stop("Project directory: '", projectDir, "' not found")

  ## scalar flags, so use the short-circuit operator
  if (checkAbove && checkSubDirs){
    stop("Please specify only one of 'checkAbove' or 'checkSubDirs'")
  }

  configName <- "configFile.rds"

  ## check project directory ----------------------------------------
  if (projectDir == "."){
    fp <- configName
  } else {
    fp <- file.path(projectDir, configName)
  }

  ## check directory above (the current directory) if specified -----
  if (checkAbove){
    fp <- file.path("..", configName)
  }

  ## check directory(s) below (the current directory) if specified --
  if (checkSubDirs){
    checkDirs <- setdiff(list.dirs(), ".") # all subdirectories not "."
    ## stays empty when there are no subdirectories at all
    allConfigs <- character(0)
    if (length(checkDirs) > 0){
      allConfigs <-
        list.files(checkDirs, "^configFile\\.rds$", full.names = TRUE)
    }
    if (length(allConfigs) > 1){
      cat("Multiple possibilities for configuration files:\n")
      print(allConfigs)
      stop("Please specify only one configuration file")
    }
    fp <- allConfigs
    if (length(fp) == 1){
      cat("Using configuration file found in project directory:",
          dirname(fp), "\n")
    }
  }

  ## configuration file not found: return FALSE as documented rather
  ## than letting readRDS() fail
  if (length(fp) != 1 || !file.exists(fp)){
    warning("configuration file 'configFile.rds' not found")
    return(FALSE)
  }

  ## tidy up and return config object ---------------------------------
  projectConfig <- readRDS(fp)

  ## inherits() also copes with objects carrying more than one class
  if (!inherits(projectConfig, "drywProjectConfig")){
    warning("'projectConfig' not of class 'drywProjectConfig'")
    projectConfig <- FALSE
  }

  ## return project configuration
  projectConfig
}

--------------------------------------------------------------------------------
/R/readCodeBook.R:
--------------------------------------------------------------------------------
##' Read a code book in standard format as a csv file
##'
##' Reads a code book stored as a \code{csv} file for either checking
##' against a data file or relabelling factor levels or labelling
##' variables.
##'
##' Often, data dictionaries or code books are provided with data
##' files. Rather than a \code{word} \code{doc} or \code{pdf} files,
##' the format required here is in a very specific format stored as a
##' \code{csv} file. Once read in, attributes such as factor
##' labels/levels and variable labels can be added to the
##' \code{data.frame} and/or also used to check factor labels and
##' variable names are consistent with the code book. Note that while
##' various methods may be available which attempt to convert word
##' docs or pdf's to a spreadsheet and/or csv file, extreme care
##' should be taken as these are far from perfect.
##'
##' @param x filename of codebook to parse
##' @param codebook.directory directory containing codebook. Default:
##' \code{NULL}, in which case the current directory is used
##' @param col.names named character vector containing column names in
##' Code Book file. The vector contains components \dQuote{var.name} =
##' variable name, \dQuote{var.orig} = original name (if changed),
##' \dQuote{var.label} for printing/plotting, \dQuote{levels} = factor
##' levels, \dQuote{min} and \dQuote{max} for continuous measurements.
##' Only a subset of these components need be given.
##' @param non.standard named list of non-standard names of columns
##' with names \code{c("var.name", "var.orig", "var.label", "levels", "min", "max")}
##' @param na.strings a character vector of strings which are to be
##' interpreted as \sQuote{NA} values. Blank fields are also
##' considered to be missing values in logical, integer, numeric and
##' complex fields. Default: \code{c("", "NA", ".", " ")}
##' @return S3 object of type class \dQuote{codebook}
##' @author Peter Baker \email{pete@@petebaker.id.au}
##' @examples
##' file.copy(system.file('demoFiles', 'data1_codebook.csv',
##'                       package='dryworkflow'), 'data1_codebook.csv')
##' data1_codebook <- readCodeBook("data1_codebook.csv",
##'          non.standard = list(levels = "Factor.Levels",
##'                              var.orig = "Old.Variable"))
##' @export
readCodeBook <-
  function(
    x, codebook.directory = NULL,
    col.names = c(var.name = "Variable", var.orig = "Original.Name",
      var.label = "Label", levels = "Levels", min = "Min", max = "Max"),
    non.standard = NULL,
    na.strings = c("", "NA", ".", " "))
{

  if (is.null(codebook.directory)) {
    codebook.directory <- "."
  }

  ## set up column names for processing -----------------------------
  which.names <- c(var.name = NA, var.orig = NA, var.label = NA,
                   levels = NA, min = NA, max = NA)
  names.set <- names(which.names)

  if (is.null(names(col.names))) {
    ## unnamed vector: keep the historical behaviour of matching the
    ## values against the default column names
    col.names <- match.arg(col.names, several.ok = TRUE)
  } else {
    ## NB: match.arg() must not be applied to a named vector of user
    ## supplied column names - it partial-matches the values against
    ## the defaults and silently drops anything else, so customised
    ## column names were never honoured
    if (!is.character(col.names) || !all(names(col.names) %in% names.set)) {
      cat("User provided names for 'col.names':\n")
      print(names(col.names))
      cat("Should be in:\n")
      print(names.set)
      stop("Please provide correct names.")
    }
  }

  if (!is.null(non.standard)) {
    if (is.null(names(non.standard)) ||
        !all(names(non.standard) %in% names.set)) {
      cat("User provided names for 'non.standard':\n")
      print(names(non.standard))
      cat("Should be in:\n")
      print(names.set)
      stop("Please provide correct names.")
    }
    ## flatten to character so 'col.names' stays an atomic vector
    col.names[names(non.standard)] <-
      vapply(non.standard, function(v) as.character(v)[1], character(1))
  }

  ## read in codebook ----------------------------------------
  cat("\nFunction 'readCodeBook' largely untested: beware!\n\n")
  code.file <- file.path(codebook.directory, x)
  xCodes <- read.csv(code.file, na.strings = na.strings,
                     stringsAsFactors = FALSE)
  colNames <- names(xCodes)

  ## check names present and not -------------------------------------------
  presentNames <- col.names[col.names %in% colNames]  # names that are present
  absentNames <- which.names[!(names(which.names) %in% names(presentNames))]

  ## are variables names same as specified and if not make suggestions
  if (!all(colNames %in% col.names)) {
    cat(paste0("File: '", x, "'"), "\n")
    cat("Column Names:\n")
    print(colNames)
    cat("Not all column names properly defined. Not defined:\n")
    print(absentNames)
    cat("\nVariables present:\n")
    print(presentNames)
    possible <- list(var.name = "variable|var", var.orig = "orig|old",
                     var.label = "lab", levels = "lev", min = "min",
                     max = "max")
    cat("\nPotential variable names (see ?readCodeBook):\n")
    lowerNames <- tolower(colNames)
    poss <- lapply(possible, function(pattern) {
      hits <- grep(pattern, lowerNames)
      if (length(hits) > 0) colNames[hits] else NA
    })
    print(poss)
  }

  ## everything below is keyed on the variable name column
  if (!("var.name" %in% names(presentNames))) {
    stop("Variable name column not found in '", code.file,
         "': set 'col.names' or 'non.standard' (see ?readCodeBook)")
  }
  nameCol <- presentNames[["var.name"]]

  if ("levels" %in% names(absentNames)) {
    cat("Warning: factor levels column not found.\n  This should be set if possible\n")
    isFactorLevels <- FALSE
  } else {
    isFactorLevels <- TRUE
  }

  ## variable labels ------------------------------------------------
  ## rows with a missing variable name only carry extra factor levels
  vNames <- !is.na(xCodes[[nameCol]])
  varNames <- xCodes[[nameCol]][vNames]
  if ("var.label" %in% names(presentNames)) {
    varLabels <- xCodes[[presentNames[["var.label"]]]][vNames]
    names(varLabels) <- varNames
  } else {
    varLabels <- NA
  }

  ## renamed variables: --------------------------------------------------
  ## if variable renamed then construct table with old, new name
  if ("var.orig" %in% names(presentNames)) {
    ## extract old/new variable names
    renamedVars <- xCodes[, c(nameCol, presentNames[["var.orig"]])]
    ## drop missings which are result of info re factor levels etc
    renamedVars <- renamedVars[!is.na(renamedVars[, nameCol]), ]
  } else {
    renamedVars <- NA
  }

  ## set factor levels ------------------------------------------------
  if (isFactorLevels) {
    levelCol <- presentNames[["levels"]]
    ## na.rm = FALSE keeps the vector the same length as the codebook
    ## even when the first variable name is missing
    xCodes$var.name.filled <-
      as.character(zoo::na.locf(xCodes[[nameCol]], na.rm = FALSE))
    ## appears more than once then is a factor
    runs <- rle(xCodes$var.name.filled)
    isFactor <- runs$lengths > 1
    factors <- runs$values[isFactor]
    n.levels <- runs$lengths[isFactor]
    names(n.levels) <- factors

    factor.info <- xCodes[xCodes[, "var.name.filled"] %in% factors, ]
    ## levels are coded as "<level>=<label>"
    tmp <- strsplit(as.character(factor.info[, levelCol]), "=")
    factor.info$fac.level <- vapply(tmp, function(y) y[1], character(1))
    factor.info$fac.label <- vapply(tmp, function(y) y[2], character(1))
    factor.info$Factors <- factor.info$var.name.filled
    factorLevels <-
      by(factor.info, factor.info$Factors, function(y)
        list(fac.level = y$fac.level, fac.label = y$fac.label))

    ## min and max for continuous -------------------------------------
    contVars <- xCodes[grep("[Cc]ont", xCodes[, levelCol]), nameCol]
    contVars <- unique(contVars[!is.na(contVars)])
    ## rep() so that a codebook without continuous variables gives a
    ## zero-row data frame rather than an error
    minMaxVars <- data.frame(var.name = contVars,
                             min = rep(NA, length(contVars)),
                             max = rep(NA, length(contVars)))
    ## first row on which each continuous variable is defined; match()
    ## avoids the NA rows produced by '==' on missing variable names
    defRows <- match(contVars, xCodes[[nameCol]])
    for (bound in intersect(c("min", "max"), names(presentNames))) {
      minMaxVars[[bound]] <- xCodes[[presentNames[[bound]]]][defRows]
    }
  } else {
    factorLevels <- factor.info <- factors <- contVars <- minMaxVars <- NA
  }

  ## store all codebook data away in a S3 "codebook" class
  code.book <- list(codeBook = xCodes,
                    varNames = varNames,
                    varLabels = varLabels,
                    factorNames = factors,
                    factorLevels = factorLevels,
                    minMaxVars = minMaxVars,
                    factorInfo = factor.info,
                    renamedVars = renamedVars,
                    otherInfo = list(presentNames = presentNames,
                                     absentNames = absentNames,
                                     contNames = contVars))
  class(code.book) <- "codebook"
  comment(code.book) <- paste0("Codebook read from '", code.file,
                               "' at ", date())
  code.book
}

## codebook.directory <- "../inst/demoFiles"
## x <- "data1_codebook.csv"
## col.names <- c(var.name = "Variable", var.orig = "Old.Variable",
##                var.label = "Label", levels = "Factor.Levels")

## --------------------------------------------------------------------------------
## /R/setUpDirectoryStructure.R:
## --------------------------------------------------------------------------------
## Filename: setUpDirectoryStructure.R
## Purpose: Create directory structure for dryWorkflow project
##
## To run in terminal use:  R CMD BATCH --vanilla setUpDirectoryStructure.R

## Created at: Mon Mar 23 23:38:52 2015
## Author: Peter Baker
## Hostname: clearwell2.fritz.box
## Directory: /home/pete/Data/R.workflow/Rpackage-201501/src/R/
## Licence: GPLv3 see
##
## Change Log:
##

## perhaps this shouldn't be called directly so but may need to for
## custom setup

##' Set up directories for data analysis project
##'
##' @param style style for directory and file names (\code{unix} or
##' \code{windows}), Default: \code{unix}
##' @param type.project type of project: normal, simple or custom
##' (custom Not Yet Implemented). The style of directory structure
##' for the project. Default: \dQuote{normal}
##' @param destinations \code{list} of destination directories where
##' original (and added) files will be moved.
##' This is a \code{list}
##' with named components \sQuote{data}, \sQuote{doc},
##' \sQuote{codebook} and \sQuote{lib} for data files, documents,
##' codebooks and R functions, respectively. Default: unix directory
##' names will be \code{list(data = "data/original", doc = "doc/original", codebook = "data/codebook", lib = "lib")} and
##' Windows will be of similar form. NB: this argument is accepted
##' but is not currently consulted when the destinations are built.
##' @param extra extra directories additional to standard setup
##' @param extra.destinations extra destination directories additional
##' to standard setup
##' @param ... extra arguments passed to specific functions
##' @return \code{list} of directories including destinations for
##' initial files of class \sQuote{drywDestinationDirs}. Named
##' components are \code{directories}, \code{destinations} and
##' \code{working.dirs}.
setUpDirectoryStructure <-
  function(style = NULL, type.project = NULL, destinations = NULL,
           extra = NULL, extra.destinations = NULL, ...)
{

  ## basically need these destinations:
  ##  data files: data/original
  ##  doc files:  doc/original
  ##  code books: data/codebook
  ##  libs:       lib
  ## extra customised directories to be added to standard ones

  ## 'style' arg ---------------------------------------------
  allStyles <- c("unix", "windows", "custom")
  if (length(style) == 0) {
    style <- getOption("dryworkflow")$style
    ## option may be unset if the package options were never loaded
    if (length(style) == 0) style <- allStyles[1]
  }
  STYLE <- match.arg(style, allStyles)
  if (STYLE == "custom") {
    stop("Error: 'custom' style not yet implemented")
  }

  ## 'type.project' arg ----------------------------------------
  allTypes <- c("normal", "simple", "custom")
  if (length(type.project) == 0) {
    type.project <- getOption("dryworkflow")$type.project
    if (length(type.project) == 0) type.project <- allTypes[1]
  }
  TYPE.PROJECT <- match.arg(type.project, allTypes)
  if (TYPE.PROJECT == "custom") {
    stop("Error: 'custom' 'type.project' not yet implemented")
  }
  isNormal <- TYPE.PROJECT == "normal"

  ## set up directory names depending on filename style
  ## simple just has a subset of these and 'custom' - not yet implemented
  window.dirs <- list(top = c("Administration", "Backups",
                        "Datasets", "Documents", "Extra", "Library_Functions",
                        "Original", "Posted", "Reading",
                        "Read_And_Merge_Data", "Reports", "Source",
                        "Testing", "Working"),
                      admin = paste0("Administration/",
                        c("Budget", "Correspondence")),
                      data = paste0("Datasets/",
                        c("Code_Books", "Derived", "Original")),
                      doc = paste0("Documents/", c("Original", "Reading")),
                      extra = extra,
                      posted = paste0("Posted/",
                        c("Code_Books", "Data", "Documents", "Reports")))

  unix.dirs <- list(top = c("admin", "backups", "data", "doc", "extra", "lib",
                      "posted", "readMergeData", "reports",
                      "src", "test", "work"),
                    admin = paste0("admin/", c("budget", "correspondence")),
                    data = paste0("data/", c("codebook", "derived", "original")),
                    doc = paste0("doc/", c("original", "reading")),
                    extra = extra,
                    posted = paste0("posted/",
                      c("codebook", "data", "doc", "reports")))

  ## set up directory structure --------------------------
  if (isNormal) {
    DIRS <- switch(STYLE, unix = unix.dirs, windows = window.dirs)
  } else { # simple - need to think about custom later
    DIRS <- switch(STYLE,
                   unix = c("admin", "data", "doc", "posted"),
                   windows = c("Administration", "Data", "Documents",
                     "Posted"))
  }

  ## if 'extra' directories then set them here no error checking yet -
  ## should be list of character vectors that does not conflict with
  ## general structure just set up
  if (!is.null(extra)) {
    ## simple projects hold a character vector: make the coercion to a
    ## list explicit rather than relying on '$<-' doing it with a warning
    if (!is.list(DIRS)) DIRS <- as.list(DIRS)
    DIRS$extra <- extra
  }

  ## set up destination directories ------------------------------
  ## NB: codebooks need to come before data files in this list
  ## otherwise they will get moved to 'data/original' and not be
  ## present to move to 'data/codebook'
  ## Windows names agree with 'window.dirs' above ("Datasets/...")
  DESTINATION <- list()
  if (isNormal) {
    DESTINATION$codebook <- switch(STYLE, unix = "data/codebook",
                                   windows = "Datasets/Code_Books")
    DESTINATION$data <- switch(STYLE, unix = "data/original",
                               windows = "Datasets/Original")
    DESTINATION$doc <- switch(STYLE, unix = "doc/original",
                              windows = "Documents/Original")
    DESTINATION$lib <- switch(STYLE, unix = "lib",
                              windows = "Library_Functions")
  } else { # simple - need to think about custom later
    DESTINATION$codebook <- switch(STYLE, unix = "data", windows = "Data")
    DESTINATION$data <- switch(STYLE, unix = "data", windows = "Data")
    DESTINATION$doc <- switch(STYLE, unix = "doc", windows = "Documents")
    DESTINATION$lib <- "."
  }

  if (!is.null(extra.destinations)) {
    DESTINATION$extra <- extra.destinations
  }

  ## set up directories for RsyntaxFiles, analysis, etc
  ## looked up by name per style: the two 'top' vectors have different
  ## lengths and orderings so positions are not interchangeable
  workNames <- switch(STYLE,
                      unix = list(readMerge = "readMergeData",
                        work = "work", reports = "reports",
                        dataDeriv = "data/derived"),
                      windows = list(readMerge = "Read_And_Merge_Data",
                        work = "Working", reports = "Reports",
                        dataDeriv = "Datasets/Derived"))

  WORKING <- list()
  WORKING$readMerge <- if (isNormal) workNames$readMerge else "."
  WORKING$work <- if (isNormal) workNames$work else "."
  WORKING$reports <- if (isNormal) workNames$reports else "."
  WORKING$dataDeriv <- if (isNormal) workNames$dataDeriv else "."
  WORKING$dataOrig <- DESTINATION$data
  WORKING$codebook <- DESTINATION$codebook

  ## return directory lists of class 'drywDestinationDirs'
  destDirs <- list(directories = DIRS, destinations = DESTINATION,
                   working.dirs = WORKING)
  class(destDirs) <- "drywDestinationDirs"
  destDirs
}

## --------------------------------------------------------------------------------
## /R/setupFileAndDataNames.R:
## --------------------------------------------------------------------------------
## Filename: setupFileAndDataNames.R
## Purpose: Set up filenames for R syntax and markdown along with object names
##
## To run in terminal use:  R CMD BATCH --vanilla setupFilenamesAndDataFrames.R

## Created at: Fri Apr 17 15:52:39 2015
## Author: Peter Baker
## Hostname: clearwell2.fritz.box
## Directory: /home/pete/Data/R.workflow/Rpackage-201501/src/R/
## Licence: GPLv3 see
##
## Change Log:
##

## NB: work with 'initialFiles' first then use it fpr addFile (single file)

## food for thought - this could even create read.code etc but that
## might be too messy



##' Internal: Create syntax filenames and object names for processing
##'
##' This is an internal \code{\link{dryworkflow-package}} function.
It 25 | ##' is primarily designed to be called by 26 | ##' \code{\link{createProjectSkeleton}} and \code{\link{addFile}} to 27 | ##' set up file and object names for processing. Given a list of data 28 | ##' filenames and optionally, a list of project steps, various names 29 | ##' are created for use with templates, makefiles and git for version 30 | ##' control. 31 | ##' 32 | ##' @param dir.project directory name for project. Default: 33 | ##' \dQuote{myRproject} 34 | ##' @param destinations \code{list} of destination directories where 35 | ##' original (and added) files will be moved. This is a \code{list} 36 | ##' with named components \sQuote{data}, \sQuote{doc}, 37 | ##' \sQuote{codebook} and \sQuote{lib} for data files, documents, 38 | ##' codebooks and R functions, respectively. Default: unix directory 39 | ##' names will be \code{list(data = "data/original", doc = 40 | ##' "doc/original", codebook = "data/codebook", lib = "lib")} and 41 | ##' Windows will be of similar form but capitalised 42 | ##' with longer form names. 43 | ##' @param projectConfig project configuration stored at project 44 | ##' creation and updated when files added. Format similar 45 | ##' to getOptions(\dQuote{dryworkflow}) 46 | ##' @param projectDirs directory structure of project of class 47 | ##' 'drywDestinationDirs' 48 | ##' @param filenames filenames for added files but not set for new 49 | ##' project. 50 | ##' @param initial.files initial file sources and extensions 51 | ##' @param mnemonic three or four letter mnemonic to aid remembering 52 | ##' and for succinct naming \code{R}, \code{Rmd} and \code{Rnw} files 53 | ##' and project directory. Default: \code{NULL} for none 54 | ##' @param project.steps steps to be carried out in project, specified 55 | ##' as a vector of strings. 
##' Options are \dQuote{read} to read data
##' (always assumed present), \dQuote{clean} clean data,
##' \dQuote{summary} summary statistics and basic plots,
##' \dQuote{analyse} perform statistical analysis, \dQuote{compare}
##' compare datasets and in particular different versions of the same
##' data set, \dQuote{mergeAll} merge data sets of more than one; and
##' \dQuote{reportRmd} or \dQuote{reportRnw} produce reports using
##' \code{\link{rmarkdown}} and/or\code{\link{Sweave}} and
##' \dQuote{presentRmd} or \dQuote{beamerRnw} produce presentations
##' using \code{\link{rmarkdown}} and/or\code{\link{Sweave}}
##' @param report.markdown vector of markdown file types to be
##' employed to produce reports such as \dQuote{.org}, \dQuote{.Rmd}
##' and \dQuote{.Rnw}. Default: \dQuote{.Rmd} and \dQuote{.Rnw}.
##' @param report.which which data files to produce reports
##' for. Choices: \dQuote{first}, \dQuote{merge}, \dQuote{all})
##' Default: \dQuote{first}
##' @return an S3 object of class \code{fileAndDataNames}
##'
setupFileAndDataNames <-
  function(dir.project, destinations, projectConfig, projectDirs,
           filenames = NULL,
           initial.files = NULL, mnemonic = "",
           project.steps = c("read", "codebook", "clean", "summary", "analyse",
             "compare", "mergeAll", "reportRmd", "reportRnw", "presentRmd",
             "beamerRmd", "beamerRnw"),
           report.markdown = c("Rmd", "Rnw"),
           report.which = c("first", "merge", "all"))
{

  ## NB: need to extract projectDirs from projectConfig when it gets written

  ## check inputs (some of them) ----------------------------------
  if (!inherits(projectDirs, "drywDestinationDirs"))
    stop("'projectDirs' should be of class 'drywDestinationDirs'")

  ## documented default for 'no mnemonic' is NULL
  if (is.null(mnemonic)) mnemonic <- ""

  cat("\n++++ Setting up file names and object names\n\n")
  cat("Creating names of R syntax, markdown file names and object names for\n")
  ## createProjectSkeleton (new project) passes no 'filenames' and
  ## assumes all files have just been moved into place
  PROJ.SKEL <- is.null(filenames)
  if (PROJ.SKEL) {
    cat(" new project\n")
  } else {
    cat("existing project '", projectConfig$name.project, "'\n", sep ="")
    print(filenames)
    ## the data/codebook file lists were never set up for this case and
    ## the function failed later with "object 'dataFiles' not found":
    ## fail here with a message that says why
    stop("Adding files to an existing project ('filenames' supplied) ",
         "is not yet implemented")
  }

  ## project types and report types -------------------------------
  allSteps <- c("read", "codebook", "clean", "summary", "analyse",
                "compare", "mergeAll", "reportRmd", "reportRnw",
                "presentRmd", "beamerRmd", "beamerRnw")
  project.steps <- match.arg(project.steps, allSteps, several.ok = TRUE)
  report.markdown <-
    match.arg(report.markdown, c("Rmd", "Rnw"), several.ok = TRUE)
  report.which <- match.arg(report.which)

  ## names of data types and codebooks ----------------------------
  ## set up data types for moved/copied files based on extension
  ## and find matching codebook if any
  ## 'no.. = TRUE' drops "." and ".." explicitly; removing the first
  ## two entries by position is wrong whenever another name sorts first
  listProjectFiles <- function(subdir) {
    list.files(file.path(dir.project, subdir), all.files = TRUE,
               no.. = TRUE)
  }

  dataFiles <- listProjectFiles(destinations[["data"]])
  data.files <-
    data.frame(ID = tolower(tools::file_path_sans_ext(dataFiles)),
               dataType = tolower(tools::file_ext(dataFiles)),
               dataFile = dataFiles,
               stringsAsFactors = FALSE)

  codebookFiles <- listProjectFiles(destinations[["codebook"]])
  codebookBase <- gsub("_codebook$", "",
                       tools::file_path_sans_ext(codebookFiles))
  codebook.files <-
    data.frame(ID = tolower(codebookBase),
               codebook = codebookFiles,
               codebookType = tolower(tools::file_ext(codebookFiles)),
               stringsAsFactors = FALSE)

  ## matching data/codebooks NB: can have same codebook for several data files
  codebookMatches <- merge(data.files, codebook.files, all = TRUE)
  haveCodebooks <- length(codebookFiles) > 0

  ## set up names for R and report files -----------------------------------
  report.steps <- grep("report|present|beamer", project.steps, value = TRUE)
  allRsyntax <- setdiff(project.steps, c(report.steps, "compare", "mergeAll"))
  dataStems <- gsub("\\.", "_", dataFiles)

  ## filenames for R syntax -------------------------------------------
  RsyntaxFiles <-
    lapply(allRsyntax,
           function(y) stringr::str_c(mnemonic, y, "_", dataStems, ".R",
                                      sep = ""))
  names(RsyntaxFiles) <- allRsyntax
  ## if more than 1 data file then create and add in merge syntax
  if ("mergeAll" %in% project.steps && length(dataFiles) > 1) {
    RsyntaxFiles$mergeAll <- "mergeAll.R"
  }

  ## codebook R syntax file to read codebooks and store/compare if available
  if (haveCodebooks) {
    RsyntaxFiles$codebook <-
      stringr::str_c(mnemonic, "read_codebook_", codebookBase, ".R")
  }

  ## filenames for reports/presentations ---------------------------------
  ## e.g. "reportRmd" -> stem "report", extension ".Rmd"
  rStepSplits <- stringr::str_split_fixed(report.steps, "R", 2)
  rownames(rStepSplits) <- report.steps
  rStepSplits[, 2] <- stringr::str_c(".R", rStepSplits[, 2])
  reportFiles <-
    lapply(report.steps,
           function(y)
             stringr::str_c(mnemonic, rStepSplits[y, 1], "_",
                            dataStems, rStepSplits[y, 2]))
  names(reportFiles) <- report.steps
  if (report.which == "merge") stop("Sorry - not yet implemented")
  if (report.which == "first") {
    ## lapply() is a no-op when there are no report steps, whereas
    ## '1:length(x)' iterates over c(1, 0)
    reportFiles <- lapply(reportFiles, function(f) f[1])
  }

  ## create Rout filenames for make -------------------------------
  RoutFiles <-
    lapply(RsyntaxFiles, function(f) gsub("\\.R$", "\\.Rout", f))

  ## create RData filenames for make/R ---------------------------
  RDataFiles <-
    lapply(RsyntaxFiles,
           function(f) gsub("read-", "", gsub("\\.R$", "\\.RData", f)))
  ## codebooks if present
  if (haveCodebooks) {
    RDataFiles$codebook <-
      gsub("^read_codebook", "codebook",
           gsub("\\.R$", "\\.RData", RsyntaxFiles$codebook))
  }
  ## RData files - replace read with orig
  RDataFiles <-
    lapply(RDataFiles, function(f) stringr::str_replace(f, "^read", "orig"))

  ## data frame names ---------------------------------------------
  suffix <- c("_orig", "_cl", "_sum", "_anly") # best to have untouched orig
  names(suffix) <- c("read", "clean", "summary", "analyse")
  ## NOTE(review): str_replace() only changes the first "." or "-" in
  ## each file name - confirm whether str_replace_all() was intended
  dataFrames <-
    lapply(allRsyntax, function(y) {
      stringr::str_c(stringr::str_replace(dataFiles, "\\.|-", "_"),
                     suffix[y])
    })
  names(dataFrames) <- allRsyntax

  ## add data frame name for merged
  if ("mergeAll" %in% names(RsyntaxFiles)) {
    dataFrames$mergeAll <- "mergedData"
  }
  ## add codebooks
  if (haveCodebooks) {
    dataFrames$codebook <- stringr::str_c("codebook_", codebookBase)
  }

  ## add destination directories ---------------------------------
  ## working directory for each step, looked up by step name so that a
  ## subset (or reordering) of 'project.steps' is still labelled
  ## correctly; same assignment as the full default list
  workDirs <- projectDirs$working.dirs
  stepKind <- c(read = "readMerge", codebook = "readMerge",
                clean = "readMerge", summary = "work", analyse = "work",
                compare = "readMerge", mergeAll = "readMerge",
                reportRmd = "reports", reportRnw = "reports",
                presentRmd = "reports", beamerRmd = "reports",
                beamerRnw = "reports")
  stepDirs <- vapply(stepKind[project.steps],
                     function(kind) as.character(workDirs[[kind]])[1],
                     character(1))
  directories <- file.path("..",
                           c(stepDirs,
                             workDirs$dataDeriv,
                             workDirs$dataOrig,
                             workDirs$codebook))
  names(directories) <- c(project.steps, "dataDeriv", "dataOrig",
                          "dataCodebook")

  ## return "fileAndDataNames" S3 object
  fileAndDataNames <- list(dataFiles = dataFiles,
                           codebookFiles = codebookFiles,
                           RsyntaxFiles = RsyntaxFiles,
                           reportFiles = reportFiles,
                           report.which = report.which,
                           RoutFiles = RoutFiles,
                           RDataFiles = RDataFiles,
                           dataFrames = dataFrames,
                           availableCodeBooks = codebookMatches,
                           directories = directories)
  class(fileAndDataNames) <- "fileAndDataNames"

  fileAndDataNames

}
## could ouput very similar to whats need for templates but perhaps it
## already is


## ## for debugging
## dir.project
## destinations
## projectConfig <- drywOptions # only used for unset options so far in
##                              # calling function - hopefully no
##                              # mistakes made - better strategy?
## filenames <- NULL
## project.steps <- c("read", "clean", "summary", "analyse",
##                    "compare", "mergeAll", "report")
## report.markdown <- c("Rmd", "Rnw")
## mnemonic <- NULL

## --------------------------------------------------------------------------------
## /README.md:
## --------------------------------------------------------------------------------
## # dryworkflow
##
## The information below is also available in package help.
##
## Also see [the blog site](http://www.petebaker.id.au/r-package-dryworkflow "Peter Baker's R blog") for details.
6 | 7 | ## Installation 8 | 9 | In *R* as Administrator, to install please use the following: 10 | 11 | ```{r} 12 | library(devtools) # available on CRAN (or github) 13 | devtools::install_github("petebaker/dryworkflow", dependencies = TRUE) 14 | ``` 15 | 16 | ### Prerequisites 17 | 18 | To use Makefile definitions and version control using git, you need to install 19 | - GNU Make [http://www.gnu.org/software/make/](http://www.gnu.org/software/make/) 20 | - R [http://www.r-project.org/](http://www.r-project.org/) 21 | - latexmk [http://www.ctan.org/pkg/latexmk/](http://www.ctan.org/pkg/latexmk/) 22 | - R packages on CRAN: rmarkdown, knitr 23 | 24 | Note that **Windows** users can install Rtools (available from CRAN) to get a working version of *make* and may also need to install *pandoc* and *latex* to produce pdf files if they haven't already. Miktex is recommended although texlive will also work well. 25 | - Rtools [http://cran.r-project.org/bin/windows/Rtools/](http://cran.r-project.org/bin/windows/Rtools/) 26 | - pandoc [http://johnmacfarlane.net/pandoc/](http://johnmacfarlane.net/pandoc/) 27 | - miktex [http://miktex.org/](http://miktex.org/) 28 | 29 | **MACOSX** users should install a recent version of *Xcode CLT* (Xcode command line tools) and *Homebrew* in order to install *make* and *git*. Unfortunately, I don't yet know much about Macs as my brand new (and first) MacBook Pro is still in for repairs. For some hints try [http://www.moncefbelyamani.com/how-to-install-xcode-homebrew-git-rvm-ruby-on-mac/](http://www.moncefbelyamani.com/how-to-install-xcode-homebrew-git-rvm-ruby-on-mac/). Finally, to produce pdf reports *MacTex* [https://tug.org/mactex/](https://tug.org/mactex/) is recommended. 30 | 31 | In **linux**, if they aren't already installed, simply install these 32 | packages using the system package manager. 33 | 34 | The easiest way to install *git* and *pandoc* on all platforms is to 35 | install *RStudio*. 
If you don't have a favourite programmer's editor 36 | that you already use for **R** then this is the best way to use **R** 37 | as well. Install **RStudio** from http://rstudio.org. Note that you 38 | may need to put the directory containing *RStudio* etc in the *PATH*. 39 | 40 | You can check that *make*, *git* and *pandoc* are installed by typing 41 | 42 | ```{bash} 43 | git --version 44 | make --version 45 | pandoc --version 46 | ``` 47 | 48 | Finally, check that latex is available with 49 | 50 | ```{bash} 51 | pdflatex --version 52 | ``` 53 | 54 | ## Using the dryworkflow package 55 | 56 | The **dryworkflow** package produces a project skeleton for data 57 | analysis including *R* syntax files, reports and Makefiles. Given data 58 | files and documents, the skeleton is generated with initial 59 | directories, template log files, template *R* syntax for data checking 60 | and initial analysis, makefiles and a *git* repository is initialised. 61 | 62 | ### Templates 63 | 64 | *R* syntax templates for reading, cleaning, merging, summarising and 65 | analysing data and *Rmarkdown* and *Sweave* templates for reports are provided. The 66 | function *copyTemplates* may be used to get copies of these templates 67 | which can then be modified for use when creating a project skeleton. 68 | 69 | ### Make and definitions 70 | 71 | Makefiles are generated. The file *common.mk* provides pattern rules 72 | to produce *.Rout* and *.pdf* files from *R* syntax files and *.html*, 73 | *.pdf* and *.docx* files from *.Rmd* R markdown and *.Rnw* files. The 74 | function *copyCommonMk* may be used to get a copy of the *common.mk* file 75 | used by the installed version of the *dryworkflow* package. 
The latest 76 | version of *common.mk* can always be found at 77 | [https://github.com/petebaker/r-makefile-definitions](https://github.com/petebaker/r-makefile-definitions) 78 | 79 | ### .gitignore 80 | 81 | A *.gitignore* file is created in the base project directory to 82 | indicate files not to be tracked by *git*. The function 83 | *copyGitIgnore* may be used to get a copy of the *.gitignore* file used 84 | by the installed version of the *dryworkflow* package. The latest 85 | version of *.gitignore* can always be found at 86 | [https://github.com/petebaker/r-gitignore](https://github.com/petebaker/r-gitignore) 87 | 88 | ### Project Options 89 | 90 | Note that option parameters are either set as an argument to the 91 | function *createProjectSkeleton* or automatically via global options 92 | using *getOption("dryworkflow")*. Customised options may be set in 93 | *.Rprofile* using global options and these will be set automatically 94 | when *dryworkflow* is loaded. 95 | 96 | ### Examples 97 | 98 | #### setting global options or put these in .Rprofile 99 | 100 | ```{r} 101 | current.opts <- options() 102 | options("dryworkflow" = list(git = list(user.name = "My Name", user.email = "myname@email.com"))) 103 | library(dryworkflow) 104 | options("dryworkflow") 105 | ``` 106 | 107 | #### A project with all default settings 108 | 109 | ```{r} 110 | ## File: setupProject.R 111 | ## 112 | ## copy .csv file and codebook from dryWorkflow package 113 | ## noting that normally you just place files in current directory 114 | ## and then run 'createProjectSkeleton' 115 | file.copy(system.file('demoFiles', 'small2.csv', package='dryworkflow'), 116 | 'small2.csv') 117 | file.copy(system.file('demoFiles', 'small2_codebook.csv', 118 | package='dryworkflow'), 'small2_codebook.csv') 119 | 120 | ## NB: In practice, always check directories, R syntax etc 121 | ## before using 'make' 122 | createProjectSkeleton(dir.proj = "testProject2", 123 | name.project = "Experiment 1", 124 | 
dontmove = "setupProject.R") 125 | ``` 126 | 127 | -------------------------------------------------------------------------------- /inst/demoFiles/CodeBook-small2.csv: -------------------------------------------------------------------------------- 1 | Variable,Old.Variable,Label,Factor.Levels,Factor.Type 2 | ID,codea,"Subject ID",Continuous, 3 | bmi.21,bmi22,"BMI at 21 Years",Continuous, 4 | bpSys.21,NA,"Mean Systolic Blood Pressure at 21 Years",Continuous, 5 | ysrExter.14,jexter,"YSR: externalising at 14 years",Continuous, 6 | ysrInter.14,jinter,"YSR: internalising at 14 years",Continuous, 7 | ysrAggre.14,jaggre,"YSR: aggression at 14 years",Continuous, 8 | matEducat,ra80,"Maternal Education","1=Incomplete High",Ordered 9 | ,,,"2=Complete High", 10 | ,,,"3=Post High", 11 | familyIncome,ra90,"Recoded Income Phase A","1=$10399 or less",Nominal 12 | ,,,"2=$10400 or more", 13 | sex,c45,"Sex of baby",1=Male,Factor 14 | ,,,2=Female, 15 | -------------------------------------------------------------------------------- /inst/demoFiles/data1-birth.csv: -------------------------------------------------------------------------------- 1 | ID,matEducat,familyIncome,bmiM,sex 2 | 5455,Incomplete High,$10400 or more,18.903591156,Female 3 | 7036,Complete High,$10399 or less,17.9981632233,Female 4 | 5973,Complete High,$10400 or more,23.507806778,Female 5 | 7142,Incomplete High,$10400 or more,36.2624397278,Female 6 | 3003,Complete High,$10400 or more,21.3593063354,Female 7 | 1020,Complete High,$10399 or less,18.4265327454,Female 8 | 1998,Incomplete High,$10399 or less,24.3910579681,Female 9 | 3377,Post High,$10400 or more,18.1448688507,Female 10 | 5486,Complete High,$10400 or more,20.0773353577,Female 11 | 8321,Complete High,$10400 or more,35.2955474854,Female 12 | 238,Complete High,$10400 or more,20.0796031952,Female 13 | 911,Post High,$10400 or more,17.8565158844,Female 14 | 5576,Complete High,$10400 or more,22.4813289642,Female 15 | 6,Complete High,$10400 or 
more,20.2812328339,Female 16 | 2476,Complete High,$10399 or less,18.4425468445,Female 17 | 3018,Complete High,$10400 or more,25.5593280792,Female 18 | 2463,Complete High,$10400 or more,23.1246681213,Female 19 | 2525,Complete High,$10400 or more,21.9671211243,Female 20 | 1077,Complete High,$10399 or less,21.7192497253,Female 21 | 7745,Incomplete High,$10400 or more,NA,Female 22 | 2948,Post High,$10400 or more,18.75,Female 23 | 1992,Complete High,$10399 or less,20.661157608,Female 24 | 7887,Complete High,$10399 or less,23.1472549438,Female 25 | 5145,Incomplete High,$10400 or more,22.6473770142,Female 26 | 4499,Complete High,$10399 or less,19.1953029633,Female 27 | 2461,Post High,$10400 or more,19.8347110748,Female 28 | 5058,Complete High,$10400 or more,19.4931774139,Female 29 | 3590,Complete High,$10399 or less,21.1927433014,Female 30 | 1307,Complete High,$10400 or more,19.7238674164,Female 31 | 3144,Incomplete High,$10400 or more,20.5761318207,Female 32 | 6078,Incomplete High,$10399 or less,23.4375,Female 33 | -------------------------------------------------------------------------------- /inst/demoFiles/data1-yr21.csv: -------------------------------------------------------------------------------- 1 | ID,bmi.21,bpSys.21,bpDia.21,ysrExter.14,ysrInter.14,ysrAnxiousDep.14,ysrAggre.14 2 | 3377,23.981262207,97,64,9,19,8,5 3 | 5486,21.1265296936,106.5,58.5,16,15,8,13 4 | 8321,25.3243427277,93,58.5,NA,NA,NA,NA 5 | 238,21.9651050568,118.5,66.5,11,16,9,8 6 | 911,20.5254440308,110,69.5,NA,NA,NA,NA 7 | 5576,26.9159183502,105.5,58,1,9,3,1 8 | 6,23.2088565826,105,66.5,8,13,4,7 9 | 2476,23.8914604187,124,86.5,7,20,9,6 10 | 3018,34.0110778809,122,77,8,12,6,3 11 | 2463,23.9096088409,115.5,58,16,23,11,12 12 | 2525,19.7118644714,106,72,6,4,3,5 13 | 1077,24.9430541992,103,60,NA,NA,NA,NA 14 | 7745,25.0972232819,111.5,65,11,14,9,8 15 | 2948,21.1183815002,114,62,4,7,3,2 16 | 1992,16.8445663452,110.5,76.5,4,7,2,2 17 | 7887,31.9731903076,106,53.5,20,24,14,15 18 | 
5145,19.8052864075,103,69,0,8,0,0 19 | 4499,18.8999385834,119,63.5,31,32,16,22 20 | 2461,30.5249824524,98.5,64,15,11,5,14 21 | 5058,19.9596271515,109,58.5,13,17,9,9 22 | 3590,22.6426544189,107,68,26,20,10,21 23 | 1307,19.2290248871,97,64,9,13,5,8 24 | 3144,22.893573761,111.5,69,15,19,9,12 25 | 6078,23.2556152344,111,55,NA,NA,NA,NA 26 | 40,45.010433197,NA,NA,NA,NA,NA,NA 27 | 1110,20.3244380951,106.5,63.5,12,7,4,10 28 | 4845,22.6430568695,87.5,54.5,11,13,4,7 29 | 2301,17.393705368,100,62,9,13,2,6 30 | 2241,17.6504669189,109,69.5,9,7,3,8 31 | 6794,23.3109378815,103,63,29,5,1,22 32 | 7071,26.2172737122,105,68,13,31,13,7 33 | 4180,24.1990242004,132,86.5,6,4,2,4 34 | -------------------------------------------------------------------------------- /inst/demoFiles/data1_codebook.csv: -------------------------------------------------------------------------------- 1 | Variable,Old.Variable,Label,Factor.Levels 2 | ID,codea,"Subject ID",Continuous 3 | bmi.21,bmi22,"BMI at 21 Years",Continuous 4 | bpSys.21,NA,"Mean Systolic Blood Pressure at 21 Years",Continuous 5 | ysrExter.14,jexter,"YSR: externalising at 14 years",Continuous 6 | ysrInter.14,jinter,"YSR: internalising at 14 years",Continuous 7 | ysrAggre.14,jaggre,"YSR: aggression at 14 years",Continuous 8 | matEducat,ra80,"Maternal Education","1=Incomplete High" 9 | ,,,"2=Complete High" 10 | ,,,"3=Post High" 11 | familyIncome,ra90,"Recoded Income Phase A","1=$10399 or less" 12 | ,,,"2=$10400 or more" 13 | sex,c45,"Sex of baby",1=Male 14 | ,,,2=Female 15 | -------------------------------------------------------------------------------- /inst/demoFiles/setupProject.R: -------------------------------------------------------------------------------- 1 | ## File: setupProject.R 2 | ## Purpose: use 'dryworkflow' library to set up initial project structure 3 | 4 | library(dryworkflow) 5 | 6 | ## Place relavent data, document (and perhaps R function) files in 7 | ## current directory. 
The next command sets up the project 8 | createProjectSkeleton("test1") 9 | 10 | ## Once you are happy with created files, run this command to run R 11 | ## files and make 12 | makeProject("test1") 13 | 14 | ## ideally run make from inside RStudio, emacs, command line or 15 | ## programmer's editor 16 | 17 | ## For testing run this to retrieve data/docs and remove 18 | ##removeProject("test1", ask=FALSE) 19 | 20 | -------------------------------------------------------------------------------- /inst/demoFiles/small2_codebook.csv: -------------------------------------------------------------------------------- 1 | Variable,Old.Variable,Label,Factor.Levels,Factor.Type 2 | ID,codea,"Subject ID",Continuous, 3 | bmi.21,bmi22,"BMI at 21 Years",Continuous, 4 | bpSys.21,NA,"Mean Systolic Blood Pressure at 21 Years",Continuous, 5 | ysrExter.14,jexter,"YSR: externalising at 14 years",Continuous, 6 | ysrInter.14,jinter,"YSR: internalising at 14 years",Continuous, 7 | ysrAggre.14,jaggre,"YSR: aggression at 14 years",Continuous, 8 | matEducat,ra80,"Maternal Education","1=Incomplete High",Ordered 9 | ,,,"2=Complete High", 10 | ,,,"3=Post High", 11 | familyIncome,ra90,"Recoded Income Phase A","1=$10399 or less",Nominal 12 | ,,,"2=$10400 or more", 13 | sex,c45,"Sex of baby",1=Male,Factor 14 | ,,,2=Female, 15 | -------------------------------------------------------------------------------- /inst/git/DOTgitignore: -------------------------------------------------------------------------------- 1 | ## File: .gitignore 2 | ## Purpose: Specifies untracked files for git to ignore 3 | ## Some from http://help.github.com/ignore-files/ 4 | ## and added R/latex/emacs/RStudio specific etc 5 | ## Note: 1) goes in root of project and is NOT stored with git repo 6 | ## 2) you may wish to add some files which are excluded here like 7 | ## something.log or somthing.pdf. Simply use 'git add filename' 8 | ## which will track specified file. 
Alternatively, comment the 9 | ## pattern(s) below 10 | ## 3) you may wish to allow certain files like pdfs or docs in 11 | ## subdirectories. To allow a pattern in a specific subdirectory 12 | ## then create a .gitignore file in that directory using a text 13 | ## editor and use negate (!). See 'man gitignore' 14 | 15 | 16 | ######################################################## 17 | # R output # 18 | ######################################################## 19 | 20 | .Rhistory 21 | *.Rout 22 | *_Rout.txt 23 | Rplots.pdf 24 | *.pdf 25 | *.png 26 | *.tiff 27 | *.jpg 28 | *.jpeg 29 | 30 | ########################## 31 | # Packages/zip/iso files # 32 | ########################## 33 | # it's better to unpack these files and commit the raw source 34 | # git has its own built in compression methods 35 | *.7z 36 | *.dmg 37 | *.gz 38 | *.iso 39 | *.jar 40 | *.rar 41 | *.tar 42 | *.zip 43 | 44 | ############# 45 | # Databases # 46 | ############# 47 | *.sql 48 | *.sqlite 49 | 50 | ############# 51 | # latex # 52 | ############# 53 | *.aux 54 | *.glo 55 | *.idx 56 | *.log 57 | *.toc 58 | *.ist 59 | *.acn 60 | *.acr 61 | *.alg 62 | *.bbl 63 | *.blg 64 | *.dvi 65 | *.glg 66 | *.gls 67 | *.ilg 68 | *.ind 69 | *.lof 70 | *.lot 71 | *.maf 72 | *.mtc 73 | *.mtc1 74 | *.out 75 | *.synctex.gz 76 | *.pdfsync 77 | *.nav 78 | *.snm 79 | 80 | ############# 81 | # latexmk # 82 | ############# 83 | 84 | *.fdb_latexmk 85 | *.fls 86 | 87 | ################## 88 | # C object files 89 | ################## 90 | *.o 91 | *.so 92 | 93 | ################## 94 | # RStudio files 95 | ################## 96 | .Rproj.user 97 | 98 | ##################### 99 | # emacs backups etc # 100 | ##################### 101 | *~ 102 | \#*\# 103 | auto/ 104 | -------------------------------------------------------------------------------- /inst/makefile.common/common.mk: -------------------------------------------------------------------------------- 1 | ## File: common.mk - to be included in Makefile(s) 2 | ## 
Purpose: Define gnu make rules for R, knitr, Rmarkdown and Sweave 3 | ## Version: 0.2.01 4 | ## Usage: Place file in a directory such as ~/lib and include with 5 | ## include ~/lib/common.mk 6 | ## at the bottom of Makefile (or adjust for your directory of choice) 7 | ## To override any definitions place them after the include statement 8 | ## NB: if using makepp then ~ is not recognized but the following is OK 9 | ## include ${HOME}/lib/common.mk 10 | ## 11 | ## The latest version of this file is available at 12 | ## https://github.com/petebaker/r-makefile-definitions 13 | 14 | ## For help after including common.mk in Makefile: run 15 | ## $ make help 16 | 17 | ## Changelog: None recorded until Frid 2015-02-06 at 15:40:21 18 | ## On Frid 2015-02-06 19 | ## 1) Added Rmarkdown rules 20 | ## run $ make help-rmarkdown 21 | ## 2) Added stitch rules 22 | ## Sat 2015-03-28 at 20:48:20 23 | ## 1) added version 24 | ## 2) added git and rsync targets 25 | ## 3) fixed some knitr/rmarkdown targets 26 | 27 | ## TODO: 1) proper documentation 2015-02-21 at 23:41:44 28 | ## 2) make knit more system independent 29 | ## PARTIALLY DONE 2015-03-29 at 09:37:41 30 | ## 3) generic clean/backup needs work (see end of file) 31 | 32 | ## For Sweave I've changed the default to knit as that's what I 33 | ## usually want but to use Sweave then uncomment appropriate lines below 34 | ## KNIT not used but now used as it is now called inside Rscript 35 | ## but could be changed if this way preferred 36 | ## note - may need to use this (or similar) instead if knit is not in path 37 | ## KNIT = knit 38 | ## KNIT = /usr/lib/R/site-library/knitr/bin/knit 39 | ## KNIT = /usr/lib/R/library/knitr/bin/knit 40 | ## KNIT = /usr/lib64/R/library/knitr/bin/knit 41 | ## KNIT_FLAGS = -n -o 42 | ## %.md: %.Rmd 43 | ## ${KNIT} $@ ${KNIT_OPTS} $< 44 | ##%.tex: %.Rnw 45 | ## ${R} CMD Sweave $< 46 | ##%.R: %.Rnw 47 | ## ${R} CMD Stangle $< 48 | 49 | ## program defs: 50 | ##MAKE = make 51 | 52 | ## general help 
----------------------------------------------------- 53 | 54 | .PHONY: help 55 | help: 56 | @echo "" 57 | @echo Simple help can be obtained with 58 | @echo "" 59 | @echo make help-r 60 | @echo make help-rmarkdown 61 | @echo make help-stitch 62 | @echo make help-beamer 63 | @echo make help-git 64 | @echo make help-rsync 65 | 66 | # latex variables --------------------------------------------------- 67 | 68 | ## can be used to convert simple latex to .rtf file for MS word 69 | LATEX2RTF = latex2rtf 70 | 71 | ## cross platform way to run latex properly but best to run through R 72 | LATEXMK = $(R) CMD latexmk 73 | LATEXMK_FLAGS = -pdf 74 | ## rubber - latexmk alternative on linux systems only 75 | RUBBER = $(R) CMD rubber 76 | RUB_FLAGS = -d 77 | 78 | ## git variables --------------------------------------------------- 79 | 80 | GIT_REMOTE = master 81 | ## GIT_REMOTE = ssh://pete@192.168.0.1:port/git.repository 82 | GIT_ORIGIN = origin 83 | GIT = git 84 | GIT_FLAGS = -a 85 | 86 | ## rsync variables ------------------------------------------------ 87 | 88 | RSYNC_DESTINATION = 89 | ## RSYNC_DESTINATION = ~/ownCloud/myProject 90 | RSYNC = rsync 91 | RSYNC_FLAGS = -auvtr 92 | RSYNC_FILES_LOCAL = * 93 | RSYNC_FILES_REMOTE = * 94 | RSYNC_DRY_RUN = --dry-run 95 | 96 | ## pandoc variables --------------------------------------------- 97 | 98 | PANDOC = pandoc 99 | PANDOC_OPTS = -s 100 | 101 | ## R variables --------------------------------------------- 102 | 103 | R = R 104 | RSCRIPT = Rscript 105 | R_FLAGS = CMD BATCH 106 | ##R_OPTS = --no-save --no-restore --no-restore-history --no-readline 107 | R_OPTS = --vanilla 108 | RWEAVE = $(R) CMD Sweave 109 | RWEAVE_FLAGS = 110 | 111 | ## R pattern rules ------------------------------------------------- 112 | .PHONY: help-r 113 | help-r: 114 | @echo "" 115 | @echo Just one major rule to produce .Rout but can stitch .R file too 116 | @echo "" 117 | @echo $$ make myFile.R 118 | @echo will produce 'myFile.Rout' using R CMD BATCH 
--vanilla myFile.R 119 | @echo but you can change options with something like 120 | @echo $$ R_OPTS=--no-restore-history make myFile.R 121 | @echo "" 122 | @echo To stitch file \(like RStudio\) just choose any or all of: 123 | @echo make myFile.pdf 124 | @echo make myFile.docx 125 | @echo make myFile.html 126 | @echo NB: This assumes you don\'t have files like myFile.\{Rmd,Rnw,tex\} etc present, 127 | @echo " only 'myFile.R'" 128 | @echo " So good practice is to use different names for reports and analysis" 129 | 130 | ## produce .Rout from .R file -------------------------------------- 131 | 132 | ## Running R to produce text file output 133 | ## If you want to see start and end time on a linux system uncomment 134 | ## the echo lines 135 | %.Rout: %.R 136 | ## @echo Job $<: started at `date` 137 | ${R} ${R_FLAGS} ${R_OPTS} $< 138 | ## @echo Job $<: finished at `date` 139 | 140 | ## knit (and Sweave) pattern rules ---------------------------------- 141 | 142 | %.R: %.Rnw 143 | ${RSCRIPT} ${R_OPTS} -e "library(knitr);purl(\"${@:.R=.Rnw}\")" 144 | %.R: %.Rmd 145 | ${RSCRIPT} ${R_OPTS} -e "library(knitr);purl(\"${@:.R=.Rmd}\")" 146 | %.tex: %.Rnw 147 | ${RSCRIPT} ${R_OPTS} -e "library(knitr);knit('${@:.tex=.Rnw}')" 148 | %.pdf : %.tex 149 | ${LATEXMK} ${LATEXMK_FLAGS} $< 150 | ## ${RUBBER} ${RUB_FLAGS} $< 151 | ## %.pdf: %.Rnw 152 | ## ${RWEAVE} ${RW_FLAGS} $< 153 | ## ${RUBBER} ${RUB_FLAGS} $< 154 | 155 | %.rtf: %.tex 156 | ${LATEX2RTF} ${L2R_FLAGS} ${@:.rtf=} 157 | 158 | ## wonder if this would cause a conflict with rmarkdown - shouldn't as 159 | ## long as R markdown rules come after this and possible override with 160 | ## explicit definitions? 
161 | 162 | %.md: %.Rmd 163 | ${RSCRIPT} ${R_OPTS} -e "library(knitr);knit(\"${@:.md=.Rmd}\")" 164 | %.md: %.rmd 165 | ${RSCRIPT} ${R_OPTS} -e "library(knitr);knit(\"${@:.md=.rmd}\")" 166 | 167 | ## pandoc pattern rules ---------------------------------------------- 168 | 169 | %.pdf: %.md 170 | ${PANDOC} ${PANDOC_OPTS} $< -o $@ 171 | %.docx: %.md 172 | ${PANDOC} ${PANDOC_OPTS} $< -o $@ 173 | %.html: %.md 174 | ${PANDOC} ${PANDOC_OPTS} $< -o $@ 175 | %.tex: %.md 176 | ${PANDOC} ${PANDOC_OPTS} $< -o $@ 177 | 178 | ## stitch an R file using knitr -------------------------------------- 179 | 180 | ## find that rmarkdown seems to be a better option than knitr 181 | ## both on CRAN now so easier to install 182 | 183 | .PHONY: help-stitch 184 | help-stitch: 185 | @echo "" 186 | @echo To stitch file \(like RStudio\) just do one of the following: 187 | @echo make myFile.pdf 188 | @echo make myFile.docx 189 | @echo make myFile.html 190 | @echo NB: This assumes you don\'t have files like myFile.\{Rmd,Rnw,tex\} etc present, 191 | @echo " only 'myFile.R' So good practice is to use different" 192 | @echo " file (base)names for reports and analysis" 193 | 194 | %.pdf: %.R 195 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.pdf=.R}\", \"pdf_document\")" 196 | %.html: %.R 197 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.html=.R}\", \"html_document\")" 198 | ## this borrows line from below 199 | %.docx: %.R 200 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.docx=.R}\", \"word_document\")" 201 | 202 | ## Rmarkdown pattern rules -------------------------------------------------- 203 | 204 | ## generating pdf, docx, html other from Rmarkdown/sweave 205 | ## Note: $< does not appear to work whereas ${@:.pdf=.Rmd} does even 206 | ## though I think they should be identical 207 | .PHONY: help-rmarkdown 208 | help-rmarkdown: 209 | @echo "" 210 | @echo You can easily set up a .PHONY target to produce all output 211 | @echo format files specified at 
the top of the .Rmd file 212 | @echo See the file ~/lib/common.mk file and simply 213 | @echo 1\) set up a phony target with something like 214 | @echo .PHONY: rmarkdown-all 215 | @echo rmarkdown-all: myfile.Rmd 216 | @echo 2\) insert an Rscript command eg. 217 | @echo ' a\) insert pdf command from ~/lib/common.mk' 218 | @echo ' b\) replace \"pdf_document\" with \"all\"' 219 | 220 | %.pdf: %.Rmd 221 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.pdf=.Rmd}\", \"pdf_document\")" 222 | %.pdf: %.rmd 223 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.pdf=.rmd}\", \"pdf_document\")" 224 | %.html: %.Rmd 225 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.html=.Rmd}\", \"html_document\")" 226 | %.html: %.rmd 227 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.html=.rmd}\", \"html_document\")" 228 | %.docx: %.Rmd 229 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.docx=.Rmd}\", \"word_document\")" 230 | %.docx: %.rmd 231 | ${RSCRIPT} ${R_OPTS} -e "library(rmarkdown);render(\"${@:.docx=.rmd}\", \"word_document\")" 232 | 233 | ## uncomment next line if required for debugging latex 234 | ## .PRECIOUS: .tex 235 | 236 | ## backup using rsync ------------------------------------------------- 237 | .PHONY: help-rsync 238 | help-rsync: 239 | @echo "" 240 | @echo Use rsync to backup files to/from local or remote destination 241 | @echo "" 242 | @echo "rsync local to remote:" 243 | @echo $$ make rsynctest 244 | @echo " or" 245 | @echo $$ make rsynccopy 246 | @echo "" 247 | @echo "rsync remote to local:" 248 | @echo $$ make rsynctest2here 249 | @echo " or" 250 | @echo $$ make rsynccopy2here 251 | @echo "" 252 | @echo will either run rsync with \'--dry-run\' option to perform a 253 | @echo trial run with no changes made 254 | @echo " or" 255 | @echo copy just those updated files to local/remote destination 256 | @echo but your can change options with something like 257 | @echo $$ RSYNC_DESTINATION=~/ownCloud/myProject3 make 
rsynctest 258 | @echo $$ RSYNC_DESTINATION=username@remote_host:/home/username/dir1 make rsynctest 259 | @echo NB: rsync variables \(defaults in brackets\) are 260 | @echo " RSYNC_DESTINATION, RSYNC (rsync), RSYNC_FLAGS (-auvtr)" 261 | @echo " RSYNC_FILES_LOCAL (*), RSYNC_FILES_REMOTE (*) RSYNC_DRY_RUN (--dry-run)" 262 | @echo See https://www.digitalocean.com/community/tutorials/how-to-use-rsync-to-sync-local-and-remote-directories-on-a-vps 263 | 264 | 265 | ## rsync local to remote 266 | .PHONY: rsynctest 267 | rsynctest: 268 | ${RSYNC} ${RSYNC_DRY_RUN} ${RSYNC_FLAGS} ${RSYNC_FILES_LOCAL} ${RSYNC_DESTINATION}/. 269 | 270 | .PHONY: rsynccopy 271 | rsynccopy: 272 | ${RSYNC} ${RSYNC_FLAGS} ${RSYNC_FILES_LOCAL} ${RSYNC_DESTINATION}/. 273 | 274 | .PHONY: rsynctest2here 275 | rsynctest2here: 276 | ${RSYNC} ${RSYNC_DRY_RUN} ${RSYNC_FLAGS} ${RSYNC_DESTINATION}/${RSYNC_FILES_REMOTE} . 277 | 278 | .PHONY: rsynccopy2here 279 | rsynccopy2here: 280 | ${RSYNC} ${RSYNC_FLAGS} ${RSYNC_DESTINATION}/${RSYNC_FILES_REMOTE} . 
281 | 282 | ## git ------------------------------------------------- 283 | 284 | .PHONY: help-git 285 | help-git: 286 | @echo "" 287 | @echo Version control using git 288 | @echo "" 289 | @echo $$ make git.status 290 | @echo $$ make git.commit 291 | @echo " or" 292 | @echo $$ make git.push 293 | @echo will either list changes via \'git status\', commit changes or push to remote repository 294 | @echo " " 295 | @echo Useful commands: 296 | @echo $$git remote -v : lists URLs that Git has stored for remotes 297 | @echo $$git remote add [shortname] [url] : to add remote 298 | @echo $$ git push [remote-name] [branch-name] : to push repository to remote 299 | @echo See http://git-scm.com/doc or 300 | @echo http://git-scm.com/book/en/v2/Git-Basics-Working-with-Remotes 301 | 302 | .PHONY: git.status 303 | git.status: 304 | ${GIT} status 305 | 306 | .PHONY: git.commit 307 | git.commit: 308 | ${GIT} commit ${GIT_FLAGS} 309 | 310 | .PHONY: git.push 311 | git.push: 312 | ${GIT} push ${GIT_ORIGIN} ${GIT_REMOTE} 313 | 314 | 315 | ## Course slides using knit/beamer ---------------------------------------- 316 | 317 | ## Course slides, notes, etc etc using knitr 318 | ## Based on Douglas Bates lme course notes course code 319 | ## but added the slides/article styles as per happymutant website 320 | ## basically needs a line at top of file with beamer options 321 | ## ~~MY~BEAMER~~OPTIONS~~ which gets changed for each different output type 322 | 323 | .PHONY: help-beamer 324 | help-beamer: 325 | @echo "" 326 | @echo Beamer presentations and handouts produced with knitr 327 | @echo Note that base file has name PRESENTATION-src.Rnw 328 | @echo " where PRESENTATION is appropriate name for your presentation" 329 | @echo "" 330 | @echo Targets that may be produced: 331 | @echo " PRESENTATION-Present.pdf: Slides for presentation" 332 | @echo " PRESENTATION-Slides.pdf: 1 slide per page without transitions" 333 | @echo " PRESENTATION-2a4.pdf: Handouts - 2 slides per A4 page" 334 | @echo " 
PRESENTATION-4a4.pdf: Handouts - 4 slides per A4 page" 335 | @echo " PRESENTATION-syntax.R: R syntax file tangled from Rnw using knit" 336 | @echo " PRESENTATION-Notes.pdf: Notes in beamer article style" 337 | @echo "" 338 | @echo "NB: First line of PRESENTATION-src.Rnw is" 339 | @echo "\\documentclass[~~MY~BEAMER~~OPTIONS~~]{beamer}" 340 | 341 | ## produce latex file with knitr but note that it does not have 342 | ## document class - perhaps it should and use perl etc to modify it 343 | %-src.tex: %-src.Rnw 344 | 345 | ## Presentation pdf - produced via R CMD latexmk ... 346 | ## %-Slides.pdf requires %-src.Rnw WITHOUT \documentclass top line 347 | # %-Present.tex: %-src.tex 348 | # @echo "\\documentclass[dvipsnames,pdflatex,ignorenonframetext]{beamer}" > $@ 349 | # @echo "\\input{"$*-src"}" >> $@ 350 | # @echo "\\end{document}" >> $@ 351 | 352 | ## Presentation pdf - produced via R CMD latexmk ... 353 | ## %-Slides.pdf requires %-src.Rnw WITHOUT \documentclass top line 354 | %-Present.Rnw: %-src.Rnw 355 | sed -e s/~~MY~BEAMER~~OPTIONS~~/dvipsnames,pdflatex,ignorenonframetext/g $< > $@ 356 | 357 | ## Presentation syntax 358 | %-syntax.R: %-src.Rnw 359 | R -e 'library(knitr);knit("$<", tangle=TRUE)' 360 | mv ${<:.Rnw=.R} $@ 361 | 362 | ## Slides - one per page - produced via R CMD latexmk ... 363 | ## dropped handout option! 
364 | %-Slides.tex: %-Present.tex 365 | sed -e s/dvipsnames,pdflatex,ignorenonframetext/ignorenonframetext,dvipsnames,pdflatex,handout/g $< > $@ 366 | 367 | ##%-Slides.tex: %-src.tex 368 | ## @echo "\\documentclass[ignorenonframetext,dvipsnames,pdflatex,handout]{beamer}" > $@ 369 | ## @echo "\\input{"$*-src"}" >> $@ 370 | ## @echo "\\end{document}" >> $@ 371 | 372 | # A4 paper - 2 slides per page 373 | %-2a4.tex: %-Slides.pdf 374 | @echo "\\documentclass[a4paper]{article}" > $@ 375 | @echo "\\usepackage{pdfpages}" >> $@ 376 | # @echo "\\usepackage{pgfpages}" >> $@ 377 | # @echo "\\pgfpagesuselayout{2 on 1}[a4paper,border shrink=5mm]" >> $@ 378 | @echo "\\begin{document}" >> $@ 379 | @echo "\\includepdf[nup=1x2,pages=-]{"$*"-Slides.pdf}" >> $@ 380 | # @echo "\\includepdf{"$*"H.pdf}" >> $@ 381 | @echo "\\end{document}" >> $@ 382 | 383 | # A4 paper - 4 slides per page 384 | %-4a4.tex: %-Slides.pdf 385 | @echo "\\documentclass[a4paper,landscape]{article}" > $@ 386 | @echo "\\usepackage{pdfpages}" >> $@ 387 | @echo "\\begin{document}" >> $@ 388 | @echo "\\includepdf[nup=2x2,pages=-]{"$*"-Slides.pdf}" >> $@ 389 | @echo "\\end{document}" >> $@ 390 | 391 | ## Beamer style article - if you experience slight clash with todonotes 392 | ## or wish to add/remove styles modify here (first echo truncates, rest append) 393 | %-Notes.tex: %-src.tex 394 | @echo "% to use packages uncomment appropriate line by removing %" > $@ 395 | @echo "%\\PassOptionsToPackage{override,tikz}{xcolor}" >> $@ 396 | @echo "%\\PassOptionsToPackage{override,xcolor}{tikz}" >> $@ 397 | @echo "%\\PassOptionsToPackage{override,xcolor}{todonotes}" >> $@ 398 | @echo "%\\PassOptionsToPackage{override,xcolor}{beamer}" >> $@ 399 | @echo "\\documentclass[a4paper]{article}" >> $@ 400 | @echo "\\usepackage{beamerarticle}" >> $@ 401 | @echo "\\input{"$*-src"}" >> $@ 402 | @echo "\\end{document}" >> $@ 403 | 404 | ## Housekeeping rules --------------------------------------------------- 405 | 406 | 407 | ## housekeeping which needs improving - especially 
backup (.tgz or 408 | ## .zip file?) but this won't work without extra directories etc 409 | ## needs some checking and thought 410 | 411 | .PHONY: clean 412 | clean: 413 | -rm -f *.pdf *.Rout *.log *.aux *.bbl *~ 414 | 415 | .PHONY: backup 416 | backup: 417 | -zip -9 backup/backup-`date +%F`.zip *.R Makefile */*/*.csv *.pdf *.Rnw *.Rmd *.Rout 418 | 419 | -------------------------------------------------------------------------------- /inst/templates/Makefile_base.txt: -------------------------------------------------------------------------------- 1 | MAKE=make 2 | READ=@@READ.DIR@@ 3 | WORK=@@WORK.DIR@@ 4 | REPORTS=@@REPORT.DIR@@ 5 | outputs=$(READ) $(WORK) $(REPORTS) 6 | 7 | .PHONY: all $(outputs) 8 | all: $(outputs) 9 | 10 | $(outputs): 11 | $(MAKE) --directory $@ 12 | -------------------------------------------------------------------------------- /inst/templates/template_analyseR.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_analyseR.txt 3 | This file is used by 'createFromTemplate.R' to create a analyse____.R 4 | syntax file 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 
7 | To use without too much reprogramming all fields should use the names below 8 | Fields to be changed: 9 | @@PROJECT.STEP@@ name of process to perform - read, clean, analyse, analyse 10 | @@DATAFRAME.INPUT@@ (eg data1) 11 | @@DATAFRAME.SAVED@@ (eg data1_cl) (set unless reading raw data or codebook) 12 | @@SYNTAX.FILE@@ name of R syntax file to read/clean/plot/analyse data 13 | (eg clean-data1_csv.R NB: set outside createSyntax() as 14 | @@PROJECT.STEP@@-dataFileName.R) 15 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 16 | @@AUTHOR@@ Author - either provided or obtained from git settings 17 | @@DIR.DATA.INPUT@@ name of directory containing input data file 18 | (eg ../data/derived or ../data/original) 19 | @@DIR.DATA.SAVED@@ name of directory containing saved data file 20 | (eg ../data/derived) 21 | @@RDATA.INPUT@@ file name of .RData file to be read in @@DIR.RDATA.INPUT@@ 22 | (eg data1_csv.RData) 23 | @@RDATA.SAVED@@ file name of .RData file to be stored in @@DIR.RDATA.SAVED@@ 24 | (eg data1_csv_clean.RData) 25 | @@LIBRARIES@@ eg. 
26 | require(dryworkflow) # Some of these libraries load others too 27 | require(foreign) 28 | require(Hmisc) 29 | @@INPUT.COMMANDS@@ (eg load or read) 30 | load("../data/derived/dta1_csv.RData") -- OR -- 31 | data1 <- 32 | read.csv("../data/original/data1.csv") 33 | @@MYLIB.DIR@@ (eg ../lib) 34 | @@MYLIB.FILES@@ vector of source commands for filenames 35 | source(file.path(@@MYLIB.DIR@@, 'pretend.R') 36 | source(file.path(@@MYLIB.DIR@@, 'tttt.R') 37 | @@LICENCE@@ (eg licence GPL3 see ) 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | ### File: @@SYNTAX.FILE@@ 40 | ### Purpose: @@PROJECT.STEP@@ data from '@@DATAFILE@@' stored in 41 | ### in file @@RDATA.INPUT@@ in directory '@@DIR.DATA.INPUT@@' 42 | ### and store in @@RDATA.SAVED@@ in directory '@@DIR.DATA.SAVED@@' 43 | ### Created: @@DATE.CREATED@@ initially by package 'dryworkflow' 44 | ### Author: @@AUTHOR@@ 45 | ### Licence: @@LICENCE@@ 46 | 47 | ### Changelog: -- insert comments and times re changes here -- 48 | 49 | ## Specific libraries to be used. 
NB: can also be loaded in .Rprofile 50 | @@LIBRARIES@@ 51 | 52 | ## Source any R functions in own library directory 53 | ## Add any extra function files here or comment or delete to not load 54 | ## NB: You can source (load) all files in a directory 'myLib' with 55 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 56 | @@MYLIB.FILES@@ 57 | 58 | ### Read/Load data in file @@DIR.DATA.INPUT@@/@@RDATA.INPUT@@ --------- 59 | @@INPUT.COMMANDS@@ 60 | 61 | ## Data structure(s) 62 | ## ls.str() # uncomment this line to see structure of all objects in workspace 63 | str(@@DATAFRAME.INPUT@@) 64 | 65 | ## Insert sensible analysis commands here ------------------------------- 66 | 67 | ## --------------------------------------------------------------------- 68 | ## Examples for a simple linear regression - DEMO - START HERE 69 | ## --------------------------------------------------------------------- 70 | 71 | ## if data set very large then this could take a long time 72 | ## test with a sample of size N.SAMPLE 73 | 74 | N.SAMPLE <- 100 75 | 76 | if (dim(@@DATAFRAME.INPUT@@)[1] > N.SAMPLE){ 77 | df1 <- @@DATAFRAME.INPUT@@[sample(1:dim(@@DATAFRAME.INPUT@@)[1], N.SAMPLE),] 78 | } else { 79 | df1 <- @@DATAFRAME.INPUT@@ 80 | } 81 | 82 | ## determine continuous and categorical variables 83 | 84 | num.vars <- names(df1)[vapply(df1, is.numeric, logical(1))] 85 | (n.num <- length(num.vars)) # no. of numeric variables 86 | 87 | fac.vars <- names(df1)[vapply(df1, function(x) is.factor(x) && nlevels(x)<=5, logical(1))] 88 | (n.fac <- length(fac.vars)) # no. of factors with 5 or less levels 89 | 90 | ## A simple linear regression analysis demo: 91 | 92 | if (n.num > 0) { # simple linear regression on either continuous or categorical 93 | 94 | library(car) 95 | library(effects) 96 | 97 | y <- num.vars[1] 98 | 99 | if (n.num > 1){ # a second numeric variable is needed to act as predictor 100 | x <- num.vars[2] 101 | } else { 102 | if (n.fac > 0) 103 | x <- fac.vars[1] 104 | } 105 | cat("Simple lin. 
regression for '@@DATAFRAME.INPUT@@':\n") 106 | cat("NB: Possibly a sample of size:", N.SAMPLE, "\n") 107 | form1 <- formula(paste(y,'~', x)) 108 | print(summary(mod1 <- lm(form1, data=df1))) 109 | car::qqPlot(mod1) 110 | plot(effects1 <- effects::allEffects(mod1)) 111 | } else { # no continuous variables 112 | cat("No continuous variables\n") 113 | } 114 | 115 | ## DEMO - END HERE ----------------------------------------------- 116 | 117 | ## save analysis results if sensible and/or possible 118 | 119 | @@DATAFRAME.SAVED@@ <- list(data = @@DATAFRAME.INPUT@@, 120 | mod1 = if (exists("mod1")) mod1 else NA, # plain if/else keeps the whole object 121 | effects1 = if (exists("effects1")) effects1 else NA) 122 | 123 | oldComment <- comment(@@DATAFRAME.INPUT@@) 124 | (newComment 125 | <- paste("Simple analysis for '@@DATAFRAME.INPUT@@' saved at", 126 | date())) 127 | comment(@@DATAFRAME.SAVED@@) <- c(newComment, oldComment) 128 | 129 | ### Store simple analysis for subsequent analysis/reports ---------------- 130 | 131 | save(@@DATAFRAME.SAVED@@, 132 | file = file.path("@@DIR.DATA.SAVED@@", "@@RDATA.SAVED@@")) 133 | -------------------------------------------------------------------------------- /inst/templates/template_beamerRmd.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_beamerRmd.txt 3 | This file is used by 'createFromTemplate.R' to create an R markdown file 4 | report___.Rmd 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 
7 | To use without too much reprogramming all fields should use the names below 8 | 9 | It is assumed that we wish to import the clean version of data, 10 | summary and analyse dataframes as well for reporting 11 | 12 | Note that a similar process is available for these types of report/presentation 13 | reportRmd, reportRnw, presentRmd or beamerRnw 14 | 15 | Fields to be changed: 16 | @@SYNTAX.FILE@@ name of Rmd or Rnw markdown file for report 17 | (eg data1_csv.Rmd) 18 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 19 | @@AUTHOR@@ Author - either provided or obtained from git settings 20 | @@TITLE.PROJECT@@ Title string for project 21 | @@DATAFILE@@ original data file name 22 | @@DIR.DATA.INPUT@@ name of directory containing input data file 23 | (eg ../data/derived) 24 | @@RDATA.CLEAN.SAVED@@ RData file containing cleaned data 25 | @@RDATA.SUMMARY.SAVED@@ RData file containing summary results 26 | @@RDATA.ANALYSIS.SAVED@@ RData file containing analysis results 27 | @@DATAFRAME.CLEAN@@ Dataframe containing cleaned data 28 | @@DATAFRAME.SUMMARY@@ Dataframe containing summary results 29 | @@DATAFRAME.ANALYSIS@@ Dataframe containing analysis results 30 | @@LIBRARIES@@ eg. 31 | require(dryworkflow) # Some of these libraries load others too 32 | require(foreign) 33 | require(Hmisc) 34 | @@MYLIB.DIR@@ (eg ../lib) NB: not used but set outside calling template 35 | @@MYLIB.FILES@@ vector of source commands for filenames 36 | source(file.path(@@MYLIB.DIR@@, 'pretend.R') 37 | source(file.path(@@MYLIB.DIR@@, 'tttt.R') 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | --- 40 | title: "@@TITLE.PROJECT@@" 41 | author: "@@AUTHOR@@" 42 | date: "@@DATE.CREATED@@" 43 | output: beamer_presentation 44 | --- 45 | 46 | ## Introduction 47 | 48 | This is a template for the data file *@@DATAFILE@@*. 
49 | 50 | ## Slide with Bullets 51 | 52 | - Bullet 1 53 | - Bullet 2 54 | - Bullet 3 55 | 56 | ```{r, echo=FALSE, message=FALSE, warning=FALSE, error=TRUE} 57 | SUBSET <- TRUE # subset - change this 58 | if (SUBSET){ 59 | N.VARS <- 5 # just summarise first 5 variables 60 | N.CASES <- 100 # just summarise first 100 rows 61 | } 62 | library(knitr) 63 | library(rmarkdown) 64 | library(pander) 65 | library(car) 66 | @@LIBRARIES@@ 67 | 68 | ## Source any R functions in own library directory 69 | ## Add any extra function files here or comment or delete to not load 70 | ## NB: You can source (load) all files in a directory 'myLib' with 71 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 72 | @@MYLIB.FILES@@ 73 | 74 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.CLEAN.SAVED@@")) 75 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.SUMMARY.SAVED@@")) 76 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.ANALYSIS.SAVED@@")) 77 | if (SUBSET){ 78 | @@DATAFRAME.CLEAN@@ <- @@DATAFRAME.CLEAN@@[1:N.CASES, 1:N.VARS] 79 | } else { 80 | N.CASES <- dim(@@DATAFRAME.CLEAN@@)[1] 81 | N.VARS <- dim(@@DATAFRAME.CLEAN@@)[2] 82 | } 83 | ``` 84 | 85 | ## Summary Statistics 86 | 87 | ```{r, echo=FALSE, fig.cap = "Pairwise plots of variables"} 88 | if (exists("ggpairs1")) ggpairs1 # exists() needs the object name as a string 89 | ``` 90 | 91 | 92 | ## Summary Tables 93 | Data were cleaned and summary statistics presented here are for 94 | `r ifelse(SUBSET, "a subset of", "")` 95 | `r N.CASES` cases from `r N.VARS` variables. 96 | 97 | ```{r, results='asis'} 98 | pander(RcmdrMisc::numSummary(@@DATAFRAME.CLEAN@@[, sapply(@@DATAFRAME.CLEAN@@, is.numeric)])) 99 | ``` 100 | 101 | ## Statistical Analysis 102 | 103 | Ideally, you analyse data using *R* and simply output and discuss the 104 | results here. 
105 | 106 | ```{r, echo=FALSE, results='asis'} 107 | if (exists("mod1")) { 108 | knitr::kable(coef(summary(mod1))) # coefficient table; kable() cannot render a summary.lm object 109 | } 110 | ``` 111 | -------------------------------------------------------------------------------- /inst/templates/template_beamerRnw.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_beamerRnw.txt 3 | This file is used by 'createFromTemplate.R' to create an Rnw beamer file 4 | beamer___.Rnw 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 7 | To use without too much reprogramming all fields should use the names below 8 | 9 | It is assumed that we wish to import the clean version of data, 10 | summary and analyse dataframes as well for reporting 11 | 12 | Note that a similar process is available for these types of report/presentation 13 | reportRmd, reportRnw, presentRmd or beamerRnw 14 | 15 | Fields to be changed: 16 | @@SYNTAX.FILE@@ name of Rmd or Rnw markdown file for report 17 | (eg data1_csv.Rmd) 18 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 19 | @@AUTHOR@@ Author - either provided or obtained from git settings 20 | @@TITLE.PROJECT@@ Title string for project 21 | @@DATAFILE@@ original data file name 22 | @@DIR.DATA.INPUT@@ name of directory containing input data file 23 | (eg ../data/derived) 24 | @@RDATA.CLEAN.SAVED@@ RData file containing cleaned data 25 | @@RDATA.SUMMARY.SAVED@@ RData file containing summary results 26 | @@RDATA.ANALYSIS.SAVED@@ RData file containing analysis results 27 | @@DATAFRAME.CLEAN@@ Dataframe containing cleaned data 28 | @@DATAFRAME.SUMMARY@@ Dataframe containing summary results 29 | @@DATAFRAME.ANALYSIS@@ Dataframe containing analysis results 30 | @@LIBRARIES@@ eg. 
31 | require(dryworkflow) # Some of these libraries load others too 32 | require(foreign) 33 | require(Hmisc) 34 | @@MYLIB.DIR@@ (eg ../lib) NB: not used but set outside calling template 35 | @@MYLIB.FILES@@ vector of source commands for filenames 36 | source(file.path(@@MYLIB.DIR@@, 'pretend.R') 37 | source(file.path(@@MYLIB.DIR@@, 'tttt.R') 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | %% Filename: @@SYNTAX.FILE@@ 40 | %% Author: @@AUTHOR@@ 41 | %% Created at: @@DATE.CREATED@@ by package 'dryworkflow' 42 | %% Change log: Insert dates and changes here 43 | %% 44 | %% 45 | 46 | \documentclass[ignorenonframetext]{beamer} 47 | %% \documentclass[ignorenonframetext]{beamer} 48 | %% Choose themes etc at 49 | %% http://deic.uab.es/~iblanes/beamer_gallery/individual/Frankfurt-default-default.html 50 | % \usetheme{Warsaw} 51 | %% \usetheme{Frankfurt} 52 | \usetheme{Boadilla} 53 | % \usetheme{CambridgeUS} 54 | % \usecolortheme{dolphin} 55 | % \usecolortheme{seahorse} 56 | \usecolortheme{whale} 57 | \usepackage{hyperref} 58 | \usepackage{booktabs} 59 | %%\usepackage{natbib} 60 | \usepackage[os=win]{menukeys} 61 | \renewmenumacro{\directory}[/]{pathswithfolder} 62 | \usepackage[normalem]{ulem} 63 | %% \usepackage{todonotes} % messes up xcolor (override option) 64 | \usepackage{listings} 65 | \usepackage{danger} 66 | \definecolor{Sinput}{rgb}{0,0,0.56} 67 | \definecolor{Scode}{rgb}{0,0,0.56} 68 | \definecolor{Soutput}{rgb}{0.56,0,0} 69 | 70 | %% http://tex.stackexchange.com/questions/131373/typesetting-these-three-keyboard-characters 71 | %% to get backtick/grave character 72 | \usepackage[T1]{fontenc} 73 | \usepackage[utf8]{inputenc} 74 | \usepackage{textcomp} 75 | \usepackage{upquote} 76 | 77 | \begin{document} 78 | 79 | \AtBeginSection[] 80 | 81 | \begin{frame} 82 | \title{@@TITLE.PROJECT@@} 83 | \date{@@DATE.CREATED@@} 84 | \author{@@AUTHOR@@} 85 | \titlepage 86 | \end{frame} 87 | 88 | \maketitle 89 | 90 | \begin{frame} 91 | 
\frametitle{Outline} 92 | \tableofcontents[pausesections,hideallsubsections] 93 | \end{frame} 94 | 95 | <<>>= 96 | rm(list=ls()) 97 | opts_chunk$set(fig.path='figures/',include=TRUE,comment=NA,prompt=TRUE,warning=FALSE,message=FALSE,fig.height=4) #$ 98 | options(width=60) 99 | @ 100 | 101 | This is a template for the data file *@@DATAFILE@@*. 102 | 103 | <<>>= 104 | SUBSET <- FALSE # to subset - change this 105 | if (SUBSET){ 106 | N.VARS <- 5 # just summarise first 5 variables 107 | N.CASES <- 100 # just summarise first 100 rows 108 | } 109 | library(Hmisc) 110 | library(xtable) 111 | library(knitr) 112 | library(rmarkdown) 113 | library(pander) 114 | library(car) 115 | @@LIBRARIES@@ 116 | 117 | ## Source any R functions in own library directory 118 | ## Add any extra function files here or comment or delete to not load 119 | ## NB: You can source (load) all files in a directory 'myLib' with 120 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 121 | @@MYLIB.FILES@@ 122 | 123 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.CLEAN.SAVED@@")) 124 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.SUMMARY.SAVED@@")) 125 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.ANALYSIS.SAVED@@")) 126 | if (SUBSET){ 127 | @@DATAFRAME.CLEAN@@ <- @@DATAFRAME.CLEAN@@[1:N.CASES, 1:N.VARS] 128 | } else { 129 | N.CASES <- dim(@@DATAFRAME.CLEAN@@)[1] 130 | N.VARS <- dim(@@DATAFRAME.CLEAN@@)[2] 131 | } 132 | @ 133 | 134 | \begin{frame}[fragile] 135 | \frametitle{@@DATAFILE@@} 136 | \begin{itemize} 137 | \item This is a template for the data file \textbf{@@DATAFILE@@}\pause 138 | \item Data were cleaned and summary statistics presented here 139 | are for \Sexpr{ifelse(SUBSET, "a subset of", "")} \Sexpr{N.CASES} 140 | cases from \Sexpr{N.VARS} variables. 
141 | \end{itemize} 142 | \end{frame} 143 | 144 | \begin{frame}[fragile] 145 | \frametitle{@@DATAFILE@@: Summary Table} 146 | 147 | <<>>= 148 | numericVars <- sapply(@@DATAFRAME.CLEAN@@, is.numeric) 149 | pander(RcmdrMisc::numSummary(@@DATAFRAME.CLEAN@@[,numericVars])) 150 | @ 151 | 152 | \end{frame} 153 | 154 | \begin{frame}[fragile] 155 | \frametitle{@@DATAFILE@@: Summary Plots} 156 | 157 | <<>>= 158 | if (exists("ggpairs1")) ggpairs1 159 | @ 160 | 161 | \end{frame} 162 | 163 | \begin{frame}[fragile] 164 | \frametitle{Statistical Analysis} 165 | 166 | <<>>= 167 | if (exists("mod1")) knitr::kable(coef(summary(mod1))) # coefficient table; kable() cannot render a summary.lm object 168 | @ 169 | 170 | \end{frame} 171 | 172 | \begin{frame}[fragile] 173 | \frametitle{Effects plots} 174 | 175 | <<>>= 176 | if (exists("mod1")) { 177 | library(effects) 178 | plot(allEffects(mod1)) 179 | } 180 | @ 181 | \end{frame} 182 | 183 | \end{document} 184 | -------------------------------------------------------------------------------- /inst/templates/template_cleanR.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_cleanR.txt 3 | This file is used by 'createFromTemplate.R' to create a clean____.R syntax file 4 | It can be used as a basis for a modified template and should work as 5 | long as the fields below are found in the syntax after the end of this section. 
6 | To use without too much reprogramming all fields should use the names below 7 | Fields to be changed: 8 | @@PROJECT.STEP@@ name of process to perform - read, clean, plot, analyse 9 | @@DATAFRAME.INPUT@@ (eg data1) 10 | @@DATAFRAME.SAVED@@ (eg data1_cl) (set unless reading raw data or codebook) 11 | @@SYNTAX.FILE@@ name of R syntax file to read/clean/plot/analyse data 12 | (eg clean-data1_csv.R NB: set outside createSyntax() as 13 | @@PROJECT.STEP@@-dataFileName.R) 14 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 15 | @@AUTHOR@@ Author - either provided or obtained from git settings 16 | @@DIR.DATA.INPUT@@ name of directory containing input data file 17 | (eg ../data/derived or ../data/original) 18 | @@DIR.DATA.SAVED@@ name of directory containing saved data file 19 | (eg ../data/derived) 20 | @@RDATA.INPUT@@ file name of .RData file to be read in @@DIR.DATA.INPUT@@ 21 | (eg data1_csv.RData) 22 | @@RDATA.SAVED@@ file name of .RData file to be stored in @@DIR.DATA.SAVED@@ 23 | (eg data1_csv_clean.RData) 24 | @@LIBRARIES@@ eg. 
25 | require(dryworkflow) # Some of these libraries load others too 26 | require(foreign) 27 | require(Hmisc) 28 | @@INPUT.COMMANDS@@ (eg load or read) 29 | load("../data/derived/data1_csv.RData") -- OR -- 30 | data1 <- 31 | read.csv("../data/original/data1.csv") 32 | @@MYLIB.DIR@@ (eg ../lib) 33 | @@MYLIB.FILES@@ vector of source commands for filenames 34 | source(file.path(@@MYLIB.DIR@@, 'pretend.R')) 35 | source(file.path(@@MYLIB.DIR@@, 'tttt.R')) 36 | @@LICENCE@@ (eg licence GPL3 see <https://www.gnu.org/licenses/gpl-3.0.html>) 37 | ------- END: DELETE THIS SECTION --------------------------------- 38 | ### File: @@SYNTAX.FILE@@ 39 | ### Purpose: @@PROJECT.STEP@@ data from '@@DATAFILE@@' stored 40 | ### in file @@RDATA.INPUT@@ in directory '@@DIR.DATA.INPUT@@' 41 | ### and store in @@RDATA.SAVED@@ in directory '@@DIR.DATA.SAVED@@' 42 | ### Created: @@DATE.CREATED@@ initially by package 'dryworkflow' 43 | ### Author: @@AUTHOR@@ 44 | ### Licence: @@LICENCE@@ 45 | 46 | ### Changelog: -- insert comments and times re changes here -- 47 | 48 | ## Specific libraries to be used. 
NB: can also be loaded in .Rprofile 49 | @@LIBRARIES@@ 50 | 51 | ## Source any R functions in own library directory 52 | ## Add any extra function files here or comment or delete to not load 53 | ## NB: You can source (load) all files in a directory 'myLib' with 54 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 55 | @@MYLIB.FILES@@ 56 | 57 | ### Read/Load data in file @@DIR.DATA.INPUT@@/@@RDATA.INPUT@@ --------- 58 | @@INPUT.COMMANDS@@ 59 | 60 | ## Data structure(s) 61 | ## ls.str() # uncomment this line to see structure of all objects in workspace 62 | str(@@DATAFRAME.INPUT@@) 63 | 64 | 65 | @@DATAFRAME.SAVED@@ <- @@DATAFRAME.INPUT@@ 66 | 67 | ### Clean data ------------------------------------------------- 68 | 69 | ### Insert any transformations here, tidy up variable names, perform 70 | ### checks, modify values (and ideally date and document here and 71 | ### elsewhere) 72 | 73 | summary(@@DATAFRAME.SAVED@@) 74 | 75 | names(@@DATAFRAME.SAVED@@) 76 | 77 | ### Store data for subsequent analysis -------------------------- 78 | 79 | oldComment <- comment(@@DATAFRAME.SAVED@@) 80 | newComment <- paste("Data '@@DATAFRAME.INPUT@@' from '@@RDATA.INPUT@@' cleaned at", date()) 81 | comment(@@DATAFRAME.SAVED@@) <- c(newComment, oldComment) 82 | 83 | save(@@DATAFRAME.SAVED@@, 84 | file = file.path("@@DIR.DATA.SAVED@@", "@@RDATA.SAVED@@")) 85 | -------------------------------------------------------------------------------- /inst/templates/template_codebookR.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_codebookR.txt 3 | This file is used by 'createFromTemplate.R' to create a read.R syntax file 4 | It can be used as a basis for a modified template and should work as 5 | long as the fields below are found in the syntax after the end of this section. 
6 | To use without too much reprogramming all fields should use the names below 7 | Fields to be changed: 8 | @@PROJECT.STEP@@ name of process to perform - read, clean, plot, analyse 9 | @@SYNTAX.FILE@@ name of R syntax file to read codebook 10 | (eg read-codebook-data1_csv.R) 11 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 12 | @@AUTHOR@@ Author - either provided or obtained from git settings 13 | @@DATAFILE@@ name of data file to be read in (eg data1.csv) 14 | @@CODEBOOK@@ name of codebook file to be read in (eg data1_codebook.csv) 15 | @@DIR.DATA.INPUT@@ name of directory containing codebook (eg ../data/codebook) 16 | @@DATA.RDATA.FILE@@ full file name of .RData file to be stored 17 | (eg ../data/derived/data1_csv.RData) 18 | @@RDATA.SAVED@@ full file name of .RData file to be stored 19 | (eg ../data/derived/data1_codebook.RData) 20 | @@LIBRARIES@@ eg. 21 | require(dryworkflow) # Some of these libraries load others too 22 | require(foreign) 23 | require(Hmisc) 24 | @@INPUT.COMMANDS@@ 25 | ###: Read in code book and apply to data file. 
eg 26 | CODEBOOK <- readCodeBook('../data/codebook/data1_codebook.csv') 27 | applyCodeBook(data1, CODEBOOK) 28 | @@DATA.DATAFRAME@@ (eg data1) 29 | @@DATAFRAME.SAVED@@ (eg data1_codebook) 30 | @@MYLIB.FILES@@ vector of source commands for filenames 31 | source(file.path('../lib', 'prestend.R') 32 | source(file.path('../lib', 'tttt.R') 33 | @@MYLIB.DIR@@ (eg ./lib) - NOT USED CURRENTLY 34 | @@LICENCE@@ (eg licence GPL3 see ) 35 | ------- END: DELETE THIS SECTION --------------------------------- 36 | ### File: @@SYNTAX.FILE@@ 37 | ### Purpose: read @@PROJECT.STEP@@ '@@DATAFILE@@' 38 | ### in directory '@@DIR.DATA.INPUT@@' 39 | ### and store in '@@RDATA.SAVED@@' in 40 | ### Created: @@DATE.CREATED@@ by 'dryworkflow::createProjectSkeleton' 41 | ### Author: @@AUTHOR@@ 42 | ### Licence: @@LICENCE@@ 43 | 44 | ### Changelog: -- insert comments and times re changes here -- 45 | 46 | ## Specific libraries to be used. NB: can also be loaded in .Rprofile 47 | @@LIBRARIES@@ 48 | 49 | ## Source any R functions in own library directory 50 | ## Add any extra function files here or comment or delete to not load 51 | ## NB: You can source (load) all files in a directory 'myLib' with 52 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 53 | @@MYLIB.FILES@@ 54 | 55 | ### Read in codebook file '@@DATAFILE@@' in '@@DIR.DATA.INPUT@@' 56 | @@INPUT.COMMANDS@@ 57 | 58 | ## Data structure 59 | str(@@DATAFRAME.SAVED@@) 60 | 61 | ### You can insert any transformations here or tidy up variable names 62 | ### and so on but ideally these should be carried out when cleaning 63 | ### the data so that an unchanged original version is stored initially 64 | ### but minor tweaks could be inserted here 65 | 66 | ### Store data for subsequent analysis 67 | comment(@@DATAFRAME.SAVED@@) <- 68 | paste("Read codebook '@@DATAFILE@@' in '@@DIR.DATA.INPUT@@' at", 69 | date()) 70 | 71 | save(@@DATAFRAME.SAVED@@, 72 | file = file.path("@@DIR.DATA.SAVED@@", "@@RDATA.SAVED@@")) 73 | 
-------------------------------------------------------------------------------- /inst/templates/template_mergeAllR.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_mergeAllR.txt 3 | This file is used by 'createFromTemplate.R' to create an .R syntax file 4 | It can be used as a basis for a modified template and should work as 5 | long as the fields below are found in the syntax after the end of this section. 6 | To use without too much reprogramming all fields should use the names below 7 | Fields to be changed: 8 | @@PROJECT.STEP@@ name of process to perform - mergeAll, etc read, clean, ... 9 | @@SYNTAX.FILE@@ name of R syntax file to read data (eg mergeAll.R) 10 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 11 | @@AUTHOR@@ Author - either provided or obtained from git settings 12 | @@DATAFRAME.INPUT@@ character strings of 'cleaned' dataframe names 13 | (assigned ??) 14 | @@RDATA.INPUT@@ character strings of 'cleaned' datafile names 15 | @@DATAFRAME.SAVED@@ merged data frame name 16 | @@DATAFILE@@ names of data files originally read in 17 | (eg c("data1.csv", "data2.dta", ...)) 18 | @@DIR.DATA.INPUT@@ name of directory containing data file (eg ../data/derived) 19 | @@RDATA.SAVED@@ file name of .RData file to be stored (eg mergedAll.RData) 20 | @@DIR.DATA.SAVED@@ Directory to save merged RData file (eg ../data/derived) 21 | @@LIBRARIES@@ eg. 
22 | require(dryworkflow) # Some of these libraries load others too 23 | require(foreign) 24 | require(Hmisc) 25 | @@DATAFRAME.SAVED@@ (eg mergedData) 26 | @@MYLIB.FILES@@ vector of source commands for filenames 27 | source(file.path('../lib', 'pretend.R') 28 | source(file.path('../lib', 'tttt.R') 29 | @@MYLIB.DIR@@ (eg ./lib) - NOT USED CURRENTLY 30 | @@LICENCE@@ (eg licence GPL3 see ) 31 | ------- END: DELETE THIS SECTION --------------------------------- 32 | ### Warning: This file is highly unlikely to work without modification 33 | ### 34 | ### File: @@SYNTAX.FILE@@ 35 | ### Purpose: @@PROJECT.STEP@@: cleaned data files in directory 36 | ## '@@DIR.DATA.INPUT@@' 37 | ### and store in '@@RDATA.SAVED@@' in directory '@@DIR.DATA.SAVED@@' 38 | ### Created: @@DATE.CREATED@@ by 'dryworkflow::createProjectSkeleton' 39 | ### Author: @@AUTHOR@@ 40 | ### Licence: @@LICENCE@@ 41 | 42 | ### Changelog: -- insert comments and times re changes here -- 43 | 44 | ## Specific libraries to be used. NB: can also be loaded in .Rprofile 45 | @@LIBRARIES@@ 46 | 47 | ## Source any R functions in own library directory 48 | ## Add any extra function files here or comment or delete to not load 49 | ## NB: You can source (load) all files in a directory 'myLib' with 50 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 51 | @@MYLIB.FILES@@ 52 | 53 | DATAFILES <- @@DATAFILE@@ 54 | DATAFRAMES <- @@DATAFRAME.INPUT@@ 55 | RDATAFILES <- @@RDATA.INPUT@@ 56 | 57 | if (length(DATAFRAMES) != length(RDATAFILES)){ 58 | cat("Error: each data frame needs file\n\n") 59 | } else { 60 | names(DATAFRAMES) <- names(RDATAFILES) <- DATAFILES 61 | } 62 | 63 | ### Read in R data files in '@@DIR.DATA.INPUT@@' 64 | ### Files: @@RDATA.INPUT@@ 65 | @@INPUT.COMMANDS@@ 66 | 67 | ### Data frames to be combined: 68 | DATAFRAMES 69 | 70 | ### Description of data frames 71 | lapply(DATAFRAMES, 72 | function(x) cat(x, ":\n", comment(get(x)), sep = "")) 73 | 74 | ### NB: 
------------------------------------------------------------------ 75 | 76 | ### Warning: You may wish to run some of this file interactively after 77 | ### data files are read in. Column names that are in common 78 | ### and that are present in one file only are printed along 79 | ### with structure dimensions etc to help identify what's 80 | ### different. 81 | ### It may be prudent to reread and/or rename the variable names 82 | ### before attempting to merge because incorrect specification 83 | ### may make merge times excessive. In particular, you may 84 | ### need to specify appropriate ID column(s) for merging. 85 | 86 | ### Also: instead of using merge below it may be easier to bind rows 87 | ### using something like 88 | ## ## Merging data sets by row binding variables in common 89 | ## in.common <- intersect(names(get(DATAFRAMES[1])), names(get(DATAFRAMES[2]))) 90 | ## @@DATAFRAME.SAVED@@ <- rbind(get(DATAFRAMES[1])[,in.common], 91 | ## get(DATAFRAMES[2])[,in.common]) 92 | ## for (J in seq_along(DATAFRAMES)[-(1:2)]){ # frames 1 and 2 are already bound 93 | ## in.common <- intersect( in.common, names(get(DATAFRAMES[J]))) 94 | ## ## Row binding data frames 95 | ## @@DATAFRAME.SAVED@@ <- rbind(@@DATAFRAME.SAVED@@[,in.common], 96 | ## get(DATAFRAMES[J])[,in.common]) 97 | ## } 98 | 99 | ## Merging data sets using 'merge' 100 | cat('Merge', DATAFRAMES[1], ',', DATAFRAMES[2], '\n') 101 | 102 | #### Overall comparison: ------------------------------------------- 103 | compare::compare(get(DATAFRAMES[1]), get(DATAFRAMES[2]), allowAll=TRUE) 104 | 105 | ### Variable Names in common -------------------------------------- 106 | intersect(names(get(DATAFRAMES[1])), names(get(DATAFRAMES[2]))) 107 | cat('Column names in', DATAFRAMES[1], 'not in', DATAFRAMES[2], '\n') 108 | setdiff(names(get(DATAFRAMES[1])), names(get(DATAFRAMES[2]))) 109 | cat('Column names in', DATAFRAMES[2], 'not in', DATAFRAMES[1], '\n') 110 | setdiff(names(get(DATAFRAMES[2])), names(get(DATAFRAMES[1]))) 111 | ## @@DATAFRAME.SAVED@@ <- 
merge(get(DATAFRAMES[1]), get(DATAFRAMES[2]), 112 | ## by = 'ID', all=TRUE) 113 | @@DATAFRAME.SAVED@@ <- merge(get(DATAFRAMES[1]), get(DATAFRAMES[2]), all=TRUE) 114 | 115 | for (J in seq_along(DATAFRAMES)[-(1:2)]){ # frames 1 and 2 are already merged above 116 | cat('Merge @@DATAFRAME.SAVED@@ with', DATAFRAMES[J], '\n') 117 | cat('#### Overall comparison:\n') 118 | print(compare::compare(@@DATAFRAME.SAVED@@, get(DATAFRAMES[J]), 119 | allowAll=TRUE)) 120 | cat('## +++++++ Variable Names in common\n') 121 | print(intersect(names(@@DATAFRAME.SAVED@@), names(get(DATAFRAMES[J])))) 122 | cat('Column names in @@DATAFRAME.SAVED@@ not in', DATAFRAMES[J], '\n') 123 | print(setdiff(names(@@DATAFRAME.SAVED@@), names(get(DATAFRAMES[J])))) 124 | cat('Column names in', DATAFRAMES[J], 'not in @@DATAFRAME.SAVED@@\n') 125 | print(setdiff(names(get(DATAFRAMES[J])), names(@@DATAFRAME.SAVED@@))) 126 | ## @@DATAFRAME.SAVED@@ <- merge(@@DATAFRAME.SAVED@@, get(DATAFRAMES[J]), 127 | ## by = 'ID', all=TRUE) 128 | @@DATAFRAME.SAVED@@ <- merge(@@DATAFRAME.SAVED@@, get(DATAFRAMES[J]), 129 | all=TRUE) 130 | } 131 | 132 | ### Store data for subsequent analysis --------------------------- 133 | comment(@@DATAFRAME.SAVED@@) <- 134 | paste("Data merged from specified data files at", date()) 135 | 136 | str(@@DATAFRAME.SAVED@@) 137 | 138 | save(@@DATAFRAME.SAVED@@, DATAFRAMES, 139 | file = file.path("@@DIR.DATA.SAVED@@", "@@RDATA.SAVED@@")) 140 | -------------------------------------------------------------------------------- /inst/templates/template_presentRmd.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_presentRmd.txt 3 | This file is used by 'createFromTemplate.R' to create an R markdown file 4 | report___.Rmd 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 
7 | To use without too much reprogramming all fields should use the names below 8 | 9 | It is assumed that we wish to import the clean version of data, 10 | summary and analyse dataframes as well for reporting 11 | 12 | Note that a similar process is available for these types of report/presentation 13 | reportRmd, reportRnw, presentRmd or beamerRnw 14 | 15 | Fields to be changed: 16 | @@SYNTAX.FILE@@ name of Rmd or Rnw markdown file for report 17 | (eg data1_csv.Rmd) 18 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 19 | @@AUTHOR@@ Author - either provided or obtained from git settings 20 | @@TITLE.PROJECT@@ Title string for project 21 | @@DATAFILE@@ original data file name 22 | @@DIR.DATA.INPUT@@ name of directory containing input data file 23 | (eg ../data/derived) 24 | @@RDATA.CLEAN.SAVED@@ RData file containing cleaned data 25 | @@RDATA.SUMMARY.SAVED@@ RData file containing summary results 26 | @@RDATA.ANALYSIS.SAVED@@ RData file containing analysis results 27 | @@DATAFRAME.CLEAN@@ Dataframe containing cleaned data 28 | @@DATAFRAME.SUMMARY@@ Dataframe containing summary results 29 | @@DATAFRAME.ANALYSIS@@ Dataframe containing analysis results 30 | @@LIBRARIES@@ eg. 31 | require(dryworkflow) # Some of these libraries load others too 32 | require(foreign) 33 | require(Hmisc) 34 | @@MYLIB.DIR@@ (eg ../lib) NB: not used but set outside calling template 35 | @@MYLIB.FILES@@ vector of source commands for filenames 36 | source(file.path(@@MYLIB.DIR@@, 'pretend.R') 37 | source(file.path(@@MYLIB.DIR@@, 'tttt.R') 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | --- 40 | title: "@@TITLE.PROJECT@@" 41 | author: "@@AUTHOR@@" 42 | date: "@@DATE.CREATED@@" 43 | output: ioslides_presentation 44 | --- 45 | 46 | ## Introduction 47 | 48 | This is a template for the data file *@@DATAFILE@@*. 
49 | 50 | ## Slide with Bullets 51 | 52 | - Bullet 1 53 | - Bullet 2 54 | - Bullet 3 55 | 56 | ```{r, echo=FALSE, message=FALSE, warning=FALSE, error=TRUE} 57 | SUBSET <- FALSE # to subset - change this 58 | if (SUBSET){ 59 | N.VARS <- 5 # just summarise first 5 variables 60 | N.CASES <- 100 # just summarise first 100 rows 61 | } 62 | library(knitr) 63 | library(rmarkdown) 64 | library(pander) 65 | library(car) 66 | @@LIBRARIES@@ 67 | 68 | ## Source any R functions in own library directory 69 | ## Add any extra function files here or comment or delete to not load 70 | ## NB: You can source (load) all files in a directory 'myLib' with 71 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 72 | @@MYLIB.FILES@@ 73 | 74 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.CLEAN.SAVED@@")) 75 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.SUMMARY.SAVED@@")) 76 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.ANALYSIS.SAVED@@")) 77 | if (SUBSET){ 78 | @@DATAFRAME.CLEAN@@ <- @@DATAFRAME.CLEAN@@[1:N.CASES, 1:N.VARS] 79 | } else { 80 | N.CASES <- dim(@@DATAFRAME.CLEAN@@)[1] 81 | N.VARS <- dim(@@DATAFRAME.CLEAN@@)[2] 82 | } 83 | ``` 84 | 85 | ## Summary Statistics 86 | 87 | ```{r, echo=FALSE, fig.cap = "Pairwise plots of variables"} 88 | if (exists("ggpairs1")) ggpairs1 89 | ``` 90 | 91 | 92 | ## Summary Tables 93 | Data were cleaned and summary statistics presented here are for 94 | `r ifelse(SUBSET, "a subset of", "")` 95 | `r N.CASES` cases from `r N.VARS` variables. 96 | 97 | ```{r, results='asis'} 98 | numericVars <- sapply(@@DATAFRAME.CLEAN@@, is.numeric) 99 | pander(RcmdrMisc::numSummary(@@DATAFRAME.CLEAN@@[,numericVars])) 100 | ``` 101 | 102 | ## Statistical Analysis 103 | 104 | Ideally, you analyse data using *R* and simply output and discuss the 105 | results here. 
106 | 107 | ```{r, echo=FALSE, results='asis'} 108 | if (exists("mod1")) { 109 | knitr::kable(coef(summary(mod1))) # coefficient table; kable() cannot render a summary.lm object 110 | } 111 | ``` 112 | -------------------------------------------------------------------------------- /inst/templates/template_readR.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_readR.txt 3 | This file is used by 'createFromTemplate.R' to create a read.R syntax file 4 | It can be used as a basis for a modified template and should work as 5 | long as the fields below are found in the syntax after the end of this section. 6 | To use without too much reprogramming all fields should use the names below 7 | Fields to be changed: 8 | @@PROJECT.STEP@@ name of process to perform - read, clean, plot, analyse 9 | @@SYNTAX.FILE@@ name of R syntax file to read data (eg read-data1_csv.R) 10 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 11 | @@AUTHOR@@ Author - either provided or obtained from git settings 12 | @@DATAFILE@@ name of data file to be read in (eg data1.csv) 13 | @@DIR.DATA.INPUT@@ name of directory containing data file (eg ../data/original) 14 | @@RDATA.SAVED@@ file name of .RData file to be stored (eg data1_csv.RData) 15 | @@DIR.DATA.SAVED@@ Directory to save RData file (eg ../data/derived) 16 | @@LIBRARIES@@ eg. 
17 | require(dryworkflow) # Some of these libraries load others too 18 | require(foreign) 19 | require(Hmisc) 20 | @@READ.DATA.COMMAND@@ (eg read.csv) 21 | @@READ.CODEBOOK@@ eg read__codebook but not used in this template 22 | ###: Read in code book and apply to data file 23 | CODEBOOK <- readCodeBook('../data/codebook/data1_codebook.csv') 24 | applyCodeBook(data1, CODEBOOK) 25 | @@DATAFRAME@@ (eg data1) 26 | @@MYLIB.FILES@@ vector of source commands for filenames 27 | source(file.path('../lib', 'pretend.R') 28 | source(file.path('../lib', 'tttt.R') 29 | @@MYLIB.DIR@@ (eg ./lib) - NOT USED CURRENTLY 30 | @@LICENCE@@ (eg licence GPL3 see ) 31 | ------- END: DELETE THIS SECTION --------------------------------- 32 | ### File: @@SYNTAX.FILE@@ 33 | ### Purpose: @@PROJECT.STEP@@ from data file '@@DATAFILE@@' in directory '@@DIR.DATA.INPUT@@' 34 | ### and store in '@@RDATA.SAVED@@' in directory '@@DIR.DATA.SAVED@@' 35 | ### Created: @@DATE.CREATED@@ by 'dryworkflow::createProjectSkeleton' 36 | ### Author: @@AUTHOR@@ 37 | ### Licence: @@LICENCE@@ 38 | 39 | ### Changelog: -- insert comments and times re changes here -- 40 | 41 | ## Specific libraries to be used. 
NB: can also be loaded in .Rprofile 42 | @@LIBRARIES@@ 43 | 44 | ## Source any R functions in own library directory 45 | ## Add any extra function files here or comment or delete to not load 46 | ## NB: You can source (load) all files in a directory 'myLib' with 47 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 48 | @@MYLIB.FILES@@ 49 | 50 | ### Read in data file '@@DATAFILE@@' in '@@DIR.DATA.INPUT@@' 51 | @@INPUT.COMMANDS@@ 52 | 53 | ## Data structure 54 | str(@@DATAFRAME.SAVED@@) 55 | 56 | ### You can insert any transformations here or tidy up variable names 57 | ### and so on but ideally these should be carried out when cleaning 58 | ### the data so that an unchanged original version is stored initially 59 | 60 | ### Store data for subsequent analysis 61 | comment(@@DATAFRAME.SAVED@@) <- 62 | paste("Data read from '@@DATAFILE@@' at", date()) 63 | 64 | save(@@DATAFRAME.SAVED@@, 65 | file = file.path("@@DIR.DATA.SAVED@@", "@@RDATA.SAVED@@")) 66 | -------------------------------------------------------------------------------- /inst/templates/template_reportRmd.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_reportRmd.txt 3 | This file is used by 'createFromTemplate.R' to create a R markdown file 4 | report___.Rmd 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 
7 | To use without too much reprogramming all fields should use the names below 8 | 9 | It is assumed that we wish to import the clean version of data, 10 | summary and analyse dataframes as well for reporting 11 | 12 | Note that a similar process is available for these types of report/presentation 13 | reportRmd, reportRnw, presentRmd or beamerRnw 14 | 15 | Fields to be changed: 16 | @@SYNTAX.FILE@@ name of Rmd or Rnw markdown file for report 17 | (eg data1_csv.Rmd) 18 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 19 | @@AUTHOR@@ Author - either provided or obtained from git settings 20 | @@TITLE.PROJECT@@ Title string for project 21 | @@DATAFILE@@ original data file name 22 | @@DIR.DATA.INPUT@@ name of directory containing input data file 23 | (eg ../data/derived) 24 | @@RDATA.CLEAN.SAVED@@ RData file containing cleaned data 25 | @@RDATA.SUMMARY.SAVED@@ RData file containing summary results 26 | @@RDATA.ANALYSIS.SAVED@@ RData file containing analysis results 27 | @@DATAFRAME.CLEAN@@ Dataframe containing cleaned data 28 | @@DATAFRAME.SUMMARY@@ Dataframe containing summary results 29 | @@DATAFRAME.ANALYSIS@@ Dataframe containing analysis results 30 | @@LIBRARIES@@ eg. 31 | require(dryworkflow) # Some of these libraries load others too 32 | require(foreign) 33 | require(Hmisc) 34 | @@MYLIB.DIR@@ (eg ../lib) NB: not used but set outside calling template 35 | @@MYLIB.FILES@@ vector of source commands for filenames 36 | source(file.path(@@MYLIB.DIR@@, 'pretend.R') 37 | source(file.path(@@MYLIB.DIR@@, 'tttt.R') 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | --- 40 | title: "@@TITLE.PROJECT@@" 41 | author: "@@AUTHOR@@ @@ADDRESS@@" 42 | date: "@@DATE.CREATED@@" 43 | output: 44 | html_document: 45 | toc: true 46 | theme: united 47 | pdf_document: 48 | toc: true 49 | highlight: zenburn 50 | --- 51 | 52 | This is a template for the data file *@@DATAFILE@@*. 
53 | 54 | **Before you create a report, check settings in next chunk, modify 55 | accordingly and then remove this sentence.** 56 | 57 | ```{r, echo=FALSE, message=FALSE, warning=FALSE, error=TRUE} 58 | SUBSET <- FALSE # to subset - change this 59 | if (SUBSET){ 60 | N.VARS <- 5 # just summarise first 5 variables 61 | N.CASES <- 100 # just summarise first 100 rows 62 | } 63 | library(knitr) 64 | library(rmarkdown) 65 | library(pander) 66 | library(car) 67 | @@LIBRARIES@@ 68 | 69 | ## Source any R functions in own library directory 70 | ## Add any extra function files here or comment or delete to not load 71 | ## NB: You can source (load) all files in a directory 'myLib' with 72 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 73 | @@MYLIB.FILES@@ 74 | 75 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.CLEAN.SAVED@@")) 76 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.SUMMARY.SAVED@@")) 77 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.ANALYSIS.SAVED@@")) 78 | if (SUBSET){ 79 | @@DATAFRAME.CLEAN@@ <- @@DATAFRAME.CLEAN@@[1:N.CASES, 1:N.VARS] 80 | } else { 81 | N.CASES <- dim(@@DATAFRAME.CLEAN@@)[1] 82 | N.VARS <- dim(@@DATAFRAME.CLEAN@@)[2] 83 | } 84 | ``` 85 | 86 | # Summary Statistics 87 | 88 | Data were cleaned and summary statistics presented here are for 89 | `r ifelse(SUBSET, "a subset of", "")` 90 | `r N.CASES` cases from `r N.VARS` variables. 91 | 92 | ## Summary Tables 93 | 94 | Sometimes *numSummary* can produce problematic output for *knitr* but 95 | really should be OK given it is used inside R Commander. 96 | 97 | ```{r, results='asis'} 98 | numericVars <- sapply(@@DATAFRAME.CLEAN@@, is.numeric) 99 | pander(RcmdrMisc::numSummary(@@DATAFRAME.CLEAN@@[,numericVars])) 100 | ``` 101 | 102 | This may be useful too: 103 | 104 | ```{r, results='asis'} 105 | knitr::kable(psych::describe(@@DATAFRAME.CLEAN@@)) 106 | ``` 107 | 108 | But I'm guessing this simply doesn't work. 
Table produced previously 109 | and so doesn't need redoing/recalculating but just outputting. 110 | 111 | ```{r, results='asis'} 112 | if (exists("table1")) knitr::kable(table1) 113 | ``` 114 | 115 | ## Summary Plots 116 | 117 | And perhaps some of these plots are present too from the summary step 118 | conducted previously. 119 | 120 | ```{r, echo=FALSE} 121 | if (exists("ggpairs1")) ggpairs1 122 | ``` 123 | 124 | ```{r, echo=FALSE} 125 | if (exists("mosaic1")) mosaic1 126 | ``` 127 | 128 | # Statistical Analysis 129 | 130 | Ideally, you analyse data using *R* and simply output and discuss the 131 | results here. 132 | 133 | ```{r, echo=FALSE, results='asis'} 134 | if (exists("mod1")) { # print() each table: only the last value of a braced block is auto-printed 135 | print(knitr::kable(coef(summary(mod1)))) # kable needs the coefficient matrix, not the summary object 136 | print(knitr::kable(anova(mod1))) 137 | print(knitr::kable(car::Anova(mod1))) 138 | } 139 | ``` 140 | 141 | And perhaps some residual plots 142 | 143 | ```{r, echo=FALSE} 144 | if (exists("mod1")) plot(mod1) 145 | ``` 146 | 147 | ```{r, echo=FALSE} 148 | if (exists("mod1")) car::residualPlots(mod1) 149 | ``` 150 | 151 | If present then could plot effects. 152 | 153 | ```{r, echo=FALSE} 154 | if (exists("mod1")) { 155 | library(effects) 156 | plot(allEffects(mod1)) 157 | } 158 | ``` 159 | 160 | For more information about *R* markdown, please see 161 | <http://rmarkdown.rstudio.com>. 162 | 163 | Note that an 'echo = FALSE' parameter was added to the code chunk to 164 | prevent printing of the R code that generated the plot. You can find 165 | out more about options at <https://yihui.org/knitr/options/>. 
166 | -------------------------------------------------------------------------------- /inst/templates/template_reportRnw.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_reportRnw.txt 3 | This file is used by 'createFromTemplate.R' to create an Rnw (knitr/LaTeX) file 4 | report___.Rnw 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 7 | To use without too much reprogramming all fields should use the names below 8 | 9 | It is assumed that we wish to import the clean version of data, 10 | summary and analyse dataframes as well for reporting 11 | 12 | Note that a similar process is available for these types of report/presentation 13 | reportRmd, reportRnw, presentRmd or beamerRnw 14 | 15 | Fields to be changed: 16 | @@SYNTAX.FILE@@ name of Rmd or Rnw markdown file for report 17 | (eg data1_csv.Rnw) 18 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 19 | @@AUTHOR@@ Author - either provided or obtained from git settings 20 | @@TITLE.PROJECT@@ Title string for project 21 | @@DATAFILE@@ original data file name 22 | @@DIR.DATA.INPUT@@ name of directory containing input data file 23 | (eg ../data/derived) 24 | @@RDATA.CLEAN.SAVED@@ RData file containing cleaned data 25 | @@RDATA.SUMMARY.SAVED@@ RData file containing summary results 26 | @@RDATA.ANALYSIS.SAVED@@ RData file containing analysis results 27 | @@DATAFRAME.CLEAN@@ Dataframe containing cleaned data 28 | @@DATAFRAME.SUMMARY@@ Dataframe containing summary results 29 | @@DATAFRAME.ANALYSIS@@ Dataframe containing analysis results 30 | @@LIBRARIES@@ eg. 
31 | require(dryworkflow) # Some of these libraries load others too 32 | require(foreign) 33 | require(Hmisc) 34 | @@MYLIB.DIR@@ (eg ../lib) NB: not used but set outside calling template 35 | @@MYLIB.FILES@@ vector of source commands for filenames 36 | source(file.path(@@MYLIB.DIR@@, 'pretend.R')) 37 | source(file.path(@@MYLIB.DIR@@, 'tttt.R')) 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | %% Filename: @@SYNTAX.FILE@@ 40 | %% Author: @@AUTHOR@@ 41 | %% Created at: @@DATE.CREATED@@ by package 'dryworkflow' 42 | %% Change log: Insert dates and changes here 43 | %% 44 | %% 45 | 46 | %% Some features here from knitr examples on github - see 47 | %% https://raw.githubusercontent.com/yihui/knitr/master/inst/examples/knitr-minimal.Rnw 48 | 49 | \documentclass[a4paper]{article} 50 | \usepackage[sc]{mathpazo} 51 | \usepackage[T1]{fontenc} 52 | \usepackage{geometry} 53 | \geometry{verbose,tmargin=2.5cm,bmargin=2.5cm,lmargin=2.5cm,rmargin=2.5cm} 54 | \setcounter{secnumdepth}{2} 55 | \setcounter{tocdepth}{2} 56 | \usepackage{url} 57 | \usepackage[unicode=true,pdfusetitle, 58 | bookmarks=true,bookmarksnumbered=true,bookmarksopen=true,bookmarksopenlevel=2, 59 | breaklinks=false,pdfborder={0 0 1},backref=false,colorlinks=false] 60 | {hyperref} 61 | \hypersetup{ 62 | pdfstartview={XYZ null null 1}} 63 | 64 | \begin{document} 65 | 66 | %% Before you create a report, check settings in next chunk, modify 67 | %% accordingly and also change packages and options above. 68 | 69 | <<setup, include=FALSE, cache=FALSE>>= 70 | library(knitr) 71 | # set global chunk options 72 | opts_chunk$set(fig.path='figure', fig.align='center', fig.show='hold') 73 | options(formatR.arrow=TRUE,width=90) 74 | @ 75 | 76 | \title{@@TITLE.PROJECT@@} 77 | \author{@@AUTHOR@@ \\ 78 | @@ADDRESS@@} % article class has no \address; separate address lines with \\ 79 | \maketitle 80 | 81 | This is a template for the data file \texttt{\detokenize{@@DATAFILE@@}}. 
82 | 83 | <<load-data, echo=FALSE, message=FALSE, warning=FALSE>>= 84 | SUBSET <- TRUE # subset - change this 85 | if (SUBSET){ 86 | N.VARS <- 5 # just summarise first 5 variables 87 | N.CASES <- 100 # just summarise first 100 rows 88 | } 89 | library(Hmisc) 90 | library(xtable) 91 | library(knitr) 92 | library(rmarkdown) 93 | library(pander) 94 | library(car) 95 | @@LIBRARIES@@ 96 | 97 | ## Source any R functions in own library directory 98 | ## Add any extra function files here or comment or delete to not load 99 | ## NB: You can source (load) all files in a directory 'myLib' with 100 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 101 | @@MYLIB.FILES@@ 102 | 103 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.CLEAN.SAVED@@")) 104 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.SUMMARY.SAVED@@")) 105 | load(file.path("@@DIR.DATA.INPUT@@", "@@RDATA.ANALYSIS.SAVED@@")) 106 | if (SUBSET){ 107 | @@DATAFRAME.CLEAN@@ <- @@DATAFRAME.CLEAN@@[1:N.CASES, 1:N.VARS] 108 | } else { 109 | N.CASES <- dim(@@DATAFRAME.CLEAN@@)[1] 110 | N.VARS <- dim(@@DATAFRAME.CLEAN@@)[2] 111 | } 112 | @ 113 | 114 | \section{Background} 115 | 116 | Insert description of project and aims. 117 | 118 | The structure of this report is as follows. Summary statistics are 119 | shown in Section~\ref{sec:summary} with tables and plots in Sections 120 | \ref{sec:summary-tables} and \ref{sec:summary-plots}, respectively. 121 | 122 | \section{Summary Statistics} 123 | \label{sec:summary} 124 | 125 | \textbf{Note to self:} Guessing this may not work but should 126 | substitute something that does. 127 | 128 | Data were cleaned and summary statistics presented here are for 129 | \Sexpr{ifelse(SUBSET, "a subset of", "")} \Sexpr{N.CASES} cases from 130 | \Sexpr{N.VARS} variables. 131 | 132 | \subsection{Summary Tables} 133 | \label{sec:summary-tables} 134 | 135 | Sometimes \texttt{numSummary} can produce problematic output for 136 | \texttt{knitr} but really should be OK given it is used inside R 137 | Commander. 
138 | 139 | <<results='asis'>>= 140 | knitr::kable(RcmdrMisc::numSummary(@@DATAFRAME.CLEAN@@)) 141 | @ 142 | 143 | This may be useful too: 144 | 145 | <<results='asis'>>= 146 | knitr::kable(psych::describe(@@DATAFRAME.CLEAN@@)) 147 | @ 148 | 149 | But I'm guessing this simply doesn't work. Table produced previously 150 | and so doesn't need redoing/recalculating but just outputting. 151 | 152 | <<results='asis'>>= 153 | xt <- xtable(table1, label = 'tab:tableX', caption = "Summary statistics") 154 | print(xt) 155 | @ 156 | 157 | <<results='asis'>>= 158 | latex(table1, label = 'tab:tableX2', caption = "Summary statistics 2") 159 | @ 160 | 161 | We see summary results in Tables \ref{tab:tableX} and \ref{tab:tableX2}. 162 | 163 | \subsection{Summary Plots} 164 | \label{sec:summary-plots} 165 | 166 | And perhaps some of these plots are present too from the summary step 167 | conducted previously. 168 | 169 | <<echo=FALSE>>= 170 | if (exists("ggpairs1")) ggpairs1 171 | @ 172 | 173 | For a perhaps interesting plot, see Figures~\ref{fig:plot1} and 174 | \ref{fig.plot}. 175 | 176 | \textbf{Is this going to crash things if libraries not installed?} 177 | 178 | <<echo=FALSE>>= 179 | if (exists("mosaic1")) mosaic1 180 | @ 181 | 182 | \section{Statistical Analysis} 183 | 184 | Ideally, you analyse data using \texttt{R} and simply output and 185 | discuss the results here. 186 | 187 | <<echo=FALSE, results='asis'>>= 188 | if (exists("mod1")) { # exists() takes the object name as a string; print() each table inside the block 189 | print(knitr::kable(coef(summary(mod1)))) 190 | print(knitr::kable(anova(mod1))) 191 | print(knitr::kable(car::Anova(mod1))) } 192 | @ 193 | 194 | And perhaps some residual plots 195 | 196 | <<echo=FALSE>>= 197 | if (exists("mod1")) plot(mod1) 198 | @ 199 | 200 | <<echo=FALSE>>= 201 | if (exists("mod1")) car::residualPlots(mod1) 202 | @ 203 | 204 | If the \texttt{effects} package is installed then could plot effects. 
205 | 206 | <<echo=FALSE>>= 207 | if (exists("mod1") && requireNamespace("effects", quietly = TRUE)) { # only when 'effects' is installed 208 | library(effects) 209 | plot(allEffects(mod1)) 210 | } 211 | @ 212 | 213 | \end{document} 214 | -------------------------------------------------------------------------------- /inst/templates/template_summaryR.txt: -------------------------------------------------------------------------------- 1 | ------- START: DELETE THIS SECTION ------------------------------- 2 | File: template_summaryR.txt 3 | This file is used by 'createFromTemplate.R' to create a summary____.R 4 | syntax file 5 | It can be used as a basis for a modified template and should work as 6 | long as the fields below are found in the syntax after the end of this section. 7 | To use without too much reprogramming all fields should use the names below 8 | Fields to be changed: 9 | @@PROJECT.STEP@@ name of process to perform - read, clean, summary, analyse 10 | @@DATAFRAME.INPUT@@ (eg data1) 11 | @@DATAFRAME.SAVED@@ (eg data1_cl) (set unless reading raw data or codebook) 12 | @@SYNTAX.FILE@@ name of R syntax file to read/clean/plot/analyse data 13 | (eg clean-data1_csv.R NB: set outside createSyntax() as 14 | @@PROJECT.STEP@@-dataFileName.R) 15 | @@DATE.CREATED@@ date R syntax file created (eg Mon Apr 13 10:09:15 2015) 16 | @@AUTHOR@@ Author - either provided or obtained from git settings 17 | @@DIR.DATA.INPUT@@ name of directory containing input data file 18 | (eg ../data/derived or ../data/original) 19 | @@DIR.DATA.SAVED@@ name of directory containing saved data file 20 | (eg ../data/derived) 21 | @@RDATA.INPUT@@ file name of .RData file to be read in @@DIR.DATA.INPUT@@ 22 | (eg data1_csv.RData) 23 | @@RDATA.SAVED@@ file name of .RData file to be stored in @@DIR.DATA.SAVED@@ 24 | (eg data1_csv_clean.RData) 25 | @@LIBRARIES@@ eg. 
26 | require(dryworkflow) # Some of these libraries load others too 27 | require(foreign) 28 | require(Hmisc) 29 | @@INPUT.COMMANDS@@ (eg load or read) 30 | load("../data/derived/data1_csv.RData") -- OR -- 31 | data1 <- 32 | read.csv("../data/original/data1.csv") 33 | @@MYLIB.DIR@@ (eg ../lib) 34 | @@MYLIB.FILES@@ vector of source commands for filenames 35 | source(file.path(@@MYLIB.DIR@@, 'pretend.R')) 36 | source(file.path(@@MYLIB.DIR@@, 'tttt.R')) 37 | @@LICENCE@@ (eg licence GPL3 see ) 38 | ------- END: DELETE THIS SECTION --------------------------------- 39 | ### File: @@SYNTAX.FILE@@ 40 | ### Purpose: @@PROJECT.STEP@@ data from '@@DATAFILE@@' stored 41 | ### in file @@RDATA.INPUT@@ in directory '@@DIR.DATA.INPUT@@' 42 | ### and store in @@RDATA.SAVED@@ in directory '@@DIR.DATA.SAVED@@' 43 | ### Created: @@DATE.CREATED@@ initially by package 'dryworkflow' 44 | ### Author: @@AUTHOR@@ 45 | ### Licence: @@LICENCE@@ 46 | 47 | ### Changelog: -- insert comments and times re changes here -- 48 | 49 | ## Specific libraries to be used. 
NB: can also be loaded in .Rprofile 50 | @@LIBRARIES@@ 51 | 52 | ## Source any R functions in own library directory 53 | ## Add any extra function files here or comment or delete to not load 54 | ## NB: You can source (load) all files in a directory 'myLib' with 55 | ## lapply(Sys.glob(file.path("myLib", "*.R")), source) 56 | @@MYLIB.FILES@@ 57 | 58 | ### Read/Load data in file @@DIR.DATA.INPUT@@/@@RDATA.INPUT@@ --------- 59 | @@INPUT.COMMANDS@@ 60 | 61 | ## Data structure(s) 62 | ## ls.str() # uncomment this line to see structure of all objects in workspace 63 | str(@@DATAFRAME.INPUT@@) 64 | 65 | ## Insert sensible summary commands here ------------------------------- 66 | 67 | ## --------------------------------------------------------------------- 68 | ## Examples for possible summaries - DEMO - START HERE 69 | ## --------------------------------------------------------------------- 70 | 71 | ## if data set very large then this could take a long time 72 | ## test with a sample of size N.SAMPLE 73 | 74 | N.SAMPLE <- 100 75 | N.COLS <- 10 76 | 77 | if (dim(@@DATAFRAME.INPUT@@)[1] > N.SAMPLE){ 78 | df1 <- @@DATAFRAME.INPUT@@[sample(1:dim(@@DATAFRAME.INPUT@@)[1], N.SAMPLE),] 79 | } else { 80 | df1 <- @@DATAFRAME.INPUT@@ 81 | } 82 | 83 | ## Summary statistics: 84 | 85 | cat("Summary statistics for (possibly a sample of) '@@DATAFRAME.INPUT@@':\n") 86 | summary(df1) 87 | 88 | ### Example: simple plot of data 89 | 90 | ## boxplot of first continuous variable for first factor with 5 or 91 | ## less groups as long as at least one of each or scatterplot 92 | ## matrix if all numeric or mosaic plot if at least two factors 93 | 94 | num.vars <- names(df1)[vapply(df1, is.numeric, logical(1))] 95 | (n.num <- length(num.vars)) # no. of numeric variables 96 | 97 | fac.vars <- names(df1)[vapply(df1, function(x) is.factor(x) && nlevels(x)<=5, logical(1))] 98 | (n.fac <- length(fac.vars)) # no.
of factors with 5 or less levels 99 | 100 | if (n.num > 0 && require(RcmdrMisc)) { # skip when there are no numeric columns to summarise 101 | RcmdrMisc::numSummary(df1[, num.vars, drop = FALSE]) 102 | } 103 | if (n.num > 0 && n.fac > 0) { # boxplot if at least 1 numeric and 1 factor 104 | x <- num.vars[1] 105 | y <- fac.vars[1] # just in case ID stored as numeric 106 | xlab <- attr(df1[,x], 'label') 107 | ylab <- attr(df1[,y], 'label') 108 | if (length(xlab) == 0) xlab <- x 109 | if (length(ylab) == 0) ylab <- y 110 | form1 <- formula(paste(y,'~', x)) 111 | print(lattice::bwplot(form1, data=df1, xlab=xlab, main=ylab)) 112 | form2 <- formula(paste(x,'~', y)) 113 | print(summary(lm(form2, data=df1))) 114 | 115 | ## Example of Hmisc table 116 | if (require(Hmisc)){ 117 | if (n.num > 6) { 118 | x <- num.vars[1:6] 119 | form3 <- formula(paste(y,'~', paste(x, collapse="+"))) 120 | } else { 121 | form3 <- formula(paste(y,'~', paste(num.vars, collapse="+"))) 122 | } 123 | table1 <- Hmisc::summary.formula(form3, data = df1, method="reverse") 124 | cat("Example summary table using 'Hmisc'") 125 | print(table1) 126 | } else { 127 | table1 <- "'table1' not produced" 128 | } 129 | } 130 | 131 | if (n.num>1) { # if at least 2 numeric - matrix plot max of 4 132 | print(lattice::splom(df1[, num.vars[1:min(c(4,n.num))]])) 133 | } 134 | 135 | if (n.fac>1) { # if at least 2 factors - plot first 2 if vcd installed 136 | if (require(vcd)) { 137 | ## vcd is attached by the require() test above 138 | form1 <- formula(paste("~", paste(fac.vars[1:2], collapse="+"))) 139 | mosaic1 <- mosaic(xtabs(form1, data = df1)) 140 | } 141 | } 142 | 143 | ## Example of ggplot2 pairs plot from GGally 144 | if (require(GGally)){ 145 | ncols <- ncol(df1) 146 | if (ncols > 6) { 147 | ggpairs1 <- GGally::ggpairs(df1[,1:6]) 148 | } else { 149 | ggpairs1 <- GGally::ggpairs(df1) 150 | } 151 | print(ggpairs1) 152 | } else { 153 | ggpairs1 <- "ggpairs plot not produced" 154 | } 155 | 156 | ## DEMO - END HERE ----------------------------------------------- 157 | 158 | ## save some summary results if sensible 159 | ## (if/else, not ifelse(): ifelse() would keep only the first element of each object) 160 | @@DATAFRAME.SAVED@@ <- list(data
= @@DATAFRAME.INPUT@@, 161 | table1 = if (exists("table1")) table1 else NA, 162 | ggpairs1 = if (exists("ggpairs1")) ggpairs1 else NA, 163 | mosaic1 = if (exists("mosaic1")) mosaic1 else NA) 164 | 165 | oldComment <- comment(@@DATAFRAME.INPUT@@) 166 | (newComment 167 | <- paste("Summaries for '@@DATAFRAME.INPUT@@' saved at", 168 | date())) 169 | comment(@@DATAFRAME.SAVED@@) <- c(newComment, oldComment) 170 | 171 | ### Store summary data for subsequent analysis/reports ---------------- 172 | 173 | save(@@DATAFRAME.SAVED@@, 174 | file = file.path("@@DIR.DATA.SAVED@@", "@@RDATA.SAVED@@")) 175 | -------------------------------------------------------------------------------- /man/addFile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/addFile.R 3 | \name{addFile} 4 | \alias{addFile} 5 | \title{add a new code book, data file, document or R file to project} 6 | \usage{ 7 | addFile(file.name, move = TRUE, projectDir = "myProject", replace = FALSE) 8 | } 9 | \arguments{ 10 | \item{file.name}{name of file to add to project as a string} 11 | 12 | \item{move}{whether to move instead of copy file Default: TRUE} 13 | 14 | \item{projectDir}{base directory of \code{dryworkflow} project 15 | directory. Default: \dQuote{myProject}} 16 | 17 | \item{replace}{logical, if file exists then replace. Default: FALSE} 18 | } 19 | \value{ 20 | logical TRUE/FALSE indicating success of adding file 21 | } 22 | \description{ 23 | add a code book, data or document file to a \code{dryworkflow} 24 | project. 
If appropriate, relevant R files will be created and 25 | Makefiles, log files and git repository updated 26 | } 27 | 28 | -------------------------------------------------------------------------------- /man/backupFiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/backupFiles.R 3 | \name{backupFiles} 4 | \alias{backupFiles} 5 | \title{Backup original, R files, R report and posted files to allow regenerating} 6 | \usage{ 7 | backupFiles(zipFile = NULL) 8 | } 9 | \arguments{ 10 | \item{zipFile}{string containing name for zip file. Default: 11 | \code{NULL} whereby name is derived from today's date and the 12 | project directory name} 13 | } 14 | \value{ 15 | None 16 | } 17 | \description{ 18 | Zips all relevant files to allow work to be reproduced although, 19 | for safety, it is best to make your own backup. 20 | } 21 | \details{ 22 | While \code{backupFiles} provides a way to backup all relevant 23 | files to enable work to be reproduced, it is best to have a more 24 | comprehensive personalised strategy tailored to your own 25 | circumstances. In particular, it may be more prudent to modify 26 | \code{Makefile} to backup off site by using make. See Makefile and 27 | type \code{make help rsync} at the command or shell prompt to help 28 | automate remote backup. 
Note that this function should be called 29 | from the main project directory not a work/reporting sub directory 30 | } 31 | \author{ 32 | Peter Baker \email{pete@petebaker.id.au} 33 | } 34 | 35 | -------------------------------------------------------------------------------- /man/copyCommonMk.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/copyCommonMk.R 3 | \name{copyCommonMk} 4 | \alias{copyCommonMk} 5 | \title{Copy dryworkflow package common.mk file to specified destination} 6 | \usage{ 7 | copyCommonMk(destination = NULL, overwriteFile = FALSE, createDir = FALSE) 8 | } 9 | \arguments{ 10 | \item{destination}{string containing directory name for copying 11 | \code{common.mk}. Default: "~/lib" for unix style set ups and 12 | $HOME/Library for windows style set ups} 13 | 14 | \item{overwriteFile}{logical indicating whether to overwrite 15 | existing \code{common.mk} file: Default: FALSE} 16 | 17 | \item{createDir}{whether to create destination directory if it 18 | doesn't exist: Default = FALSE} 19 | } 20 | \value{ 21 | None 22 | } 23 | \description{ 24 | The file \code{common.mk} contains pattern rules to process 25 | \code{R}, \code{Rmd} and \code{Rnw} files to output a range of 26 | output files including \code{Rout}, \code{pdf}, \code{html} and 27 | \code{docx} files 28 | } 29 | \details{ 30 | The \code{common.mk} file is supplied with the \code{dryworkflow} 31 | package. Ideally, this file should be placed in a specific 32 | directory used for all data analysis projects. In \code{linux} 33 | this would usually be something like \code{~/lib}. The definitions 34 | and rules can then be used for any project by including the 35 | contents with an \code{include} command placed near the end of a 36 | \code{Makefile}. Individual definitions or rules can be overridden 37 | by redefining them after the \code{include} statement. 
The latest 38 | version of \code{common.mk} is always available at 39 | \url{https://github.com/petebaker/r-makefile-definitions}. Once a 40 | basic Makefile is set up (usually by 41 | \code{\link{createProjectSkeleton}}) then type \code{make help} 42 | for more details. 43 | } 44 | \examples{ 45 | copyCommonMk("testMake", createDir = TRUE) 46 | } 47 | \author{ 48 | Peter Baker \email{pete@petebaker.id.au} 49 | } 50 | 51 | -------------------------------------------------------------------------------- /man/copyGitIgnore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/copyGitIgnore.R 3 | \name{copyGitIgnore} 4 | \alias{copyGitIgnore} 5 | \title{Copy dryworkflow package file .gitignore to specified destination} 6 | \usage{ 7 | copyGitIgnore(destination = NULL, overwriteFile = FALSE, 8 | createDir = FALSE, renameDotGitignore = TRUE) 9 | } 10 | \arguments{ 11 | \item{destination}{string containing directory name for copying 12 | \code{gitignore}. Default: current directory} 13 | 14 | \item{overwriteFile}{logical indicating whether to overwrite 15 | existing \code{.gitignore} file: Default: FALSE} 16 | 17 | \item{createDir}{whether to create destination directory if it 18 | doesn't exist: Default = FALSE} 19 | 20 | \item{renameDotGitignore}{logical, whether to rename 21 | \code{DOTgitignore} to \code{.gitignore}. Default: TRUE} 22 | } 23 | \value{ 24 | None 25 | } 26 | \description{ 27 | The file \code{.gitignore} contains patterns and file names 28 | indicating which files are not to be tracked by \code{git}. This 29 | is copied to a destination directory or the current directory if 30 | not specified. 31 | } 32 | \details{ 33 | The file \code{.gitignore} is supplied with the \code{dryworkflow} 34 | package. 
The file is called \code{DOTgitignore} and, by default, 35 | renamed automatically to \code{.gitignore} and copied to the 36 | current directory. Various output from \code{R}, intermediate 37 | files from \code{latex} and \code{knitr} are specified as files 38 | which \code{git} does not track. Note that on operating systems 39 | like \code{linux}, files beginning with a dot (.) are hidden and so 40 | to unhide \code{.gitignore} the file name is modified to start 41 | with the letters \code{DOT}. However, to work effectively with 42 | \code{git}, the file must be named \code{.gitignore}. 43 | } 44 | \examples{ 45 | copyGitIgnore("testGit", createDir = TRUE, renameDotGitignore = FALSE) 46 | } 47 | \author{ 48 | Peter Baker \email{pete@petebaker.id.au} 49 | } 50 | 51 | -------------------------------------------------------------------------------- /man/copyTemplates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/copyTemplates.R 3 | \name{copyTemplates} 4 | \alias{copyTemplates} 5 | \title{Copy dryworkflow package template files to specified destination} 6 | \usage{ 7 | copyTemplates(destination = "templates") 8 | } 9 | \arguments{ 10 | \item{destination}{string containing directory name for copying 11 | template files. Default: a new directory "templates" in the 12 | current directory} 13 | } 14 | \value{ 15 | None 16 | } 17 | \description{ 18 | Copies all template \code{.txt} files from package 19 | \code{dryworkflow} to a directory so that they can be modified and 20 | reused. Files can be customised and then used by 21 | \code{\link{createProjectSkeleton}} by specifying an alternative 22 | \code{template.dir}. Note that if the directory specified already 23 | exists then an error will be produced. 
24 | } 25 | \details{ 26 | Files have names like \code{template_cleanR.txt}, 27 | \code{template_readR.txt}, \code{template_analyseR.txt} and so 28 | on. Their use should be obvious from the file name. Strings which 29 | may be changed are described at the top of the template file and 30 | the description will be removed from the syntax file which is 31 | produced using \code{\link{createProjectSkeleton}}. 32 | } 33 | \examples{ 34 | copyTemplates() 35 | } 36 | \author{ 37 | Peter Baker \email{pete@petebaker.id.au} 38 | } 39 | 40 | -------------------------------------------------------------------------------- /man/createFromTemplate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/createFromTemplate.R 3 | \name{createFromTemplate} 4 | \alias{createFromTemplate} 5 | \title{Create an R, Rmd or Rnw file from a template} 6 | \usage{ 7 | createFromTemplate(file.name, subst.strings, template, template.dir, 8 | overwrite.file = FALSE, string.prefix = "@@", 9 | string.suffix = string.prefix, 10 | delete.start = "-- START: DELETE THIS SECTION --", 11 | delete.end = "-- END: DELETE THIS SECTION --", print.mismatches = FALSE) 12 | } 13 | \arguments{ 14 | \item{file.name}{full filename of file to be written provided as a 15 | string and including directory if necessary} 16 | 17 | \item{subst.strings}{named list of string substitutions} 18 | 19 | \item{template}{name of template text file as string. Default: 20 | \code{NULL} for predefined template from \code{\link{dryworkflow}} 21 | package.} 22 | 23 | \item{template.dir}{directory containing template. Default: 24 | \code{\link{dryworkflow}} package directory} 25 | 26 | \item{overwrite.file}{logical whether or not to allow overwrite of 27 | existing file. Default: \code{FALSE}} 28 | 29 | \item{string.prefix}{string of characters for start of strings to 30 | be substituted from template file. 
Default \sQuote{@@}} 31 | 32 | \item{string.suffix}{string of characters for end of strings to be 33 | substituted from template file. Default: same as 34 | \code{string.prefix}} 35 | 36 | \item{delete.start}{lines between and including those containing 37 | the \code{delete.start} and \code{delete.end} patterns will be 38 | removed. Default: \dQuote{-- START: DELETE THIS SECTION --}} 39 | 40 | \item{delete.end}{Default: \dQuote{-- END: DELETE THIS SECTION --}} 41 | 42 | \item{print.mismatches}{logical to declare whether to print warnings 43 | about unused and undefined strings. Default: \code{FALSE}} 44 | } 45 | \value{ 46 | None 47 | } 48 | \description{ 49 | Creates and writes an \code{R} syntax or report file from a 50 | template by substituting for specified strings. By default, these 51 | templates are provided as part of the \code{\link{dryworkflow}} 52 | package. In order to customise these files to a particular project 53 | or style of working, alternate templates and string formats can be 54 | provided. 55 | } 56 | \details{ 57 | By default, strings in the template file that look like 58 | @@SYNTAX.FILE@@ and @@DATAFILE@@ are substituted with 59 | strings provided as elements in a list with named components 60 | SYNTAX.FILE, DATAFILE and so on provided as the \code{subst.strings} 61 | argument. The string prefix and suffix can be changed but must be 62 | the same throughout the template. 
63 | } 64 | \author{ 65 | Peter Baker \email{pete@petebaker.id.au} 66 | } 67 | 68 | -------------------------------------------------------------------------------- /man/createProjectSkeleton.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/createProjectSkeleton.R 3 | \name{createProjectSkeleton} 4 | \alias{createProjectSkeleton} 5 | \title{Create dryWorkflow data analysis project skeleton} 6 | \usage{ 7 | createProjectSkeleton(dir.project = NULL, name.project = NULL, 8 | title.project = NULL, type.project = NULL, style = NULL, 9 | data.src = NULL, data.ext = NULL, data.mv = NULL, doc.src = NULL, 10 | doc.ext = NULL, doc.mv = NULL, codebook.src = NULL, 11 | codebook.ext = NULL, codebook.mv = NULL, lib.src = NULL, 12 | lib.ext = NULL, lib.mv = NULL, force = NULL, dontmove = NULL, 13 | log.ext = NULL, report.types = NULL, author = NULL, address = NULL, 14 | table1 = NULL, custom.dirs = NULL, mnemonic = NULL, extra = NULL, 15 | common.mk = NULL, template.dir = NULL, templates = NULL, 16 | print.mismatches = NULL, report.which = NULL, report.markdown = NULL, 17 | licence = NULL, ...) 18 | } 19 | \arguments{ 20 | \item{dir.project}{directory name for project. Default: 21 | \dQuote{myRproject}} 22 | 23 | \item{name.project}{name of project (for output documents, comments 24 | etc). Default: \dQuote{My Data Analysis Project}} 25 | 26 | \item{title.project}{title string for project reports 27 | (for output documents, comments etc). Default: \dQuote{My Data Analysis Project}} 28 | 29 | \item{type.project}{type of project: \dQuote{normal}, 30 | \dQuote{simple} or \dQuote{custom} (custom NYI). The style of 31 | directory structure for the project. 
Default: \dQuote{normal}} 32 | 33 | \item{style}{style for directory and filenames \dQuote{unix} or 34 | \dQuote{windows}. Default: \dQuote{unix}} 35 | 36 | \item{data.src}{source directory for data files, multiple 37 | OK. Default: current directory \dQuote{.}} 38 | 39 | \item{data.ext}{possible data file extensions. Default: 40 | c(\dQuote{.xls}, \dQuote{.xlsx}, \dQuote{.csv}, \dQuote{.dta}, \dQuote{.sav}, \dQuote{.xpt}, \dQuote{.RData}, \dQuote{.rda})} 41 | 42 | \item{data.mv}{whether to move (TRUE) or copy (FALSE) data files 43 | to destination directory. Default: TRUE which will move files} 44 | 45 | \item{doc.src}{source directory for documents, multiple 46 | OK. Default: current directory \dQuote{.}} 47 | 48 | \item{doc.ext}{possible document file extensions. Default: 49 | c(\dQuote{doc}, \dQuote{docx}, \dQuote{odt}, \dQuote{tex})} 50 | 51 | \item{doc.mv}{whether to move (TRUE) or copy (FALSE) document files 52 | to destination directory. Default: TRUE which will move files} 53 | 54 | \item{codebook.src}{source directory for codebook files - multiple 55 | OK. Default: current directory \dQuote{.}} 56 | 57 | \item{codebook.ext}{possible code book file extensions. Default: 58 | c(\dQuote{_codebook.csv}, \dQuote{_codebook.xls}, \dQuote{_codebook.xlsx})} 59 | 60 | \item{codebook.mv}{whether to move (TRUE) or copy (FALSE) codebook 61 | files to destination directory. Default: TRUE which will move 62 | files} 63 | 64 | \item{lib.src}{source directory for library files - multiple 65 | OK. Default: current directory \dQuote{.}} 66 | 67 | \item{lib.ext}{possible library file extensions. Default: \dQuote{R}} 68 | 69 | \item{lib.mv}{whether to move (TRUE) or copy (FALSE) 70 | library/function files to destination directory. Default: TRUE 71 | which will move files} 72 | 73 | \item{force}{\code{logical} Whether to force creation of project 74 | directory. 
Default: \code{FALSE}, do not overwrite existing directory} 75 | 76 | \item{dontmove}{character vector of files that will not be moved 77 | or copied from source directories. Default: 78 | \dQuote{setupProject.R}} 79 | 80 | \item{log.ext}{extension of log and README file(s) which are either 81 | plain text (\dQuote{.txt}) or orgmode (\dQuote{.org}) 82 | text file(s). Default: \dQuote{.txt}} 83 | 84 | \item{report.types}{vector of output templates to be produced 85 | including \dQuote{.html}, \dQuote{.docx} and 86 | \dQuote{.pdf}. Default: \dQuote{all}} 87 | 88 | \item{author}{author name for reports. Default: 89 | \dQuote{Insert author name here}} 90 | 91 | \item{address}{address for reports. Default: \dQuote{Insert address here}} 92 | 93 | \item{table1}{logical: produce table 1 style summary statistics for 94 | reports. Default: \code{FALSE}} 95 | 96 | \item{custom.dirs}{list of extra directories to be 97 | created. Default: \dQuote{extra}} 98 | 99 | \item{mnemonic}{three or four letter mnemonic to aid remembering 100 | and for succinct naming of \code{R}, \code{Rmd} and \code{Rnw} files 101 | and project directory. Default: \code{NULL} for none} 102 | 103 | \item{extra}{extra directories additional to standard 104 | setup. Default: \code{NULL} for none} 105 | 106 | \item{common.mk}{list with components \code{dir} the location of 107 | common.mk and other library files. Default: \dQuote{~/lib} for 108 | \code{unix} and \dQuote{$HOME/Library} for 109 | \code{Windows}. However, if a global directory is not found then 110 | the project specific directory will be used. The second component 111 | is \code{file} the filename containing makefile rules for \code{R}, 112 | \code{Rmd} etc files for inclusion into Makefiles. Default: 113 | \dQuote{common.mk}} 114 | 115 | \item{template.dir}{directory name containing template 116 | files. Default: templates provided with the \code{dryworkflow} 117 | package} 118 | 119 | \item{templates}{named list of template files. 
See the templates 120 | provided with the \code{dryworkflow} package for details} 121 | 122 | \item{print.mismatches}{print mismatches when creating syntax or 123 | markdown files from templates. Default: \code{FALSE}} 124 | 125 | \item{report.which}{which data files to produce reports 126 | for. Choices: \dQuote{first}, \dQuote{merge}, \dQuote{all}. 127 | Default: \dQuote{first}} 128 | 129 | \item{report.markdown}{vector of markdown file types to be 130 | employed to produce reports, such as \dQuote{.org}, 131 | \dQuote{.Rmd}, \dQuote{.Rnw}. Default: \dQuote{.Rmd} and 132 | \dQuote{.Rnw}} 133 | 134 | \item{licence}{Licence for syntax files. Could be a string such as 135 | \sQuote{Copyright J Smith 2015}. Default: \dQuote{GPL3 see }} 136 | 137 | \item{...}{extra parameters passed to safe directory creation} 138 | } 139 | \value{ 140 | invisible or does it??? what about monitoring?? 141 | } 142 | \description{ 143 | Create skeleton directory structure, R syntax templates, report 144 | templates, log file stubs and also move (or copy) data and doc 145 | files to appropriate directories. Also, create project 146 | configuration file, Makefiles and then initialise logs, notes and 147 | version control using \code{git}. 148 | } 149 | \details{ 150 | Note that option parameters are either set as an argument to the 151 | function or automatically via global options using 152 | \code{getOption("dryworkflow")}. For further information about 153 | setting these options globally via \code{options}, please see 154 | \code{\link{dryworkflow-package}}. 
155 | } 156 | \examples{ 157 | ## A project with all default settings 158 | 159 | ## copy .csv file and codebook from dryWorkflow package 160 | file.copy(system.file('demoFiles', 'small2.csv', package='dryworkflow'), 161 | 'small2.csv') 162 | file.copy(system.file('demoFiles', 'small2_codebook.csv', 163 | package='dryworkflow'), 'small2_codebook.csv') 164 | 165 | ## NB: In practice, always check directories, R syntax etc 166 | ## before using 'make' 167 | createProjectSkeleton(dir.proj = "testProject", 168 | name.project = "Experiment 1", 169 | dontmove = "dryworkflow-Ex.R") 170 | } 171 | \author{ 172 | Peter Baker \email{pete@petebaker.id.au} 173 | } 174 | 175 | -------------------------------------------------------------------------------- /man/createSyntaxR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/createSyntaxR.R 3 | \name{createSyntaxR} 4 | \alias{createSyntaxR} 5 | \title{Create \code{R} syntax for reading, cleaning, summarising and analysing} 6 | \usage{ 7 | createSyntaxR(dir.project, filesAndDFs, template, project.steps = c("read", 8 | "clean", "summary", "analyse", "mergeAll", "codebook", "reportRmd", 9 | "reportRnw", "presentRmd", "beamerRmd", "beamerRnw"), 10 | makefile.depends = NULL, makefile.targets = c("Rout", "pdf"), 11 | myFunction.files = NULL, libraries = NULL, template.dir = NULL, 12 | print.mismatches = FALSE, overwrite.file = FALSE, string.prefix = "@@", 13 | string.suffix = string.prefix, 14 | delete.start = "-- START: DELETE THIS SECTION --", 15 | delete.end = "-- END: DELETE THIS SECTION --", extras = list(LICENCE = 16 | options()$dryworkflow$licence, AUTHOR = options()$dryworkflow$author, 17 | TITLE.PROJECT = options()$dryworkflow$title.project)) 18 | } 19 | \arguments{ 20 | \item{dir.project}{directory name for project} 21 | 22 | \item{filesAndDFs}{object of S3 class \code{fileAndDataName} 23 | 
containing relevant filenames, directories for setting up syntax 24 | and Makefiles} 25 | 26 | \item{template}{name of template text file as string. Default: 27 | \code{NULL} for predefined template from \code{\link{dryworkflow}} 28 | package.} 29 | 30 | \item{project.steps}{steps to be carried out in project, specified 31 | as a vector of strings. Options are \dQuote{read} to read data 32 | (always assumed present), \dQuote{clean} clean data, 33 | \dQuote{summary} summary statistics and basic plots, 34 | \dQuote{analyse} perform statistical analysis, \dQuote{compare} 35 | compare datasets and in particular different versions of the same 36 | data set, \dQuote{mergeAll} merge data sets if more than one; and 37 | \dQuote{reportRmd} or \dQuote{reportRnw} produce reports using 38 | \code{\link{rmarkdown}} and/or \code{\link{Sweave}} and 39 | \dQuote{presentRmd} or \dQuote{beamerRnw} produce presentations 40 | using \code{\link{rmarkdown}} and/or \code{\link{Sweave}}} 41 | 42 | \item{makefile.depends}{files to be used as dependencies in 43 | addition to the syntax file for targets. Default: worked out from 44 | project structure (\code{NULL})} 45 | 46 | \item{makefile.targets}{strings with file extensions for targets 47 | in makefiles. Default: c(\dQuote{Rout}, \dQuote{pdf}) using 48 | \code{R CMD BATCH} and \code{stitch} via \code{rmarkdown}, respectively} 49 | 50 | \item{myFunction.files}{character vector of own homegrown function 51 | file names to be sourced not currently put in to a package} 52 | 53 | \item{libraries}{character vector of library statements to be 54 | added near top of \code{R} syntax file. Default: load 55 | \code{dryworkflow}, \code{Hmisc}, \code{foreign}} 56 | 57 | \item{template.dir}{directory containing template. Default: 58 | \code{\link{dryworkflow}} package directory} 59 | 60 | \item{print.mismatches}{print mismatches when creating syntax or 61 | markdown files from templates. 
Default: \code{FALSE}} 62 | 63 | \item{overwrite.file}{logical whether or not to allow overwrite of 64 | existing file. Default: \code{FALSE}} 65 | 66 | \item{string.prefix}{string of characters for start of strings to 67 | be substituted from template file. Default: \sQuote{@@}} 68 | 69 | \item{string.suffix}{string of characters for end of strings to be 70 | substituted from template file. Default: same as 71 | \code{string.prefix}} 72 | 73 | \item{delete.start}{lines between and including those containing 74 | the \code{delete.start} and \code{delete.end} patterns will be 75 | removed. Default: \dQuote{-- START: DELETE THIS SECTION --}} 76 | 77 | \item{delete.end}{Default: \dQuote{-- END: DELETE THIS SECTION --}} 78 | 79 | \item{extras}{list of extra options to be passed to function for 80 | substitution in template. Default: licence, author and 81 | title.project obtained from global options} 82 | 83 | \item{AUTHOR}{string containing Author's name for \code{R} and 84 | markdown syntax files} 85 | 86 | \item{TITLE.PROJECT}{string containing title of project for log 87 | files and reports} 88 | } 89 | \value{ 90 | Lines to be included in Makefile for reading files as 91 | character vector 92 | } 93 | \description{ 94 | Function is used internally by \code{\link{createProjectSkeleton}} and 95 | \code{\link{addFile}} to produce individual \code{.R} template 96 | syntax files for cleaning, summarising and analysing data files in 97 | a data analysis project. 98 | } 99 | \author{ 100 | Peter Baker \email{pete@petebaker.id.au} 101 | } 102 | 103 | -------------------------------------------------------------------------------- /man/dryworkflow.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/dryworkflow.R 3 | \docType{package} 4 | \name{dryworkflow} 5 | \alias{dryworkflow} 6 | \alias{dryworkflow-package} 7 | \title{dryworkflow: don't repeat 
yourself workflow for more efficient data analysis} 8 | \description{ 9 | The \code{dryworkflow} package produces a project skeleton for 10 | data analysis including \code{R} syntax files, report templates and 11 | Makefiles. Given data files and documents, the skeleton is 12 | generated with initial directories, template log files, template 13 | \code{R} syntax for data checking and initial analysis, makefiles 14 | and a \code{git} repository is initialised. 15 | } 16 | \section{Templates}{ 17 | \code{R} syntax templates for reading, 18 | cleaning, merging, summarising and analysing data and 19 | \code{Rmarkdown} and \code{Sweave} templates for reports are provided. The 20 | function \code{\link{copyTemplates}} may be used to get copies 21 | of these templates which can then be modified for use when 22 | creating a project skeleton. 23 | } 24 | 25 | \section{Make and definitions}{ 26 | Makefiles are generated. The file 27 | \code{common.mk} provides pattern rules to produce 28 | \code{.Rout} and \code{.pdf} files from \code{R} syntax files 29 | and \code{.html}, \code{.pdf} and \code{.docx} files from 30 | \code{.Rmd} R markdown and \code{.Rnw} files. The function 31 | \code{\link{copyCommonMk}} may be used to get a copy of the 32 | \code{common.mk} file used by the installed version of the 33 | \code{dryworkflow} package. The latest version of 34 | \code{common.mk} can always be found at 35 | \url{https://github.com/petebaker/r-makefile-definitions}. 36 | } 37 | 38 | \section{.gitignore}{ 39 | A \code{.gitignore} file is created in the 40 | base project directory to indicate files not to be tracked by 41 | \code{git}. The function \code{\link{copyGitIgnore}} may be 42 | used to get a copy of the \code{.gitignore} file used by the 43 | installed version of the \code{dryworkflow} package. The 44 | latest version of \code{.gitignore} can always be found at 45 | \url{https://github.com/petebaker/r-gitignore}. 
46 | } 47 | 48 | \section{Project Options}{ 49 | Note that option parameters are either 50 | set as an argument to the function 51 | \code{\link{createProjectSkeleton}} or automatically via 52 | global options using 53 | \code{getOption("dryworkflow")}. Customised options may be set 54 | in \code{.Rprofile} using global options and these will be set 55 | automatically when \code{dryworkflow} is loaded. 56 | } 57 | \examples{ 58 | ## setting global options or put these in .Rprofile 59 | 60 | \dontrun{ 61 | current.opts <- options() 62 | options("dryworkflow" = list(git = list(user.name = "My Name", user.email = "myname@email.com"))) 63 | library(dryworkflow) 64 | options("dryworkflow") 65 | } 66 | 67 | ## A project with all default settings 68 | 69 | ## copy .csv file and codebook from dryWorkflow package 70 | ## noting that normally you just place files in current directory 71 | ## and then run 'createProjectSkeleton' 72 | file.copy(system.file('demoFiles', 'small2.csv', package='dryworkflow'), 73 | 'small2.csv') 74 | file.copy(system.file('demoFiles', 'small2_codebook.csv', 75 | package='dryworkflow'), 'small2_codebook.csv') 76 | 77 | ## NB: In practice, always check directories, R syntax etc 78 | ## before using 'make' 79 | createProjectSkeleton(dir.proj = "testProject2", 80 | name.project = "Experiment 1", 81 | dontmove = "dryworkflow-Ex.R") 82 | } 83 | 84 | -------------------------------------------------------------------------------- /man/extractSubstStrings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \name{extractSubstStrings} 4 | \alias{extractSubstStrings} 5 | \title{extract strings for substitution from template for checking etc} 6 | \usage{ 7 | extractSubstStrings(template, string.prefix = "@@", 8 | string.suffix = string.prefix) 9 | } 10 | \arguments{ 11 | \item{template}{character vector where each element is 
the lines 12 | of a template file} 13 | 14 | \item{string.prefix}{string of characters for start of strings to 15 | be substituted. Default: \sQuote{@@}} 16 | 17 | \item{string.suffix}{string of characters for end of strings to be 18 | substituted. Default: same as \code{string.prefix}} 19 | } 20 | \value{ 21 | vector of strings for substitution 22 | } 23 | \description{ 24 | Given a template vector \code{template} read in from a template 25 | file and string format via \code{string.prefix} and 26 | \code{string.suffix}, extract a vector of all strings to be 27 | substituted. 28 | } 29 | \seealso{ 30 | \code{\link{readTemplate}} reads a template file into a 31 | string, \code{\link{createFromTemplate}} creates a new file from a template 32 | } 33 | \keyword{internal} 34 | 35 | -------------------------------------------------------------------------------- /man/getProjectConfig.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/getProjectConfig.R 3 | \name{getProjectConfig} 4 | \alias{getProjectConfig} 5 | \title{Get dryworkflow configuration from configFile.rds file} 6 | \usage{ 7 | getProjectConfig(projectDir = ".", checkAbove = FALSE, 8 | checkSubDirs = FALSE) 9 | } 10 | \arguments{ 11 | \item{projectDir}{base directory of dryWorkflow project} 12 | 13 | \item{checkAbove}{check directory above current directory for 14 | presence of configuration file} 15 | 16 | \item{checkSubDirs}{check directories below current directory for 17 | presence of configuration file(s). If there is more than one then 18 | print locations. 
If there is only one, print a message indicating 19 | which project directory is present and use that.} 20 | } 21 | \value{ 22 | object of class \dQuote{drywProjectConfig} else FALSE if 23 | file \code{configFile.rds} is not found or if object not of 24 | correct class 25 | } 26 | \description{ 27 | Configuration file \code{configFile.rds} is created with 28 | \code{\link{createProjectSkeleton}} when a project is created. It 29 | contains details like directory structures and various parameters 30 | for a project. 31 | } 32 | 33 | -------------------------------------------------------------------------------- /man/readCodeBook.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/readCodeBook.R 3 | \name{readCodeBook} 4 | \alias{readCodeBook} 5 | \title{Read a code book in standard format as a csv file} 6 | \usage{ 7 | readCodeBook(x, codebook.directory = NULL, col.names = c(var.name = 8 | "Variable", var.orig = "Original.Name", var.label = "Label", levels = 9 | "Levels", min = "Min", max = "Max"), non.standard = NULL, 10 | na.strings = c("", "NA", ".", " ")) 11 | } 12 | \arguments{ 13 | \item{x}{filename of codebook to parse} 14 | 15 | \item{codebook.directory}{directory containing codebook} 16 | 17 | \item{col.names}{named character vector containing column names in 18 | Code Book file. 
The vector contains components \dQuote{var.name} = 19 | variable name, \dQuote{var.orig} = original name (if changed), 20 | \dQuote{var.label} = label for printing/plotting, \dQuote{levels} = factor 21 | levels, \dQuote{min} and \dQuote{max} for continuous measurements, 22 | \dQuote{comments} = comments about the variable which may include 23 | the measurement instrument or references about the measurement} 24 | 25 | \item{non.standard}{named list of non-standard names of columns 26 | with names \code{c("var.name", "var.orig", "var.label", "levels", "min", "max")}} 27 | 28 | \item{na.strings}{a character vector of strings which are to be 29 | interpreted as \sQuote{NA} values. Blank fields are also 30 | considered to be missing values in logical, integer, numeric and 31 | complex fields. Default: \code{c("", "NA", ".", " ")}} 32 | } 33 | \value{ 34 | S3 object of class \dQuote{codebook} 35 | } 36 | \description{ 37 | Reads a code book stored as a \code{csv} file for either checking 38 | against a data file or relabelling factor levels or labelling 39 | variables. 40 | } 41 | \details{ 42 | Often, data dictionaries or code books are provided with data 43 | files. Rather than \code{word} \code{doc} or \code{pdf} files, 44 | the format required here is a very specific one stored as a 45 | \code{csv} file. Once read in, attributes such as factor 46 | labels/levels and variable labels can be added to the 47 | \code{data.frame} and/or also used to check factor labels and 48 | variable names are consistent with the code book. Note that while 49 | various methods may be available which attempt to convert word 50 | docs or pdf's to a spreadsheet and/or csv file, extreme care 51 | should be taken as these are far from perfect. 
52 | } 53 | \examples{ 54 | file.copy(system.file('demoFiles', 'data1_codebook.csv', 55 | package='dryworkflow'), 'data1_codebook.csv') 56 | data1_codebook <- readCodeBook("data1_codebook.csv", 57 | non.standard = list(levels = "Factor.Levels", 58 | var.orig = "Old.Variable")) 59 | } 60 | \author{ 61 | Peter Baker \email{pete@petebaker.id.au} 62 | } 63 | 64 | -------------------------------------------------------------------------------- /man/readTemplate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \name{readTemplate} 4 | \alias{readTemplate} 5 | \title{Read template file into a string} 6 | \usage{ 7 | readTemplate(template, template.dir = NULL, 8 | delete.start = "-- START: DELETE THIS SECTION --", 9 | delete.end = "-- END: DELETE THIS SECTION --") 10 | } 11 | \arguments{ 12 | \item{template}{name of template text file as string. Default: 13 | \code{NULL} for predefined template from \code{\link{dryworkflow}} 14 | package.} 15 | 16 | \item{template.dir}{directory containing template. Default: 17 | \code{\link{dryworkflow}} package directory} 18 | 19 | \item{delete.start}{lines between and including those containing 20 | the \code{delete.start} and \code{delete.end} patterns will be 21 | removed. 
Default: \dQuote{-- START: DELETE THIS SECTION --}} 22 | 23 | \item{delete.end}{Default: \dQuote{-- END: DELETE THIS SECTION 24 | --}} 25 | } 26 | \value{ 27 | character vector containing the complete \code{template} with a 28 | string for each line of the template file 29 | } 30 | \description{ 31 | Internal function used by functions when checking or creating 32 | syntax and markdown files. 33 | } 34 | \details{ 35 | Note that lines between (and including) lines containing the 36 | patterns \dQuote{-- START: DELETE THIS SECTION --} and 37 | \dQuote{-- END: DELETE THIS SECTION --} will be deleted from 38 | the template. 39 | } 40 | \author{ 41 | Peter Baker \email{pete@petebaker.id.au} 42 | } 43 | \keyword{internal} 44 | 45 | -------------------------------------------------------------------------------- /man/setUpDirectoryStructure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/setUpDirectoryStructure.R 3 | \name{setUpDirectoryStructure} 4 | \alias{setUpDirectoryStructure} 5 | \title{Set up directories for data analysis project} 6 | \usage{ 7 | setUpDirectoryStructure(style = NULL, type.project = NULL, 8 | destinations = NULL, extra = NULL, extra.destinations = NULL, ...) 9 | } 10 | \arguments{ 11 | \item{style}{style for directory and file names (\code{unix} or 12 | \code{windows}), Default: \code{unix}} 13 | 14 | \item{type.project}{type of project: normal, simple or custom 15 | (custom Not Yet Implemented). The style of directory structure 16 | for the project. Default: \dQuote{normal}} 17 | 18 | \item{destinations}{\code{list} of destination directories where 19 | original (and added) files will be moved. This is a \code{list} 20 | with named components \sQuote{data}, \sQuote{doc}, 21 | \sQuote{codebook} and \sQuote{lib} for data files, documents, 22 | codebooks and R functions, respectively. 
Default: unix directory 23 | names will be \code{list(data = "data/original", doc = "doc/original", codebook = "data/codebook", lib = "lib")} and 24 | Windows will be of similar form.} 25 | 26 | \item{extra}{extra directories additional to standard setup} 27 | 28 | \item{extra.destinations}{extra destination directories additional 29 | to standard setup} 30 | 31 | \item{...}{extra arguments passed to specific functions} 32 | } 33 | \value{ 34 | \code{list} of directories including destinations for 35 | initial files of class \sQuote{drywDestinationDirs}. Named 36 | components are \code{directories} and \code{destinations}. 37 | } 38 | \description{ 39 | Set up directories for data analysis project 40 | } 41 | 42 | -------------------------------------------------------------------------------- /man/setupFileAndDataNames.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/setupFileAndDataNames.R 3 | \name{setupFileAndDataNames} 4 | \alias{setupFileAndDataNames} 5 | \title{Internal: Create syntax filenames and object names for processing} 6 | \usage{ 7 | setupFileAndDataNames(dir.project, destinations, projectConfig, projectDirs, 8 | filenames = NULL, initial.files = NULL, mnemonic = "", 9 | project.steps = c("read", "codebook", "clean", "summary", "analyse", 10 | "compare", "mergeAll", "reportRmd", "reportRnw", "presentRmd", "beamerRmd", 11 | "beamerRnw"), report.markdown = c("Rmd", "Rnw"), report.which = c("first", 12 | "merge", "all")) 13 | } 14 | \arguments{ 15 | \item{dir.project}{dir.project directory name for project. Default: 16 | \dQuote{myRproject}} 17 | 18 | \item{destinations}{\code{list} of destination directories where 19 | original (and added) files will be moved. 
This is a \code{list} 20 | with named components \sQuote{data}, \sQuote{doc}, 21 | \sQuote{codebook} and \sQuote{lib} for data files, documents, 22 | codebooks and R functions, respectively. Default: unix directory 23 | names will be \code{list(data = "data/original", doc = 24 | "doc/original", codebook = "data/codebook", lib = "lib")} and 25 | Windows will be of similar form but capitalised 26 | with longer form names.} 27 | 28 | \item{projectConfig}{project configuration stored at project 29 | creation and updated when files are added. Format similar 30 | to that of getOption(\dQuote{dryworkflow})} 31 | 32 | \item{projectDirs}{directory structure of project of class 33 | 'drywDestinationDirs'} 34 | 35 | \item{filenames}{filenames for added files but not set for new 36 | project.} 37 | 38 | \item{initial.files}{initial file sources and extensions} 39 | 40 | \item{mnemonic}{three or four letter mnemonic to aid remembering 41 | and for succinct naming of \code{R}, \code{Rmd} and \code{Rnw} files 42 | and project directory. Default: \code{""} for none} 43 | 44 | \item{project.steps}{steps to be carried out in project, specified 45 | as a vector of strings. Options are \dQuote{read} to read data 46 | (always assumed present), \dQuote{clean} clean data, 47 | \dQuote{summary} summary statistics and basic plots, 48 | \dQuote{analyse} perform statistical analysis, \dQuote{compare} 49 | compare datasets and in particular different versions of the same 50 | data set, \dQuote{mergeAll} merge data sets if more than one; and 51 | \dQuote{reportRmd} or \dQuote{reportRnw} produce reports using 52 | \code{\link{rmarkdown}} and/or \code{\link{Sweave}} and 53 | \dQuote{presentRmd} or \dQuote{beamerRnw} produce presentations 54 | using \code{\link{rmarkdown}} and/or \code{\link{Sweave}}} 55 | 56 | \item{report.markdown}{vector of markdown file types to be 57 | employed to produce reports such as \dQuote{.org}, \dQuote{.Rmd} 58 | and \dQuote{.Rnw}. 
Default: \dQuote{.Rmd} and \dQuote{.Rnw}.} 59 | 60 | \item{report.which}{which data files to produce reports 61 | for. Choices: \dQuote{first}, \dQuote{merge}, \dQuote{all}. 62 | Default: \dQuote{first}} 63 | } 64 | \value{ 65 | an S3 object of class \code{fileAndDataName} 66 | } 67 | \description{ 68 | This is an internal \code{\link{dryworkflow-package}} function. It 69 | is primarily designed to be called by 70 | \code{\link{createProjectSkeleton}} and \code{\link{addFile}} to 71 | set up file and object names for processing. Given a list of data 72 | filenames and, optionally, a list of project steps, various names 73 | are created for use with templates, makefiles and git for version 74 | control. 75 | } 76 | 77 | -------------------------------------------------------------------------------- /man/whichReadCommand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \name{whichReadCommand} 4 | \alias{whichReadCommand} 5 | \title{Provide appropriate read commands for reading data files} 6 | \usage{ 7 | whichReadCommand(data.files, commands = list(tsv = "read.delim", csv = 8 | "read.csv", dta = "foreign::read.dta", sas7bdat = "sas7bdat::read.sas7bdat", 9 | xport = "foreign::read.xport", sav = "foreign::spss.get", rdata = "load", rda 10 | = "load", xls = "readxl::read_excel", xlsx = "readxl::read_excel"), 11 | other = NULL, extra.args = list(tsv = NA, csv = 12 | "na.strings = c(\\"\\", \\"NA\\", \\".\\")", dta = NA, sas7bdat = NA, xport = 13 | NA, sav = NA, rdata = NA, rda = NA, xls = NA, xlsx = NA), 14 | unsupported = "UNSUPPORTED:PUT_READ_FUNCTION_HERE") 15 | } 16 | \arguments{ 17 | \item{data.files}{character vector of file names} 18 | 19 | \item{commands}{list of strings to be substituted into R syntax 20 | files to read each file in \code{data.files}} 21 | 22 | \item{other}{list of either alternate or extra commands for 23 | 
reading data files} 24 | 25 | \item{extra.args}{extra arguments that will be added to the 26 | read command produced} 27 | 28 | \item{unsupported}{string to use if filetype not supported} 29 | } 30 | \value{ 31 | named character vector of strings to substitute for read commands 32 | in \code{R} syntax file. The \dQuote{extraLibs} attribute is a 33 | character vector of library commands which can be incorporated 34 | into syntax files although this is unnecessary because functions 35 | are called directly. 36 | } 37 | \description{ 38 | Using the filename extensions, such as \code{.csv} or \code{.dta}, 39 | a best match is determined for inclusion in \code{R} syntax files 40 | for reading data. Suggested commands can be changed or extras 41 | added by using the \code{other} option. 42 | } 43 | \author{ 44 | Peter Baker 45 | } 46 | \keyword{internal} 47 | 48 | --------------------------------------------------------------------------------