├── .gitignore ├── .travis.yml ├── Makefile ├── README.md ├── examples ├── cellwise.R ├── expression_logger.R ├── lumberjack.R ├── no_log.R └── simple.R ├── fig ├── Makefile ├── datastep2.png └── datastep2.tex ├── pkg ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R │ ├── cellwise.R │ ├── expression_logger.R │ ├── filedump.R │ ├── lumberjack.R │ ├── no_logger.R │ ├── run.R │ ├── simple.R │ └── utils.R ├── README.md ├── inst │ ├── CITATION │ └── tinytest │ │ ├── runs │ │ ├── auto_dump.R │ │ ├── dump_test.R │ │ ├── multiple_loggers.R │ │ └── single_logger.R │ │ ├── test_cellwise.R │ │ ├── test_expressionlogger.R │ │ ├── test_filedump.R │ │ ├── test_logging_infra.R │ │ ├── test_no_log.R │ │ ├── test_nse.R │ │ ├── test_run.R │ │ ├── test_simple.R │ │ └── test_utils.R ├── tests │ └── tinytest.R └── vignettes │ ├── JSS_4008.Rnw │ ├── datastep2.pdf │ ├── jss4008.pdf │ ├── process.R │ └── using_lumberjack.Rnw ├── todo.txt └── using_lumberjack.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | *.swp 8 | 9 | # Example code in package build process 10 | *-Ex.R 11 | 12 | # Output files from R CMD build 13 | /*.tar.gz 14 | 15 | # Output files from R CMD check 16 | /*.Rcheck/ 17 | 18 | # RStudio files 19 | .Rproj.user/ 20 | *.Rproj 21 | 22 | # produced vignettes 23 | pkg/vignettes/*.html 24 | pkg/vignettes/*.pdf 25 | 26 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 27 | .httr-oauth 28 | 29 | # knitr and R markdown default cache directories 30 | /*_cache/ 31 | /cache/ 32 | 33 | # Temporary files created by R markdown 34 | *.utf8.md 35 | *.knit.md 36 | .Rproj.user 37 | 38 | # Files created by roxygen 39 | pkg/man/* 40 | 41 | # other files I frequently generate 42 | manual.pdf 43 | README.html 44 | *.csv 45 | output/* 46 | *.toc 47 | *.aux 48 | *.log 49 | *.out 50 | *.tex 51 | 52 | 53 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | 2 | # travis config 3 | 4 | dist: trusty 5 | language: r 6 | sudo: required 7 | 8 | addons: 9 | apt: 10 | packages: 11 | - libxml2-dev 12 | 13 | r: 14 | - release 15 | 16 | before_install: 17 | - R -e "install.packages(c('R6','roxygen2','tinytest','pkgload'))" 18 | - R -e "pkgload::load_all('pkg');roxygen2::roxygenize('pkg')" 19 | - cd ./pkg 20 | 21 | r_packages: 22 | - covr 23 | - rmarkdown 24 | 25 | 26 | after_success: 27 | - Rscript -e 'library(covr);coveralls()' 28 | 29 | notifications: 30 | email: 31 | on_success: change 32 | on_failure: change 33 | 34 | 35 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | doc: 3 | R -s -e "pkgload::load_all('pkg');roxygen2::roxygenize('pkg')" 4 | 5 | pkg: doc 6 | rm -f *.tar.gz 7 | R CMD build --compact-vignettes="gs+qpdf" pkg 8 | 9 | check: doc 10 | rm -rf *.tar.gz 11 | R CMD build --compact-vignettes="gs+qpdf" pkg 12 | R CMD check *.tar.gz 13 | 14 | cran: doc 15 | rm -rf *.tar.gz 16 | R CMD build --compact-vignettes="gs+qpdf" ./pkg 17 | R CMD check --as-cran *.tar.gz 18 | 19 | install: doc 20 | rm -rf *.tar.gz 21 | R CMD build pkg 22 | R CMD INSTALL *.tar.gz 23 | 24 | test: doc 25 | R -s -e "tinytest::build_install_test('pkg')" 26 | 27 | manual: doc 28 | R CMD Rd2pdf --force -o manual.pdf ./pkg 29 | 30 | revdep: pkg 31 | rm -rf revdep 32 | mkdir revdep 33 | mv *.tar.gz revdep 34 | R -s -e "out <- tools::check_packages_in_dir('revdep',reverse=list(which='most'),Ncpus=3); print(summary(out)); saveRDS(out, file='revdep/output.RDS')" 35 | 36 | 37 | clean: 38 | rm -f pkg/vignettes/*.aux 39 | rm -f pkg/vignettes/*.log 40 | rm -f pkg/vignettes/*.out 41 | rm -f pkg/vignettes/using_lumberjack.pdf 42 | rm -f 
pkg/vignettes/*.toc 43 | rm -f pkg/vignettes/*.csv 44 | rm -rf *.Rcheck 45 | rm -rf revdep 46 | rm -f *.tar.gz 47 | 48 | using: 49 | ./using_lumberjack.sh 50 | 51 | 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Track changes in data 2 | [![CRAN](http://www.r-pkg.org/badges/version/lumberjack)](http://cran.r-project.org/package=lumberjack/) 3 | [![status](https://tinyverse.netlify.com/badge/lumberjack)](https://CRAN.R-project.org/package=lumberjack) 4 | [![Downloads](http://cranlogs.r-pkg.org/badges/lumberjack)](http://www.r-pkg.org/pkg/lumberjack)[![Mentioned in Awesome Official Statistics ](https://awesome.re/mentioned-badge.svg)](http://www.awesomeofficialstatistics.org) 5 | [![status](https://tinyverse.netlify.app/badge/lumberjack)](https://CRAN.R-project.org/package=lumberjack) 6 | 7 | 8 | The `lumberjack` R package allows you to: 9 | 10 | - **track changes** in **multiple data sets** as they get processed; 11 | - using **multiple loggers** for each dataset; 12 | - where loggers are **fully customizable**. 13 | 14 | You can get started by just adding one line of code to your existing data 15 | analysis script. 16 | 17 | 18 | - [Overview of functionality](./pkg) 19 | - [Talk at the eRum2018 meeting (Budapest)](https://www.youtube.com/watch?v=DNZs0CHBU4s) 20 | 21 | 22 | 23 | ### Citing lumberjack 24 | 25 | Please cite the [JSS paper](https://www.jstatsoft.org/article/view/v098i01). 26 | 27 | ``` 28 | @article{loo2020monitoring, 29 | title = {Monitoring Data in {R} with the {lumberjack} Package}, 30 | author = {Mark P. J. 
{van der Loo}}, 31 | journal = {Journal of Statistical Software}, 32 | year = {2021}, 33 | volume = {98}, 34 | number = {1}, 35 | pages = {1--13}, 36 | doi = {10.18637/jss.v098.i01}, 37 | url = {https://www.jstatsoft.org/article/view/v098i01} 38 | } 39 | ``` 40 | 41 | 42 | ### lumberjack philosophy 43 | 44 | Production scripts may contain many data transformations, aimed to clean, 45 | select, model, or augment data with new variables. Analyzing the effect of each 46 | step is cumbersome because it involves adding a lot of code that is not 47 | concerned with the primary goal of the script, namely to analyze and process 48 | data. 49 | 50 | In the lumberjack philosophy, a programmer (analyst) should be only concerned 51 | with the primary process of data analysis. 52 | 53 | ![](fig/datastep2.png) 54 | 55 | 56 | ### Installation 57 | 58 | Published version from CRAN 59 | ```r 60 | install.packages('lumberjack') 61 | ``` 62 | 63 | Development version. 64 | ```sh 65 | git clone https://github.com/markvanderloo/lumberjack 66 | cd lumberjack 67 | make install 68 | ``` 69 | 70 | ---- 71 | Copyright (2016) Mark van der Loo 72 | Licenced by [EUPL 1.2](https://eupl.eu/1.2/en/) 73 | 74 | 75 | -------------------------------------------------------------------------------- /examples/cellwise.R: -------------------------------------------------------------------------------- 1 | logfile <- tempfile(fileext=".csv") 2 | 3 | # convert height from inch to cm and log changes. 4 | # we need to set a unique key. 5 | women$sleutel <- 1:nrow(women) 6 | out <- women %L>% 7 | start_log(log=cellwise$new(key="sleutel")) %L>% 8 | {.$height <- .$height*2.54; .} %L>% 9 | dump_log(file=logfile, stop=TRUE) 10 | 11 | read.csv(logfile) %L>% head() 12 | 13 | # work with an externally defined logger. 
14 | iris$id <- seq_len(nrow(iris)) 15 | logger <- cellwise$new(key="id") 16 | iris %L>% 17 | start_log(logger) %L>% 18 | head() %L>% 19 | stop_log(dump=FALSE) 20 | logger$logdata() 21 | 22 | 23 | -------------------------------------------------------------------------------- /examples/expression_logger.R: -------------------------------------------------------------------------------- 1 | 2 | logfile <- file.path(tempfile(fileext=".csv")) 3 | e <- expression_logger$new(mean=mean(height), sd=sd(height)) 4 | 5 | out <- women %L>% 6 | start_log(e) %L>% 7 | within(height <- height * 2) %L>% 8 | within(height <- height * 3) %L>% 9 | dump_log(file=logfile) 10 | 11 | read.csv(logfile) 12 | 13 | 14 | -------------------------------------------------------------------------------- /examples/lumberjack.R: -------------------------------------------------------------------------------- 1 | # pass arguments to a function 2 | 1:3 %L>% mean() 3 | 4 | # pass arguments using "." 5 | TRUE %L>% mean(c(1,NA,3), na.rm = .) 6 | 7 | # pass arguments to an expression, using "." 8 | 1:3 %L>% { 3 * .} 9 | 10 | # in a more complicated expression, return "." explicitly 11 | women %L>% { .$height <- 2*.$height; . 
} 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /examples/no_log.R: -------------------------------------------------------------------------------- 1 | 2 | logfile <- tempfile(fileext=".csv") 3 | out <- women %L>% 4 | start_log(log=no_log$new(verbose=FALSE)) %L>% 5 | identity() %L>% 6 | head() %L>% 7 | dump_log(file=logfile, stop=TRUE) 8 | 9 | cat(readLines(logfile),"\n") # Empty file 10 | 11 | -------------------------------------------------------------------------------- /examples/simple.R: -------------------------------------------------------------------------------- 1 | 2 | logfile <- tempfile(fileext=".csv") 3 | out <- women %L>% 4 | start_log(log=simple$new(verbose=FALSE)) %L>% 5 | identity() %L>% 6 | head() %L>% 7 | dump_log(file=logfile, stop=TRUE) 8 | 9 | 10 | read.csv(logfile,stringsAsFactors=FALSE) 11 | 12 | -------------------------------------------------------------------------------- /fig/Makefile: -------------------------------------------------------------------------------- 1 | pdf: datastep2.tex 2 | pdflatex datastep2.tex 3 | 4 | png: pdf 5 | pdftoppm datastep2.pdf datastep2 -png 6 | mv datastep2-1.png datastep2.png 7 | 8 | 9 | -------------------------------------------------------------------------------- /fig/datastep2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvanderloo/lumberjack/e83dcb1e1a765bc9b391298f93c20ded4cd4a136/fig/datastep2.png -------------------------------------------------------------------------------- /fig/datastep2.tex: -------------------------------------------------------------------------------- 1 | \documentclass{standalone} 2 | \usepackage{tikz} 3 | \usetikzlibrary{arrows,positioning,decorations.pathreplacing} 4 | \tikzset{ 5 | %Define standard arrow tip 6 | >=stealth', 7 | %Define style for boxes 8 | point/.style={ 9 | rectangle, 10 | rounded corners, 11 | draw=black, very thick, 
12 | text width=6em, 13 | minimum height=2em, 14 | font=\sffamily, 15 | text centered}, 16 | annote/.style={ 17 | font=\sffamily 18 | }, 19 | % Define arrow style 20 | arrow/.style={ 21 | ->, 22 | thick, 23 | shorten <=2pt, 24 | shorten >=2pt,}, 25 | } 26 | \begin{document} 27 | \begin{tikzpicture} 28 | \node[point] (in) {data}; 29 | \node[point, right=7mm of in] (mod){process}; 30 | \node[point, right=7mm of mod] (out){data'}; 31 | \node[point, above=7mm of mod] (par){parameters}; 32 | \node[point, below=7mm of mod] (log){$\Delta$(data,data')}; 33 | \draw [rounded corners] (-1.3,1.0) rectangle ++(8.8,0.9) 34 | node [anchor=north east] {\textsf{User}}; 35 | 36 | \draw [rounded corners] (-1.3,-0.5) rectangle ++(8.8,1.4) 37 | node [anchor=north east] {\textsf{Programmer}}; 38 | 39 | \draw [rounded corners] (-1.3,-0.7) rectangle ++(8.8,-1.2) 40 | node [anchor=south east] {\textbf{\textsf{lumberjack}}}; 41 | \path (in.east) edge[arrow] (mod.west); 42 | \path (mod.east) edge[arrow] (out.west); 43 | \path (par.south) edge[arrow] (mod.north); 44 | \path (mod.south) edge[arrow] (log.north); 45 | \end{tikzpicture} 46 | \end{document} 47 | -------------------------------------------------------------------------------- /pkg/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: lumberjack 2 | Maintainer: Mark van der Loo 3 | License: EUPL 4 | Title: Track Changes in Data 5 | LazyData: no 6 | Type: Package 7 | LazyLoad: yes 8 | Authors@R: c(person("Mark", "van der Loo" 9 | , role = c("aut","cre") 10 | , email = "mark.vanderloo@gmail.com" 11 | , comment = c(ORCID="0000-0002-9807-4686")) 12 | , person("Floris", "Ruijter", role = "ctb") ) 13 | Description: A framework that allows for easy logging of changes in data. 14 | Main features: start tracking changes by adding a single line of code to 15 | an existing script. Track changes in multiple datasets, using multiple 16 | loggers. 
Add custom-built loggers or use loggers offered by other 17 | packages. . 18 | Version: 1.3.1.1 19 | URL: https://github.com/markvanderloo/lumberjack 20 | BugReports: https://github.com/markvanderloo/lumberjack/issues 21 | Imports: utils, R6 22 | Depends: R (>= 3.4.0) 23 | Suggests: tinytest 24 | RoxygenNote: 7.2.1 25 | Roxygen: list(r6=FALSE) 26 | Encoding: UTF-8 27 | -------------------------------------------------------------------------------- /pkg/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>>%") 4 | export("%L>%") 5 | export(cellwise) 6 | export(dump_log) 7 | export(expression_logger) 8 | export(filedump) 9 | export(get_log) 10 | export(no_log) 11 | export(run_file) 12 | export(simple) 13 | export(source_file) 14 | export(start_log) 15 | export(stop_log) 16 | importFrom(R6,R6Class) 17 | importFrom(utils,capture.output) 18 | -------------------------------------------------------------------------------- /pkg/NEWS: -------------------------------------------------------------------------------- 1 | version 1.3.2 2 | - Fixed Rd bug (thanks to Kurt Hornik for pointing out the changes) 3 | 4 | version 1.3.1 5 | - added no logger: not-logging logger that just presents the 6 | interface (convenience for testing, and for integration 7 | with 'dcmodify') 8 | 9 | version 1.2.1 10 | - Added JSS reference. 11 | 12 | version 1.1.5 13 | - Loggers now have access to the file name and line numbers of the 14 | expression of which the action is being logged. 15 | - The output of loggers 'cellwise', 'simple' and 'expression_logger' 16 | now contain a 'srcref' column, that gives file name and line number 17 | of the expression being logged. (thanks to anonymous reviewer B for 18 | suggesting) 19 | - Fixed typos in documentation (thanks to anonymous reviewer B). 
20 | 21 | 22 | version 1.1.4 23 | - When the $add() method of a logger is called by lumberjack it now passes 24 | a 'meta' argument containing an 'src' string that is taken from the 'srcref' 25 | object when parsing the R script. 26 | - internal changes 27 | 28 | version 1.1.3 29 | - bugfix: prefix label for file output was ignored since 1.1.2 30 | 31 | version 1.1.2 32 | - the 'file' argument in 'cellwise$new' is now called 'tempfile' to better reflect 33 | its purpose. 34 | - cellwise logger now warns when it encounters a duplicate key (this will corrupt 35 | logging info). 36 | - 'run_file' gains argument 'envir'. 37 | - new function 'source_file', with behaviour close to 'source'. 38 | - stop_log now has 'dump' argument (default: TRUE) and passes ellipsis to $dump() method. 39 | - Internal change: loggers now store internal data privately 40 | - Changed licence to EUPL 1.2 41 | 42 | version 1.0.3 43 | - fixed test that triggered a CRAN error because it wrote in the installed 44 | area (Thanks to Kurt Hornik). 45 | 46 | version 1.0.2 47 | - fixed error caught on CRAN/Debian (an example wrote locally) 48 | 49 | version 1.0.1 50 | - fixed error caught on CRAN/Debian (an example wrote locally) 51 | 52 | version 1.0.0 53 | - Start tracking changes in R objects, by adding a single line of code to 54 | an existing script. 55 | - New function 'run_file', track changes in scripts without '%L>%' 56 | - Support for multiple loggers; loggers can now support an (automatic) label 57 | which is used to distinguish output files when multiple datasets are tracked. 58 | - Argument 'log' is replaced by 'logger' in start_log(), dump_log() et al. 59 | - Completely new vignette. Removed dependence on knitr. 60 | - Completely reviewed the reference manual. 61 | - Switched to 'tinytest' testing framework. 62 | 63 | version 0.3.0 64 | - '%L>%' is now considered the default lumberjack operator. '%>>%' remains as alias. 
65 | - Added 'expression_logger': log custom expressions 66 | - Logging now also works for functions that remove the logging attribute. 67 | - The 'cellwise' logger is now compatible with 'tibble' objects (thanks to 68 | Blain Bateman). 69 | - Some documentation improvements. 70 | 71 | version 0.2.0 72 | - Added '%L>%' as synonym to avoid possible confusion with 'pipeR::`%>>%`' 73 | - Argument 'stop' of 'dump_log' is now 'TRUE' by default (was 'FALSE') 74 | - New logger named 'filedump' dumps data versions to csv. 75 | - Function 'stop_log()' will now call '$stop()' if defined. 76 | - Package now depends on R >= 3.4.0. Some tests failed on Windows/OSX + R3.3.3 77 | 78 | version 0.1.0 79 | - initial release 80 | -------------------------------------------------------------------------------- /pkg/R/cellwise.R: -------------------------------------------------------------------------------- 1 | # Implementation of the cellwise logger. 2 | 3 | 4 | #' The cellwise logger. 5 | #' 6 | #' The cellwise logger registers the row, column, old, and new value of cells 7 | #' that changed, along with a step number, timestamp, source reference, and the 8 | #' expression used to alter a dataset. 9 | #' 10 | #' @section Creating a logger: 11 | #' \code{cellwise$new(key, verbose=TRUE, tempfile=file.path(tempdir(),"cellwise.csv"))} 12 | #' \tabular{ll}{ 13 | #' \code{key}\tab \code{[character|integer]} index to column that uniquely identifies a row.\cr 14 | #' \code{verbose}\tab \code{[logical]} toggle verbosity.\cr 15 | #' \code{tempfile}\tab \code{[character]} filename for temporary log storage. \cr 16 | #' } 17 | #' 18 | #' @usage 19 | #' cellwise(key, verbose=TRUE, tempfile=file.path(tempdir(),"cellwise.csv")) 20 | #' 21 | #' @param key \code{[character|integer]} index to column that uniquely identifies a row. 22 | #' @param verbose \code{[logical]} toggle verbosity. 23 | #' @param tempfile \code{[character]} filename for temporary log storage. 
24 | #' 25 | #' @section Dump options: 26 | #' 27 | #' \code{$dump(file=NULL)} 28 | #' \tabular{ll}{ 29 | #' \code{file}\tab \code{[character]} location to write final output to.\cr 30 | #' } 31 | #' The default location is \code{"cellwise.csv"} in an interactive session, and 32 | #' \code{"DATA_cellwise.csv"} in a script that is executed via \code{\link{run_file}}. 33 | #' Here, \code{DATA} is the variable name of the data being tracked or the 34 | #' \code{label} provided with \code{\link{start_log}}. 35 | #' 36 | #' 37 | #' @section Getting data from the logger: 38 | #' 39 | #' \code{$logdata()} Returns a data frame with the current log. 40 | #' 41 | #' @section Details: 42 | #' At initialization, the cellwise logger opens a connection to a temporary 43 | #' file. All logging info is appended to that connection. When 44 | #' \code{\link{dump_log}} is called, the connection is closed and the temporary 45 | #' file is copied to the output file. The connection is not reopened, so changes made after a dump are no longer logged. An open connection is closed 46 | #' automatically when the logger is destroyed, for example when calling 47 | #' \code{\link{stop_log}()}. 48 | #' 49 | #' @docType class 50 | #' @format An \code{R6} class object. 
#'
#' @example ../examples/cellwise.R
#'
#' @family loggers
#' @export
cellwise <- R6Class("cellwise"
  , private = list(
      tmpfile = NULL   # path of the temporary csv file that holds the log
    , con     = NULL   # connection to 'tmpfile'; NULL once it has been closed
    , n       = NULL   # step counter, incremented on every add()
    , verbose = NULL   # emit a message when the log is dumped?
    , key     = NULL   # column that uniquely identifies a row
  )
  , public = list(
      label = NULL
      # Open the temporary log file and write the csv header to it.
    , initialize = function(key, verbose=TRUE, tempfile=file.path(tempdir(),"cellwise.csv")){
        if (missing(key)) stop("you must provide a key")
        private$tmpfile <- tempfile
        private$con <- file(private$tmpfile, open="wt")
        private$n <- 0
        private$verbose <- verbose
        private$key <- key
        utils::write.csv(
          data.frame(
              step=integer(0)
            , time=character(0)
            , srcref=character(0)
            , expression=character(0)
            , key=character(0)
            , variable=character(0)
            , old=character(0)
            , new=character(0)
          )
          , file=private$con
          , row.names=FALSE
        )
      }
      # Close the connection; iclose() returns NULL so 'con' is reset as well.
    , stop = function(...){
        private$con <- iclose(private$con)
      }
      # Append one row per changed cell to the temporary log file.
      # Does nothing when the connection was closed by stop() or dump().
    , add = function(meta, input, output){
        if (!is_open(private$con)) return()
        private$n <- private$n + 1
        # timestamp
        ts <- strftime(Sys.time(), usetz=TRUE)
        d <- celldiff(input, output, private$key)
        if (nrow(d) == 0) return()
        d$step <- private$n
        d$time <- ts
        d$expression <- meta$src
        d$srcref <- get_srcref(meta)
        # reorder to the column order of the header written in initialize()
        d <- d[c("step","time","srcref","expression"
                , "key","variable","value.x","value.y")]

        utils::write.table(d, file = private$con
            , row.names=FALSE, col.names=FALSE, sep=",")
      }
      # Close the connection and copy the temporary file to 'file'.
      # The connection is not reopened here, so later add() calls are no-ops.
    , dump = function(file=NULL){
        private$con <- iclose(private$con)
        if (is.null(file)){
          file <- "cellwise.csv"
          if (!is.null(self$label) && self$label != "" ) file <- paste(self$label,file,sep="_")
        }
        copied <- file.copy(from=private$tmpfile, to=file, overwrite = TRUE)
        if (!isTRUE(copied)){
          # do not claim success when the copy failed
          warnf("Could not write log to %s", file)
        } else if (private$verbose){
          msgf("Dumped a log at %s",file)
        }
      }
    , finalize = function(){
        if (is_open(private$con)) close(private$con)
      }
      # Read the current log back from the temporary file.
    , logdata = function(){
        # make sure buffered rows are on disk before reading them back
        if (is_open(private$con)) flush(private$con)
        utils::read.csv(private$tmpfile)
      }
  )
)

# A reasonable connection closer: closes 'con' unless it is NULL and
# always returns NULL (invisibly), so callers can overwrite the closed
# connection object with the result.
iclose <- function(con,...){
  if (!is.null(con)) close(con,...)
  invisible(NULL)
}

# A reasonable connection checker that really only works
# if the reasonable closer is used to overwrite the connection
# object. isOpen crashes on closed (hence destroyed) connections :-(.
is_open <- function(con,...){
  !is.null(con) && isOpen(con)
}

# a decent sort: order the rows of 'x' by the columns named in 'by'.
isort <- function(x, by,...){
  # unname, so column names can never be mistaken for arguments of order()
  x[do.call("order", unname(as.list(x[by]))),,drop=FALSE]
}

# concatenate two vectors as character
cc <- function(x,y) c(as.character(x), as.character(y))

mpaste <- function(...) paste(...,sep=".@.")

# send x to long format, values as character.
# Returns a data frame with columns 'key', 'variable' and 'value', sorted by
# key and variable. 'key' is a column name or a column index.
keyframe <- function(x, key){
  # an integer key indexes a column: translate it to the column name, so the
  # key column is really excluded from the variables below.
  key_name <- if (is.character(key)) key else names(x)[key]
  col_x <- names(x)[names(x) != key_name]
  # we need double brackets, for tibbles.
  kf <- expand.grid(key=x[[key]],variable=col_x)
  # we need as.data.frame for certain tibbles (created with group_by).
  # Every column is converted explicitly, so values are character also when
  # there is only a single non-key column.
  values <- lapply(as.data.frame(x[col_x]), as.character)
  kf$value <- as.character(unlist(values, use.names=FALSE))
  isort(kf, c("key","variable"))
}

# Cell-by-cell comparison of two data frames that share a key column.
# Returns the rows of the merged long-format frames (columns 'key',
# 'variable', 'value.x', 'value.y') for which old and new value differ,
# where NA versus non-NA counts as a difference.
celldiff <- function(x,y,key){
  if ( anyDuplicated(x[[key]]) > 0 || anyDuplicated(y[[key]]) > 0 ){
    warnf("Detected duplicates in key variable '%s'. Logging data corrupted.",key)
  }

  kx <- keyframe(x,key)
  ky <- keyframe(y,key)
  kxy <- merge(kx,ky,by=c("key","variable"), all=TRUE)
  na_x <- is.na(kxy$value.x)
  na_y <- is.na(kxy$value.y)
  d_xy <- (na_x & !na_y) | (!na_x & na_y) |
          (!na_x & !na_y & kxy$value.x != kxy$value.y)
  kxy[d_xy,]
}
6 | #' 7 | #' Records the result of one or more user-defined expressions that perform 8 | #' calculations on the object being tracked. 9 | #' 10 | #' @section Creating a logger: 11 | #' \code{expression_logger$new(..., verbose=TRUE)} 12 | #' \tabular{ll}{ 13 | #' \code{...}\tab A comma-separated list of \code{name = expression} pairs. \cr 14 | #' \code{verbose}\tab \code{[logical]} toggle verbosity. 15 | #' } 16 | #' 17 | #' Each expression will be evaluated in the context of the object tracked with 18 | #' this logger. An expression is expected to have a single \code{numeric} or 19 | #' \code{character} output. 20 | #' 21 | #' 22 | #' @section Dump options: 23 | #' 24 | #' \code{$dump(file=NULL)} 25 | #' \tabular{ll}{ 26 | #' \code{file}\tab \code{[character]} location to write final output to.\cr 27 | #' } 28 | #' The default location is \code{"expression.csv"} in an interactive session, and 29 | #' \code{"DATA_expression.csv"} in a script that executed via \code{\link{run_file}}. 30 | #' Here, \code{DATA} is the variable name of the data being tracked 31 | #' or the \code{label} provided with \code{\link{start_log}}. 32 | #' 33 | #' 34 | #' 35 | #' @docType class 36 | #' @format An \code{R6} class object. 
#'
#' @example ../examples/expression_logger.R
#'
#' @family loggers
#' @export
expression_logger <- R6Class("expression_logger"
  , private=list(
      step       = NULL   # step numbers, one per add() call
    , s          = 0      # step counter
    , expr       = NULL   # list of unevaluated user expressions
    , expression = NULL   # source text of each logged step
    , srcref     = NULL   # source reference of each logged step
    , result     = NULL   # data frame with one row of results per step
    , verbose    = TRUE
  )
  , public = list(
      label=NULL
      # Store the expressions passed through '...' without evaluating them.
    , initialize = function(..., verbose=TRUE){
        private$step <- c()
        private$expression <- c()
        private$verbose <- verbose
        private$expr <- as.list(substitute(list(...))[-1])
        private$srcref <- c()
      }
      # Evaluate every stored expression in the context of 'output' and
      # append the results as a new row.
    , add = function(meta, input, output){
        private$s <- private$s + 1
        private$step <- append(private$step, private$s)
        private$expression <- append(private$expression, meta$src)
        private$srcref <- append(private$srcref, get_srcref(meta))
        # Pass the expression to eval() directly: looking it up from inside
        # the data (as 'with(output, eval(e))' does) picks up a column named
        # 'e' instead of the expression.
        out <- lapply(private$expr, function(e){
          eval(e, envir = output, enclos = environment())
        })
        out <- do.call(data.frame, out)
        if (is.null(private$result)){
          private$result <- out
        } else {
          private$result <- rbind(private$result, out)
        }
      }
      # Write step, source reference, expression and results to csv.
    , dump = function(file=NULL,...){
        if (is.null(file)){
          file <- "expression.csv"
          if (!is.null(self$label) && self$label != "") file <- paste(self$label, file, sep="_")
        }
        if (length(private$step) == 0){
          # nothing was logged yet: write a header-only file, rather than the
          # one-cell matrix that cbind() makes of all-NULL input.
          d <- data.frame(
              step = integer(0)
            , srcref = character(0)
            , expression = character(0)
            , stringsAsFactors = FALSE)
        } else {
          d <- cbind(
              step = private$step
            , srcref = private$srcref
            , expression = private$expression
            , private$result
            , stringsAsFactors = FALSE)
        }
        utils::write.csv(d, file=file, row.names=FALSE)
        if ( private$verbose ) msgf("Dumped a log at %s", file)
      }
  )
)
6 | #' 7 | #' @section Creating a logger: 8 | #' 9 | #' \code{filedump$new(dir=file.path(tempdir(),"filedump"), filename="\%sstep\%03d.csv",verbose=TRUE)} 10 | #' \tabular{ll}{ 11 | #' \code{dir}\tab \code{[character]} Directory location to write the file dumps.\cr 12 | #' \code{filename}\tab \code{[character]} Template, used to create file names. 13 | #' to create a file name.\cr 14 | #' \code{verbose}\tab \code{[logical]} toggle verbosity. 15 | #' } 16 | #' 17 | #' File locations are created with \code{file.path(dir, file)}, where 18 | #' \code{file} is generated as \code{sprintf(filename, DATA, STEP)}. In 19 | #' interactive sessions \code{DATA=""}. In sessions where a script is executed 20 | #' using \code{\link{run_file}}, \code{DATA} is the name of the R object being 21 | #' tracked or the \code{label} provided with \code{\link{start_log}}. 22 | #' \code{STEP} is a counter that increases at each dump. 23 | #' 24 | #' @section Dump options: 25 | #' 26 | #' \code{$dump(...)} 27 | #' \tabular{ll}{ 28 | #' \code{...}\tab Currently unused.\cr 29 | #' } 30 | #' 31 | #' @section Retrieve log data: 32 | #' 33 | #' \code{$logdata()} returns a list of data frames, sorted in the order returned by 34 | #' \code{base::dir()} 35 | #' 36 | #' @section Details: 37 | #' 38 | #' If \code{dir} does not exist it is created. 39 | #' 40 | #' 41 | #' @docType class 42 | #' @format An \code{R6} class object. 
#'
#' @examples
#' logger <- filedump$new()
#'
#' out <- women %L>%
#'   start_log(logger) %L>%
#'   within(height <- height * 2) %L>%
#'   within(height <- height * 3) %L>%
#'   dump_log()
#' dir(file.path(tempdir(),"filedump"))
#'
#'
#' @family loggers
#' @export
filedump <- R6Class("filedump"
  , private=list(
      n        = NULL   # number of versions written after the initial one
    , dir      = NULL   # directory the versions are written to
    , verbose  = NULL
    , filename = NULL   # sprintf template taking a prefix and a step number
  )
  , public = list(
      label=NULL
      # Remember the settings and create the output directory when needed.
    , initialize = function(dir = file.path(tempdir(),"filedump")
                 , filename="%sstep%03d.csv", verbose = TRUE){
        private$n <- 0
        private$dir <- dir
        if (!dir.exists(dir)){
          dir.create(dir,recursive = TRUE)
          if (verbose){
            msgf("Created %s", normalizePath(dir))
          }
        }
        private$verbose <- verbose
        private$filename <- filename
      }
      # Write the new version of the data; on the first call the input is
      # written as well (as step 0).
    , add = function(meta, input, output){
        # An empty label means 'no label', as in the other loggers; it must
        # not yield a stray "_" prefix.
        has_label <- !is.null(self$label) && self$label != ""
        prefix <- if (has_label) paste0(self$label,"_") else ""
        if (private$n == 0){
          outname <- file.path(private$dir, sprintf(private$filename, prefix, private$n))
          utils::write.csv(input, file=outname, row.names=FALSE)
        }
        private$n <- private$n + 1
        outname <- file.path(private$dir, sprintf(private$filename, prefix, private$n))
        utils::write.csv(output, file=outname, row.names=FALSE)
      }
      # Files are written by add(); dump only reports where they are.
    , dump = function(...){
        if ( private$verbose ){
          msgf("Filedumps were written to %s", normalizePath(private$dir))
        }
      }
      # Read every file in the dump directory, in the order given by dir().
    , logdata = function(){
        # this crashes covr
        # if (!dir.exists(private$dir)){
        #   stopf("The directory %s does not exist.",private$dir)
        # }
        fl <- dir(private$dir,full.names = TRUE)
        lapply(fl, utils::read.csv)
      }
  ))
# Name of the attribute in which the loggers of a data item are stored.
LOGNAME <- "__log__"

#' Get log object from a data item
#'
#'
#' @param data An R object.
#' @param logger \code{[character]} scalar. Logger to return. Can be
#'   \code{NULL} when a single logger is attached.
#' @return A logging object, or \code{NULL} if none exists.
#'
#'
#' @family control
#'
#' @export
get_log <- function(data, logger=NULL){
  store <- attr(data, which=LOGNAME, exact=TRUE)

  # no store, or a store without loggers: nothing to return
  if ( is.null(store) || length(ls(store)) == 0 ){
    return(NULL)
  }

  loggers <- ls(store)

  if (is.null(logger)){
    if ( length(loggers) == 1 ){
      return(store[[loggers]])
    } else {
      stopf("Dataset has multiple loggers attached. Specify one of: %s"
         , paste(sprintf("'%s'",loggers), collapse=","))
    }
  }

  if ( is.null(store[[logger]]) ){
    stopf("Dataset is not logged by '%s'", logger)
  }
  store[[logger]]
}

# Does 'data' carry a logger store? The attribute name is matched exactly,
# as in get_log(), so attributes whose name merely starts with LOGNAME do
# not count.
has_log <- function(data){
  !is.null(attr(data, LOGNAME, exact=TRUE))
}
#'
#'
#' @section Details:
#' All loggers that come with \pkg{lumberjack} support labeling. The label is
#' used by \code{dump} methods to create a unique file name for each
#' object/logger combination.
#'
#' If \code{label} is not supplied, \code{start_log} attempts to create a label
#' from the name of the \code{data} variable. This probably fails when
#' \code{data} is not a variable but an expression (like \code{read.csv...}). A
#' label is also not created when data is passed via the lumberjack not-a-pipe
#' operator. In that case the label is (silently) not set. In cases where
#' multiple datasets are logged with the same type of logger, this could lead
#' to overwriting of dump files, unless \code{file} is explicitly defined when
#' calling \code{\link{dump_log}}.
#'
#' @examples
#' logfile <- tempfile(fileext=".csv")
#' women %L>%
#'   start_log(logger=simple$new()) %L>%
#'   transform(height_cm = height*2.54) %L>%
#'   dump_log(file=logfile)
#' logdata <- read.csv(logfile)
#' head(logdata)
#'
#' @family control
#' @export
start_log <- function(data, logger=simple$new(), label=NULL){
  # Attach an (empty) store on first use; the store is an environment, so
  # later additions are visible through every copy of 'data'.
  if ( is.null(attr(data, LOGNAME)) ){
    attr(data, LOGNAME) <- new.env()
  }
  store <- attr(data, LOGNAME)

  # Loggers are keyed by their first class name: one logger per class.
  newlogger <- class(logger)[[1]]
  if ( newlogger %in% ls(store) ){
    warnf("Can not add second logger of class '%s'. Ignoring", newlogger)
    return(invisible(data))
  }

  # Loggers exposing a 'label' field receive a label. NOTE(review): this
  # substitute() call is deliberately left after the attribute assignment
  # above; moving it would change which label is derived from 'data'.
  if ( "label" %in% ls(logger) ){
    dataset <- as.character(substitute(data))
    if ( !is.null(label) ){
      logger$label <- paste(label,collapse="")
    } else if ( length(dataset) == 1 ){
      logger$label <- dataset
    } else {
      logger$label <- ""
    }
  }
  store[[newlogger]] <- logger
  invisible(data)
}

# Remove the named logger(s) from the store of 'data'. The log attribute
# itself is dropped once the store is empty. Returns 'data'.
remove_log <- function(data, logger){
  store <- attr(data, LOGNAME)
  if ( is.null(store) ){
    return(data)
  }
  rm(list=logger, envir=store)
  if ( length(ls(store)) == 0 ){
    attr(data, LOGNAME) <- NULL
  }
  data
}


# Class names of all loggers attached to 'data' (character(0) if none).
all_loggers <- function(data){
  store <- attr(data,LOGNAME)
  if ( !is.null(store) ){
    return(ls(store))
  }
  character(0)
}

#' Dump logging data
#'
#' Calls the \code{$dump(...)} method of logger(s) tracking an R object.
#'
#'
#' @param data An R object tracked by one or more loggers.
#' @param logger \code{[character]} vector. Class names of loggers to dump (e.g.
#'   \code{"simple"}). When \code{logger=NULL}, all loggers are dumped
#'   for this object.
#' @param stop \code{[logical]} stop logging after the dump? Removes the
#'   logger(s) tracking the object.
#' @param ... Arguments passed to the \code{dump} method of the logger.
#'
#' @return \code{data}, invisibly.
#'
#'
#' @family control
#'
#'
#' @examples
#' logfile <- tempfile(fileext=".csv")
#' women %L>%
#'   start_log(logger=simple$new()) %L>%
#'   transform(height_cm = height*2.54) %L>%
#'   dump_log(file=logfile)
#' logdata <- read.csv(logfile)
#' head(logdata)
#'
#'
#' @export
dump_log <- function(data, logger=NULL,stop=TRUE, ...){
  if ( is.null(logger) ) logger <- all_loggers(data)
  # Dump every requested logger before removing any of them. Returning from
  # inside the loop would skip all loggers after the first one.
  for ( lggr in logger ){
    log <- get_log(data, logger=lggr)
    # get_log() yields NULL when no log store is attached at all.
    if ( is.null(log) ) next
    log$dump(...)
  }
  if ( stop && length(logger) > 0 ){
    data <- remove_log(data, logger=logger)
  }
  invisible(data)
}

#' Stop logging
#'
#' Calls the logger's \code{$stop()} method if it exists, and removes
#' the logger as attribute from \code{data}.
#'
#' @param data An R object.
#' @param logger \code{[character]} vector. Class names of loggers to stop (e.g.
#'   \code{"simple"}). When \code{logger=NULL}, all loggers are stopped and
#'   removed for this data.
#' @param dump \code{['logical']} Toggle dump log file.
#' @param ... Passed to the logger's \code{dump} method, if it exists.
#'
#' @return The data, invisibly.
#'
#'
#' @examples
#' logfile <- tempfile(fileext=".csv")
#' women %L>%
#'   start_log(logger=simple$new()) %L>%
#'   transform(height_cm = height*2.54) %L>%
#'   dump_log(file=logfile)
#' logdata <- read.csv(logfile)
#' head(logdata)
#'
#' @family control
#' @export
stop_log <- function(data, logger=NULL, dump=TRUE, ...){
  if (is.null(logger)) logger <- all_loggers(data)
  for ( lggr in logger ){
    log <- get_log(data, logger = lggr)
    # get_log() yields NULL when no log store is attached at all.
    if ( is.null(log) ) next
    if (isTRUE(dump)) log$dump(...)
    if (is.function(log$stop)) log$stop()
    # Keep the result: remove_log() drops the log attribute from its own
    # copy of 'data' once the last logger is gone.
    data <- remove_log(data, lggr)
  }
  invisible(data)
}



#' The lumberjack operator
#'
#' The not-a-pipe operator that tracks changes in data.
#'
#'
#' @param lhs Input value
#' @param rhs Function call or 'dotted' expression (see below).
#'   as value
#'
#' @section Piping:
#'
#' The operators \code{\%L>\%} and \code{\%>>\%} are synonyms. The \code{\%L>\%}
#' is the default since version 0.3.0 to avoid confusion with the \code{\%>>\%}
#' operator of the \code{pipeR} package but \code{\%>>\%} still works.
#'
#' The lumberjack operator behaves as a simplified version of the
#' \code{magrittr} pipe operator. The basic behavior of \code{lhs \%>>\% rhs} is
#' the following:
#'
#'\itemize{
#' \item{If the \code{rhs} uses dot-variables (\code{.}), these are interpreted
#'   as the left-hand side, except in formulas where dots already have a special
#'   meaning.}
#' \item{If the \code{rhs} is a function call, with no dot-variables used, the
#'   \code{lhs} is used as its first argument.}
#' }
#' The most notable differences with `magrittr` are the following.
#' \itemize{
#' \item{ it does not allow you to define functions in the magrittr style,
#'   like \code{a <- . \%>\% sin(.) }
#' }
#' \item{there is no assignment-pipe like \code{\%<>\%}.}
#' \item{you cannot do things like \code{x \%>\% sin} (without the brackets).}
#' }
#'
#'
#' @section Logging:
#'
#' If the left-hand-side is tagged for logging, the lumberjack will update the
#' log by calling the logger's \code{$add()} method, with arguments \code{meta},
#' \code{input}, \code{output}.
Here, \code{meta} is a list with information on
#' the operations performed, and input and output are the left-hand-side and the
#' result, respectively.
#'
#' @example ../examples/lumberjack.R
#' @family control
#' @export
`%>>%` <- function(lhs, rhs){
  store <- attr(lhs, LOGNAME)

  # basic pipe action
  rhs <- substitute(rhs)
  # need to pass environment so symbols defined there and passed
  # as argument can be resolved in NSE situations (see test_simple
  # for an example).
  out <- pipe(lhs, rhs, env=parent.frame())

  meta <- list(
      expr = as.expression(rhs)
    , src  = as.character(as.expression(rhs))
  )
  # update logging if set
  if ( has_log(lhs) ){
    for (lggr in all_loggers(lhs)){
      log <- get_log(lhs, lggr)
      log$add(meta=meta, input=lhs, output=out)
    }
  }

  # Name of the function called on the right-hand side. For a namespaced call
  # such as lumberjack::dump_log(), rhs[[1]] is itself a call that as.character()
  # turns into c("::", "lumberjack", "dump_log"), so only the last element is
  # used. any() keeps the result a single logical, as '&&' requires.
  fun_name <- as.character(if (is.call(rhs)) rhs[[1]] else rhs)
  detaches <- any(
    fun_name[length(fun_name)] %in% c("dump_log","remove_log","stop_log")
  )

  # if a naughty function has removed the log-store, we add it back.
  # except when it was removed by dump_log(), remove_log() or stop_log()
  if ( has_log(lhs) && !detaches && !has_log(out) ){
    attr(out,LOGNAME) <- store
  }
  out
}


#' @rdname grapes-greater-than-greater-than-grapes
#' @export
`%L>%` <- `%>>%`


-------------------------------------------------------------------------------- /pkg/R/no_logger.R: --------------------------------------------------------------------------------
# Implementation of the nop logger.


#' The nop logger
#'
#' Record nothing, but present logger interface.
#'
#' @section Creating a logger:
#'
#' \code{no_log$new(verbose=TRUE)}
#' \tabular{ll}{
#'   \code{verbose}\tab toggle verbosity
#' }
#'
#' @section Dump options:
#'
#' \code{$dump(file=NULL,...)}
#' \tabular{ll}{
#'   \code{file}\tab Ignored.
Filename or \code{\link[base]{connection}} to write output to.\cr
#'   \code{...}\tab Ignored. extra options passed to \code{\link[utils]{write.csv}}, except
#'       \code{row.names}, which is set to \code{FALSE}.\cr
#' }
#'
#' No logging data is recorded: \code{$dump()} writes an empty csv file
#' (\code{"no_log.csv"} when \code{file=NULL}) and emits a message when
#' \code{verbose=TRUE}.
#'
#' @section Get data:
#' \code{$logdata()} Returns empty data.frame.
#'
#'
#' @docType class
#' @format An \code{R6} class object.
#'
#' @example ../examples/no_log.R
#'
#' @family loggers
#' @export
no_log <- R6Class("no_log"
  , private = list(
      verbose = NULL
  )
  , public = list(
      label = NULL
    , initialize = function( verbose = TRUE){
        private$verbose <- verbose
      }
    , add = function(meta, input, output){
        # Intentionally empty: this logger records nothing.
      }
    , dump = function(file=NULL,...){
        # An empty data frame is written, so the logger interface still
        # produces a file at the requested (or default) location.
        if (is.null(file)) file <- "no_log.csv"
        write.csv(data.frame(), file=file, row.names = FALSE,...)
        if (is.character(file) && private$verbose ){
          msgf("no_log dumped at %s", normalizePath(file))
        }
      }
    , logdata = function(){
        data.frame()
      }
  )
)
-------------------------------------------------------------------------------- /pkg/R/run.R: --------------------------------------------------------------------------------
# Names of the loggers stored for 'dataset': every entry of the dataset's
# environment except the "data" snapshot.
get_loggers <- function(store, dataset){
  setdiff(ls(store[[dataset]]), "data")
}


# store: environment to store data and logger.
#
# store$dataset$data
#              $simple
#              $cellwise

# Create the start_log() replacement used while a script is executed by
# run_file(). Instead of attaching loggers to the data, it registers a
# snapshot of the data and the logger in 'store', keyed by the name of the
# variable passed as 'data'.
log_capture <- function(store){
  function(data, logger=simple$new(), label=NULL){
    dataset <- as.character(substitute(data))
    # Tracking works by variable name, so 'data' must be a plain symbol.
    if ( length(dataset) != 1 ){
      stopf("start_log() needs the name of a variable as 'data' in a script executed with run_file()")
    }
    if (!dataset %in% ls(store)){
      store[[dataset]] <- new.env()
      store[[dataset]]$data <- data
    }
    loggers <- get_loggers(store, dataset)
    newlogger <- class(logger)[[1]]
    if ( newlogger %in% loggers ){
      warnf("Can not add a second logger of class '%s' to '%s'. Ignoring."
        , newlogger, dataset)
      return(invisible(data))
    }

    # loggers that have a 'label' slot have access to
    # the name of the dataset
    if ( "label" %in% ls(logger) ){
      logger$label <- if (!is.null(label)) paste(label,collapse="") else dataset
    }
    store[[dataset]][[newlogger]] <- logger
    invisible(data)
  }
}

# We need some detailed dump options because there may be multiple loggers, for
# multiple datasets and the user may want to choose what logs to dump.
#
# Returns the dump_log() replacement used by run_file():
#   dump_log()                  all loggers of all datasets
#   dump_log(logger=)           the named loggers, for every dataset having them
#   dump_log(data)              all loggers of one dataset
#   dump_log(data, logger=)     the named loggers of one dataset
dump_capture <- function(store){
  function(data=NULL, logger = NULL, stop=TRUE, ...){

    # Dump (and, if requested, remove) the given loggers of one dataset.
    # '...' and 'stop' are taken from the enclosing call.
    dump_loggers <- function(dataset, loggers, warn_missing){
      for (lggr in loggers){
        if ( is.null(store[[dataset]][[lggr]]) ){
          if (warn_missing){
            warnf("Logger '%s' not found for dataset '%s'", lggr, dataset)
          }
          next
        }
        store[[dataset]][[lggr]]$dump(...)
        if (stop) rm(list=lggr, envir=store[[dataset]])
      }
    }

    if ( !is.null(logger) && !is.character(logger) && is.null(data) ){
      stop("Invalid input for 'dump'",call.=FALSE)
    }

    if (is.null(data)){
      # No dataset given: loop over all datasets. With logger=NULL every
      # logger is dumped, otherwise only the named ones. Datasets that lack
      # a named logger are skipped silently.
      for (dataset in ls(store)){
        loggers <- if (is.null(logger)) get_loggers(store, dataset) else logger
        dump_loggers(dataset, loggers, warn_missing=FALSE)
      }
      return(invisible(NULL))
    }

    dataset <- as.character(substitute(data))
    if (is.null(store[[dataset]])){
      msgf("Note: dataset '%s' is not logged", dataset)
      return(invisible(data))
    }

    if ( is.null(logger) ){
      # dump all loggers for the current dataset
      dump_loggers(dataset, get_loggers(store, dataset), warn_missing=FALSE)
      return(invisible(data))
    }

    if (is.character(logger)){
      dump_loggers(dataset, logger, warn_missing=TRUE)
      return(invisible(data))
    }
    stop("Invalid input for 'dump'",call.=FALSE)
  }
}

# Called by run_file() after each evaluated expression: hand every logger of
# every tracked dataset the previous snapshot and the current value of the
# variable in 'envir', then store the current value as the new snapshot.
update_loggers <- function(store, envir, expr, src, file, lines){
  datasets <- ls(store)
  meta <- list(expr = expr
             , src  = src
             , file = file
             , line = lines)

  for ( dataset in datasets ){
    old <- store[[dataset]]$data
    new <- get(dataset, envir=envir)
    loggers <- get_loggers(store, dataset)
    for ( logger in loggers ){
      # index by the current dataset, not by the vector of all datasets
      store[[dataset]][[logger]]$add(meta, old, new)
    }
    store[[dataset]]$data <- new
  }
  invisible(NULL)
}

#' Run a file while tracking changes in data
#'
#' Run all code in a file. Changes in data that are tracked, (e.g. with
#' \code{\link{start_log}(data)}) will be followed by the assigned loggers.
#'
#'
#' @param file \code{[character]} file to run.
#' @param auto_dump \code{[logical]} Toggle automatically dump all remaining logs
#'        after executing \code{file}.
#' @param envir \code{[environment]} to run the code in. By default a new environment will be created
#'        with \code{.GlobalEnv} as parent.
#'
#'
#' @section Details:
#' \code{run\_file} runs code in a separate environment, and returns the environment with all
#' the variables created by the code. \code{source\_file} acts like \code{\link{source}} and
#' runs all the code in the current global workspace (\code{.GlobalEnv}).
#'
#'
#' @return The environment where the code was executed, invisibly.
#'
#'
#' @examples
#' # using 'dontrun'
#' \dontrun{
#'   # create an R file, with logging.
#'   script <- "
#'   library(lumberjack)
#'   data(women)
#'   start_log(women, logger=simple$new())
#'   women$height <- women$height*2.54
#'   women$weight <- women$weight*0.453592
#'   dump_log()
#'   "
#'   write(script, file="myscript.R")
#'   # run the script
#'   lumberjack::run_file("myscript.R")
#'   # read the logfile
#'   read.csv("women_simple.csv")
#' }
#'
#' @family control
#' @export
run_file <- function(file, auto_dump=TRUE, envir=NULL){
  fname <- basename(file)
  dname <- dirname(file)
  # The script is run from its own directory; restore the working directory
  # however this function exits.
  oldwd <- getwd()
  on.exit(setwd(oldwd), add=TRUE)
  setwd(dname)

  if (is.null(envir)) envir <- new.env(parent=.GlobalEnv)

  store <- new.env()

  # Inject capturing versions of start_log/dump_log into the execution
  # environment. They are removed again on exit, also when the script fails,
  # so they never linger in 'envir' (which may be .GlobalEnv).
  envir$start_log <- log_capture(store)
  envir$dump_log  <- dump_capture(store)
  on.exit({
    injected <- c("start_log","dump_log")
    rm(list=injected[injected %in% ls(envir)], envir=envir)
  }, add=TRUE)

  prog <- parse(fname, keep.source=TRUE)
  src  <- attr(prog, "srcref")

  # Evaluate expression by expression, so the loggers can be updated after
  # every step, with the source lines of that step.
  for ( i in seq_along(prog) ){
    eval(prog[[i]], envir=envir)
    lines <- c(first = src[[i]][1], last = src[[i]][3])
    update_loggers(store = store
                 , envir = envir
                 , expr  = prog[[i]]
                 , src   = paste(as.character(src[[i]]),collapse="\n")
                 , file  = fname
                 , lines = lines )
  }
  # dump everything not dumped yet.
  if (auto_dump) envir$dump_log()

  invisible(envir)
}

#' @rdname run_file
#' @export
source_file <- function(file, auto_dump=TRUE){
  run_file(file, auto_dump=auto_dump, envir=.GlobalEnv)
}



-------------------------------------------------------------------------------- /pkg/R/simple.R: --------------------------------------------------------------------------------
# Implementation of the simple logger.


#' The simple logger
#'
#' Record for each expression a \code{POSIXct} timestamp and a \code{logical}
#' indicating whether the tracked object has changed.
#'
#' @section Creating a logger:
#'
#' \code{simple$new(verbose=TRUE)}
#' \tabular{ll}{
#'   \code{verbose}\tab toggle verbosity
#' }
#'
#' @section Dump options:
#'
#' \code{$dump(file=NULL,...)}
#' \tabular{ll}{
#'   \code{file}\tab filename or \code{\link[base]{connection}} to write output to.\cr
#'   \code{...}\tab extra options passed to \code{\link[utils]{write.csv}}, except
#'       \code{row.names}, which is set to \code{FALSE}.\cr
#' }
#'
#' The default location is \code{"simple.csv"} in an interactive session, and
#' \code{"DATA_simple.csv"} in a script that is executed via \code{\link{run_file}}.
#' Here, \code{DATA} is the variable name of the data being tracked
#' or the \code{label} provided with \code{\link{start_log}}.
#'
#'
#' @section Get data:
#' \code{$logdata()} Returns a data frame with the current log.
#'
#'
#' @docType class
#' @format An \code{R6} class object.
#'
#' @example ../examples/simple.R
#'
#' @family loggers
#' @export
simple <- R6Class("simple"
  , private = list(
      n = NULL
    , store = NULL
    , verbose = NULL
  )
  , public = list(
      label = NULL
    , initialize = function( verbose = TRUE){
        private$n <- 0
        private$store <- new.env()
        private$verbose <- verbose
      }
    , add = function(meta, input, output){
        # One single-row data frame per step, stored as step001, step002, ...
        private$n <- private$n + 1
        logname <- sprintf("step%03d",private$n)
        logdat <- data.frame(step = private$n
          , time = Sys.time()
          , srcref = get_srcref(meta)
          , expression = meta$src
          , changed = !identical(input, output)
          , stringsAsFactors = FALSE)
        private$store[[logname]] <- logdat
      }
    , dump = function(file=NULL,...){
        log_df <- do.call(rbind,mget(ls(private$store), private$store))
        if (is.null(file)){
          # default file name, prefixed with the label when one is set
          file <- "simple.csv"
          if (!is.null(self$label) && self$label != "" ) file <- paste(self$label,file,sep="_")
        }
        write.csv(log_df, file=file, row.names = FALSE,...)
        if (is.character(file) && private$verbose ){
          msgf("Dumped a log at %s", normalizePath(file))
        }
      }
    , logdata = function(){
        # The log is obtained by a silent round trip through a temporary
        # csv file. Verbosity is restored and the file removed on exit,
        # also when the dump fails.
        v <- private$verbose
        fl <- tempfile()
        on.exit({
          private$verbose <- v
          unlink(fl)
        }, add=TRUE)
        private$verbose <- FALSE
        self$dump(file=fl)
        read.csv(fl)
      }
  )
)
-------------------------------------------------------------------------------- /pkg/R/utils.R: --------------------------------------------------------------------------------

# message() with sprintf-style formatting.
msgf <- function(fmt,...){
  message(sprintf(fmt,...))
}

# stop() with sprintf-style formatting; the call is not reported.
stopf <- function(fmt,...){
  stop(sprintf(fmt,...), call. = FALSE)
}

# warning() with sprintf-style formatting; the call is not reported.
warnf <- function(fmt, ...){
  warning(sprintf(fmt, ...), call.=FALSE)
}

# Format "file#first-last" from the 'file' and 'line' entries of 'meta';
# NA when no file is recorded.
get_srcref <- function(meta){
  if (is.null(meta$file)) return(NA_character_)

  sprintf("%s#%d-%d", meta$file, meta$line[1], meta$line[2])

}


# Recursively replace every occurrence of 'match' in the arguments of 'call'
# by 'sub'. The function position (first element) is never replaced.
replace <- function(call, match, sub){
  if (length(call) == 1){
    if ( identical(call,match) ){
      return(sub)
    } else {
      return(call)
    }
  # Skip formulas. We treat them as literals.
  } else if (call[[1]] != "~") {
    for ( i in seq_along(call)[-1] ){
      call[[i]] <- replace(call[[i]], match, sub)
    }
  }
  call
}


# the pipe action: evaluate 'y' with 'x' available as '.', in a child of 'env'.
# 'env' must be an environment; the default is the caller's frame.
pipe <- function(x, y, env=parent.frame()){

  e <- new.env(parent=env)
  e$. <- x

  if ( inherits(y,"call") ){
    # The dot is used iff replacing it by another symbol changes the call.
    y1 <- replace(y, quote(.), quote(x))
    uses_dot <- !identical(y,y1)

    if (uses_dot){
      eval(y, envir=e)
    } else {
      # no dot used: insert it as first argument of the call
      w <- as.list(y1)
      y1 <- as.call(c(w[1],quote(.),w[-1]))
      eval(y1, envir=e)
    }
  } else {
    eval(y, envir = e)
  }


}

-------------------------------------------------------------------------------- /pkg/README.md: --------------------------------------------------------------------------------

### A brief overview of `lumberjack`

Lumberjack separates concerns between data processing and monitoring the
process by allowing R programmers (analysts) to declare what objects to track,
and how to track them.

![](https://github.com/markvanderloo/lumberjack/raw/master/fig/datastep2.png)

#### Add logging capabilities to existing analyses scripts

Start tracking changes by adding a single line of code to an existing script.
13 | 14 | ``` 15 | # contents of 'script.R' 16 | 17 | mydata <- read.csv("path/to/my/data.csv") 18 | 19 | # add this line after reading the data: 20 | start_log(mydata, logger=simple$new()) 21 | 22 | # Existing data analyses code here... 23 | 24 | ``` 25 | Next, run the script using `lumberjack::run_file()`, and read the logging info. 26 | 27 | ``` 28 | library(lumberjack) 29 | run_file("script.R") 30 | 31 | read.csv("mydata_simple.csv") 32 | ``` 33 | 34 | Every aspect of the logging process can be customized, including 35 | output file locations and the logger. 36 | 37 | 38 | 39 | #### Interactive logging with the lumberjack not-a-pipe operator. 40 | 41 | ``` 42 | out <- mydata %L>% 43 | start_log(logger = simple$new()) %L>% 44 | transform(z = 2*sqrt(x)) %L>% 45 | dump_log(file="mylog.csv") 46 | read.csv("mylog.csv") 47 | ``` 48 | 49 | #### Loggers included with lumberjack 50 | 51 | |logger |description | 52 | |--------------------|----------------------------------------------| 53 | |`simple` | Record whether data has changed or not | 54 | |`cellwise` | Record every change in every cell | 55 | |`expression_logger` | Record the value of user-defined expressions | 56 | |`filedump` | Dump data to file after each change. | 57 | 58 | #### Extend with your own loggers 59 | 60 | A logger is a _reference object_ (either R6 or Reference Class) with 61 | the following _mandatory_ elements. 62 | 63 | - `add(meta, input, output)` A method recording differences between in- and output. 64 | - `dump(...)` A method dumping logging info. 65 | - `label`, A slot for setting a label. 66 | 67 | There is also an _optional_ element: 68 | 69 | - `stop(...)` A method that will be called before removing a logger. 
70 | 71 | 72 | #### More information 73 | 74 | ``` 75 | install.packages("lumberjack") 76 | library(lumberjack) 77 | vignette("using_lumberjack", package="lumberjack") 78 | ``` 79 | 80 | -------------------------------------------------------------------------------- /pkg/inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | title = "Monitoring Data in {R} with the {lumberjack} Package", 3 | author = person(given = c("Mark", "P.", "J."), 4 | family = "van der Loo", 5 | email = "m.vanderloo@cbs.nl"), 6 | journal = "Journal of Statistical Software", 7 | year = "2021", 8 | volume = "98", 9 | number = "1", 10 | pages = "1--13", 11 | doi = "10.18637/jss.v098.i01", 12 | 13 | header = "To cite lumberjack in publications use:" 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/runs/auto_dump.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | logfile <- tempfile() 3 | ## file should create women_logger, locally. 
4 | data(women) 5 | start_log(women, logger=simple$new(verbose=FALSE)) 6 | women[1,1] <- 2*women[1,1] 7 | women$ratio <- women$height/women$weight 8 | dump_log(women, "simple", file=logfile) 9 | 10 | 11 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/runs/dump_test.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | women$id <- 1:15 3 | 4 | logfile <- tempfile() 5 | 6 | start_log(women, simple$new(verbose=FALSE)) 7 | start_log(women, cellwise$new(key='id',verbose=FALSE)) 8 | 9 | women[1,1] <- 2*women[1,1] 10 | women$ratio <- women$height/women$weight 11 | 12 | dump_log(women, logger="simple", file=logfile) 13 | 14 | 15 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/runs/multiple_loggers.R: -------------------------------------------------------------------------------- 1 | 2 | library(lumberjack) 3 | women$id <- 1:15 4 | 5 | lf1 <- tempfile() 6 | lf2 <- tempfile() 7 | 8 | 9 | start_log(women, simple$new(verbose=FALSE)) 10 | start_log(women, cellwise$new(key='id',verbose=FALSE)) 11 | 12 | women[1,1] <- 2*women[1,1] 13 | women$ratio <- women$height/women$weight 14 | 15 | dump_log(women, "simple", file=lf1) 16 | dump_log(women, "cellwise", file=lf2) 17 | 18 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/runs/single_logger.R: -------------------------------------------------------------------------------- 1 | 2 | library(lumberjack) 3 | 4 | logfile <- tempfile() 5 | logger <- simple$new(verbose=FALSE) 6 | start_log(women, logger) 7 | 8 | women[1,1] <- 2*women[1,1] 9 | women$ratio <- women$height/women$weight 10 | 11 | dump_log(file=logfile) 12 | 13 | 14 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_cellwise.R: -------------------------------------------------------------------------------- 
1 | library(lumberjack) 2 | 3 | # cellwise logger 4 | iris$sleutel <- 1:nrow(iris) 5 | logfile <- tempfile() 6 | i2 <- start_log(iris, logger=cellwise$new(key="sleutel", verbose=FALSE)) 7 | i2 <- i2 %>>% identity() 8 | i2 <- i2 %>>% {.$Sepal.Length <- .$Sepal.Length*2; .} 9 | i2 <- dump_log(i2, file=logfile, stop=TRUE) 10 | expect_equal(nrow(read.csv(logfile)),nrow(iris)) 11 | 12 | 13 | d1 <- data.frame(sl = 1:3, x=1:3,y=letters[1:3]) 14 | d2 <- d1 15 | expect_equal(nrow(lumberjack:::celldiff(d1,d2,"sl")),0) 16 | 17 | d2 <- rbind(d1,d1) 18 | d2$sl <- seq_len(nrow(d2)) 19 | expect_equal(nrow(lumberjack:::celldiff(d1,d2,"sl")),6) 20 | 21 | d2 <- d1 22 | d2$foo <- 3:1 23 | expect_equal(nrow(lumberjack:::celldiff(d1,d2,"sl")),3) 24 | d2 <- d1 25 | d2[1,2] <- 2 26 | expect_equal(nrow(lumberjack:::celldiff(d1,d2,"sl")),1) 27 | 28 | iris$id <- seq_len(nrow(iris)) 29 | xx <- cellwise$new(key="id") 30 | iris %>>% start_log(xx) %>>% head(149L) %>>% stop_log(dump=FALSE) 31 | d <- xx$logdata() 32 | expect_equal(nrow(d),ncol(iris)-1L) 33 | 34 | d1 <- data.frame(id=c(1,1),x=1:2) 35 | d2 <- data.frame(id=c(1,1),x=1:2) 36 | 37 | logger <- cellwise$new(key="id") 38 | expect_warning(logger$add(meta=list(src="haha"),input=d1,output=d2)) 39 | 40 | expect_true("label" %in% ls(logger)) 41 | 42 | 43 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_expressionlogger.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | ## expression logger 3 | 4 | 5 | tmpfile <- tempfile() 6 | logger <- expression_logger$new( 7 | mh = mean(height) 8 | , mw = mean(weight) 9 | , verbose = FALSE 10 | ) 11 | 12 | women %L>% 13 | start_log(logger) %L>% 14 | identity() %L>% 15 | {.$height <- 2*.$height; .} %L>% 16 | dump_log(file=tmpfile) 17 | lg <- read.csv(tmpfile) 18 | expect_equal(lg$mh[1], mean(women$height)) 19 | expect_equal(lg$mw[1], mean(women$weight)) 20 | expect_equal(lg$mh[2], 
mean(2*women$height)) 21 | expect_equal(lg$mw[2], mean(women$weight)) 22 | 23 | expect_true("label" %in% ls(logger)) 24 | 25 | expect_true(all(is.na(lg$srcref))) 26 | 27 | 28 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_filedump.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | ## filedump logging 3 | logger <- filedump$new(dir=tempfile(),verbose=FALSE) 4 | i2 <- start_log(iris, logger=logger) 5 | i2 <- i2 %>>% identity() 6 | i2 <- i2 %>>% {.$Sepal.Length <- .$Sepal.Length*2; .} 7 | i2 <- dump_log(i2, verbose=TRUE) 8 | 9 | # this test crashes covr but it does pass. 10 | #expect_equal(length(logger$logdata()) , 3) 11 | expect_true("label" %in% ls(logger)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_logging_infra.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | 3 | ## Logging switches 4 | # switching on, switching off 5 | expect_false( is.null(get_log(start_log(1:3))) ) 6 | expect_true( is.null(get_log(stop_log(start_log(1:3), dump=FALSE))) ) 7 | 8 | 9 | ## Logging does not depend on functions keeping attributes 10 | naughty_function <- function(x){ 11 | attr(x, lumberjack:::LOGNAME) <- NULL 12 | x 13 | } 14 | d <- data.frame(x=1:3,y=letters[1:3]) 15 | out <- d %L>% 16 | start_log(simple$new(verbose=FALSE)) %L>% 17 | naughty_function() 18 | expect_true(lumberjack:::has_log(out)) 19 | 20 | # exclude remove_log and dump_log from naughty functions 21 | out <- 1:3 %L>% 22 | start_log(simple$new(verbose=FALSE)) %L>% 23 | {.*2} %L>% 24 | dump_log(file=tempfile()) 25 | expect_true(is.null(attributes(out))) 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_no_log.R: 
-------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | 3 | # no_log doesn't keep $logdata(). 4 | logger <- no_log$new() 5 | iris %>>% start_log(logger) %>>% head() %>>% stop_log(dump=FALSE) 6 | expect_equal(logger$logdata(), data.frame()) 7 | 8 | # But does write an empty logfile if asked. 9 | logfile <- tempfile() 10 | i2 <- start_log(iris, no_log$new(verbose=FALSE)) 11 | i2 <- i2 %>>% identity() 12 | i2 <- i2 %>>% head() 13 | i2 <- dump_log(i2, file=logfile, stop=TRUE) 14 | expect_true(file.exists(logfile)) 15 | 16 | 17 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_nse.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | 3 | ## NSE helper functions 4 | # symbol replacement 5 | 6 | expect_identical( 7 | lumberjack:::replace(expression(x + y)[[1]],quote(x),quote(z)) 8 | , expression(z + y)[[1]]) 9 | 10 | expect_identical( 11 | lumberjack:::replace(expression(x + y*y)[[1]],quote(y),quote(z)) 12 | , expression(x + z*z)[[1]]) 13 | 14 | expect_identical( 15 | lumberjack:::replace(expression(x + f(y*y))[[1]],quote(y),quote(z)) 16 | , expression(x + f(z*z))[[1]]) 17 | 18 | 19 | ## The pipe 20 | 21 | expect_identical(1:3 %>>% mean(), mean(1:3)) 22 | expect_identical(1:3 %>>% mean(na.rm=TRUE), mean(1:3, na.rm=TRUE)) 23 | expect_identical(1:3 %>>% mean(.), 2) 24 | expect_identical(1:3 %>>% mean(.,na.rm=TRUE), 2) 25 | g <- 1:3 26 | expect_identical(g %>>% mean(), mean(g)) 27 | 28 | expect_identical( 3 %>>% {2 * .}, 6) 29 | expect_identical( 3 %>>% (2 * .), 6) 30 | expect_identical( 31 | mean(c(1,NA,3),na.rm=TRUE) 32 | , TRUE %>>% mean(c(1,NA,3),na.rm=.) 33 | 34 | ) 35 | 36 | expect_equal( 37 | coefficients(lm(height ~ weight,data=women)) 38 | , women %>>% lm(height ~ weight, data=.) 
%>>% coefficients() 39 | ) 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_run.R: -------------------------------------------------------------------------------- 1 | # without explicit log dumping 2 | expect_silent(e <- run_file("runs/auto_dump.R")) 3 | expect_true(file.exists(e$logfile)) 4 | 5 | 6 | # with explicit log dumping 7 | e <- run_file("runs/single_logger.R", auto_dump=FALSE) 8 | expect_true(file.exists(e$logfile)) 9 | expect_silent(read.csv(e$logfile)) 10 | 11 | # NOTE, this also tests whether 'label' gets prepended properly 12 | e <- run_file("runs/multiple_loggers.R") 13 | expect_true(file.exists(e$lf1)) 14 | expect_true(file.exists(e$lf2)) 15 | 16 | 17 | e <- run_file("runs/dump_test.R", auto_dump=FALSE) 18 | expect_true(file.exists(e$logfile)) 19 | 20 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_simple.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | ## simple logger 3 | 4 | logfile <- tempfile() 5 | i2 <- start_log(iris, simple$new(verbose=FALSE)) 6 | i2 <- i2 %>>% identity() 7 | i2 <- i2 %>>% head() 8 | i2 <- dump_log(i2, file=logfile, stop=TRUE) 9 | expect_equal(nrow(read.csv(logfile)),2) 10 | 11 | # crash test: does multi-piping work under NSE? 12 | i2 <- head(women) %>>% 13 | start_log(simple$new(verbose=FALSE)) %>>% 14 | identity() %>>% 15 | dump_log(file=logfile) 16 | expect_true(file.exists(logfile)) 17 | 18 | #If we dump to the same file we expect an overwrite. 
19 | logger <- simple$new() 20 | iris %>>% start_log(logger) %>>% head() %>>% stop_log(dump=FALSE) 21 | expect_equal(nrow(logger$logdata()), 1L) 22 | expect_equal(nrow(read.csv(logfile)), 1L) 23 | 24 | expect_true("label" %in% ls(simple$new())) 25 | 26 | -------------------------------------------------------------------------------- /pkg/inst/tinytest/test_utils.R: -------------------------------------------------------------------------------- 1 | library(lumberjack) 2 | ## Utilities 3 | 4 | expect_message(lumberjack:::msgf("foo")) 5 | expect_warning(lumberjack:::warnf("foo")) 6 | expect_error(lumberjack:::stopf("foo")) 7 | 8 | 9 | -------------------------------------------------------------------------------- /pkg/tests/tinytest.R: -------------------------------------------------------------------------------- 1 | 2 | if ( requireNamespace("tinytest", quietly=TRUE) ){ 3 | tinytest::test_package("lumberjack") 4 | } 5 | 6 | -------------------------------------------------------------------------------- /pkg/vignettes/JSS_4008.Rnw: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | \usepackage{pdfpages} 3 | %\VignetteIndexEntry{Monitoring changes in data with R with the lumberjack package} 4 | 5 | \begin{document} 6 | \includepdf[pages=-, fitpaper=true]{jss4008.pdf} 7 | \end{document} 8 | -------------------------------------------------------------------------------- /pkg/vignettes/datastep2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markvanderloo/lumberjack/e83dcb1e1a765bc9b391298f93c20ded4cd4a136/pkg/vignettes/datastep2.pdf -------------------------------------------------------------------------------- /pkg/vignettes/jss4008.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/markvanderloo/lumberjack/e83dcb1e1a765bc9b391298f93c20ded4cd4a136/pkg/vignettes/jss4008.pdf -------------------------------------------------------------------------------- /pkg/vignettes/process.R: -------------------------------------------------------------------------------- 1 | 2 | # read data 3 | data(women) 4 | 5 | start_log(women, logger = simple$new(verbose=TRUE)) 6 | 7 | # transform inches to m 8 | women$height <- women$height * 0.0254 9 | # transform pounds to kg 10 | women$weight <- women$weight * 0.453592 11 | # add body-mass index column 12 | women$bmi <- women$weight/(women$height^2) 13 | # write data 14 | outfile <- tempfile(fileext=".csv") 15 | write.csv(women, file=outfile, row.names=FALSE) 16 | 17 | dump_log(file="women_simple.csv") 18 | 19 | -------------------------------------------------------------------------------- /pkg/vignettes/using_lumberjack.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Using lumberjack} 2 | \documentclass[11pt]{article} 3 | \usepackage{enumitem} 4 | \usepackage{xcolor} % for color definitions 5 | \usepackage{sectsty} % to modify heading colors 6 | \usepackage{fancyhdr} 7 | \setlist{nosep} 8 | 9 | % simpler, but issue with your margin notes 10 | \usepackage[left=1cm,right=3cm, bottom=2cm, top=1cm]{geometry} 11 | 12 | \usepackage{hyperref} 13 | 14 | \definecolor{bluetext}{RGB}{0,101,165} 15 | \definecolor{graytext}{RGB}{80,80,80} 16 | 17 | \hypersetup{ 18 | pdfborder={0 0 0} 19 | , colorlinks=true 20 | , urlcolor=blue 21 | , linkcolor=bluetext 22 | , linktoc=all 23 | , citecolor=blue 24 | } 25 | 26 | \sectionfont{\color{bluetext}} 27 | \subsectionfont{\color{bluetext}} 28 | \subsubsectionfont{\color{bluetext}} 29 | 30 | % no serif=better reading from screen. 
31 | \renewcommand{\familydefault}{\sfdefault} 32 | 33 | % header and footers 34 | \pagestyle{fancy} 35 | \fancyhf{} % empty header and footer 36 | \renewcommand{\headrulewidth}{0pt} % remove line on top 37 | \rfoot{\color{bluetext} lumberjack \Sexpr{packageVersion("lumberjack")}} 38 | \lfoot{\color{black}\thepage} % side-effect of \color{}: lowers the printed text a little(?) 39 | 40 | \usepackage{fancyvrb} 41 | 42 | 43 | % custom commands make life easier. 44 | \newcommand{\code}[1]{\texttt{#1}} 45 | \newcommand{\pkg}[1]{\textbf{#1}} 46 | \let\oldmarginpar\marginpar 47 | \renewcommand{\marginpar}[1]{\oldmarginpar{\color{bluetext}\raggedleft\scriptsize #1}} 48 | 49 | % skip line at start of new paragraph 50 | \setlength{\parindent}{0pt} 51 | \setlength{\parskip}{1ex} 52 | 53 | 54 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 55 | \title{Using \code{lumberjack}} 56 | \author{Mark van der Loo} 57 | \date{\today{} | Package version \Sexpr{packageVersion("lumberjack")}} 58 | 59 | \begin{document} 60 | \DefineVerbatimEnvironment{Sinput}{Verbatim}{fontshape=n,formatcom=\color{graytext}} 61 | \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontshape=sl,formatcom=\color{graytext}} 62 | \newlength{\fancyvrbtopsep} 63 | \newlength{\fancyvrbpartopsep} 64 | \makeatletter 65 | \FV@AddToHook{\FV@ListParameterHook}{\topsep=\fancyvrbtopsep\partopsep=\fancyvrbpartopsep} 66 | \makeatother 67 | 68 | 69 | \setlength{\fancyvrbtopsep}{0pt} 70 | \setlength{\fancyvrbpartopsep}{0pt} 71 | \maketitle{} 72 | \thispagestyle{empty} 73 | 74 | \tableofcontents{} 75 | <>= 76 | options(prompt=" ", 77 | continue = " ", 78 | width=100) 79 | library(lumberjack) 80 | @ 81 | 82 | \newpage{} 83 | \section{Purpose of this package: tracking changes in data} 84 | This package allows one to monitor changes in data as they get processed, with 85 | very little effort. 
It offers a clear and sharp separation of concerns between 86 | the primary goal of a data processing script, and a secondary goal: namely 87 | gathering data about the data process itself. The following diagram 88 | demonstrates the idea. 89 | % 90 | \begin{center} 91 | \includegraphics[width=8cm]{datastep2.pdf} 92 | \end{center} 93 | % 94 | A programmer writes a script that transforms \code{data} into \code{data'}, 95 | possibly based on externally provided parameters. The \pkg{lumberjack} package 96 | automatically gathers information on how each process step changed the data. 97 | The way the difference between two versions of a data set is computed 98 | ($\Delta$, in the diagram) is fully customizable. 99 | 100 | 101 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 102 | \section{Tracking changes in scripts that process data} 103 | Consider as an example the following simple data analysis script in a file 104 | called \code{process.R}. 105 | \begin{Sinput} 106 | data(women) 107 | women$height <- women$height * 0.0254 108 | women$weight <- women$weight * 0.453592 109 | women$bmi <- women$weight/(women$height^2) 110 | outfile <- tempfile(fileext=".csv") 111 | write.csv(women, file=outfile, row.names=FALSE) 112 | \end{Sinput} 113 | 114 | This script loads the \code{women} dataset, converts height and weight to SI 115 | units (meters and kg), and adds a Body-Mass Index column. We can run the code 116 | in this file using \code{source} and read the result from the temporary file in 117 | \code{outfile}. 118 | 119 | To check what happens with the \code{women} dataset at each step we need to do 120 | two things. First, we define which dataset must be tracked, in what way, and 121 | for what part of the script. This can be done by adding one line of code.
122 | \begin{Sinput} 123 | data(women) 124 | 125 | start_log(women, logger=simple$new()) 126 | 127 | women$height <- women$height * 0.0254 128 | women$weight <- women$weight * 0.453592 129 | women$bmi <- women$weight/(women$height^2) 130 | outfile <- tempfile(fileext=".csv") 131 | write.csv(women, file=outfile, row.names=FALSE) 132 | 133 | \end{Sinput} 134 | Second, we run our script using \code{lumberjack::run\_file}. 135 | <<>>= 136 | library(lumberjack) 137 | out <- run_file("process.R") 138 | @ 139 | All variables created during \code{run\_file} are stored in \code{out}. 140 | <<>>= 141 | head(out$women, 3) 142 | @ 143 | The logging information is by default written to a file with a name that is the 144 | combination of the data set name and the logger name, here: 145 | \code{women\_simple.csv} (but this can be customized, see \code{?simple}). 146 | <<>>= 147 | read.csv("women_simple.csv") 148 | @ 149 | The \code{simple} logger records for each expression in the script whether it 150 | changed the data that is being tracked. An overview of available loggers is 151 | given in Section~\ref{sect:loggers}. 152 | 153 | 154 | Summarizing, to track changes in a data set one needs to do the following. 155 | \begin{enumerate} 156 | \item Define a \emph{logger}. Here this is done with \code{simple\$new()}. 157 | \item Tell \pkg{lumberjack} which dataset(s) to log. Here, this is done with 158 | \code{start\_log(dataset, logger)}. When tracking multiple datasets, each 159 | dataset must get its own logger (see Section~\ref{sect:multiple}). 160 | \item Develop the analyses as usual. 161 | \item \emph{Optionally} dump the logging information and close the logger explicitly 162 | (see Section~\ref{sect:datalocation}). 163 | \item Run the whole file using \code{lumberjack::run\_file}.
164 | \end{enumerate} 165 | 166 | 167 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 168 | \section{A little background} 169 | We give a rough sketch of how \pkg{lumberjack} works. Two concepts govern its 170 | behavior. The first concept is the \emph{logger}. A logger is an R object that is 171 | capable of comparing two datasets. Depending on the type of logger it can 172 | compare various things. The built-in \code{simple} logger just records whether 173 | two versions of a dataset are identical or not. The \code{cellwise} logger 174 | compares two versions of a data set cell-by-cell and records the old and new 175 | values if they differ. The \code{expression\_logger} tracks the value of an R 176 | expression as the data gets processed. A logger also offers functionality 177 | to dump logging information to file or elsewhere. 178 | 179 | The second concept is the \emph{runtime}. \pkg{lumberjack} intercepts the R 180 | expressions written by the user and calls the logger to compare the current 181 | version of a data set with the previous version. The runtime also takes care of 182 | keeping an old version of the data in memory for comparison. When the user 183 | calls \code{dump\_log()} it makes sure that the `dump' functionality of the 184 | active logger(s) is called. 185 | 186 | We have already seen the \code{run\_file()} implementation of the \pkg{lumberjack} 187 | runtime. There is a second implementation for interactive use. This is 188 | the so-called lumberjack `pipe' operator \code{\%L>\%}, which is discussed 189 | in Section~\ref{sect:lumberjack}. 190 | 191 | 192 | 193 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 194 | \section{Controlling where the logging data is written} 195 | \label{sect:datalocation} 196 | The location of output is determined by the logger when \code{dump\_log} is 197 | called. This is done by default by \code{run\_file} after executing the script.
All 198 | loggers in \pkg{lumberjack} write output to a \code{csv} file with default 199 | location \code{dataset\_logger.csv}. If \code{run\_file} is used to execute an R 200 | file, then the log is written in the same directory as where the R file 201 | resides. You can control where and when logging information is dumped by 202 | calling \code{dump\_log} explicitly. 203 | 204 | For the \pkg{lumberjack} loggers, \code{dump\_log} has an argument \code{file} 205 | to control explicitly where logging data is saved. For example, to dump logging 206 | information for \code{mtcars} in \code{hihi.csv} one can do the following. 207 | <>= 208 | start_log(mtcars, logger=simple$new()) 209 | # all data transformations here... 210 | dump_log(file="hihi.csv") 211 | @ 212 | 213 | Note that we took care to state `loggers in \pkg{lumberjack}' every time. This 214 | is because \pkg{lumberjack} is extensible and other loggers can be developed 215 | that output logs to a data base for example. In fact, the parameters that 216 | \code{dump\_log()} accepts, apart from \code{data}, \code{logger} and 217 | \code{stop}, can be different for each logger in principle. For a 218 | specification of arguments and values, see the help pages for each logger. 219 | 220 | 221 | 222 | 223 | 224 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 225 | \section{Tracking multiple datasets} 226 | \label{sect:multiple} 227 | Call \code{start\_log} with a new logger on each dataset to track. For 228 | example to track both \code{women} and \code{mtcars} with the \code{simple} 229 | logger, do the following. 230 | <>= 231 | start_log(women, logger=simple$new()) 232 | start_log(mtcars, logger=simple$new()) 233 | # all data transformations here... 234 | dump_log() 235 | @ 236 | % 237 | Calling \code{dump\_log()} will cause all loggers to stop tracking changes and 238 | write changes to file.
To dump all loggers for a specific dataset, provide 239 | the dataset when dumping. 240 | <>= 241 | dump_log(data=mtcars) 242 | @ 243 | It is also possible to use multiple loggers on a single dataset. This is done 244 | by calling \code{start\_log} multiple times for the same data set, with 245 | different loggers. Here we track the women dataset with the \code{simple} 246 | logger and with the \code{cellwise} logger. 247 | <>= 248 | women$id <- 1:15 249 | start_log(women, logger=simple$new()) 250 | start_log(women, logger=cellwise$new(key="id")) 251 | # all data transformations here... 252 | dump_log() 253 | @ 254 | Here, the \code{cellwise} logger records every change in every cell as the data 255 | gets processed. It needs a key column to be able to identify and store the 256 | location of each cell for each record. To dump a specific logger for a specific 257 | dataset, pass the data and the name of the logger. 258 | <>= 259 | dump_log(women, "cellwise") 260 | @ 261 | 262 | 263 | 264 | 265 | 266 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 267 | \section{Tracking changes in data in interactive sessions} 268 | \label{sect:lumberjack} 269 | The \pkg{lumberjack} operator is a forward `pipe' operator that enables 270 | logging. In this example we compute again the BMI index of records in the 271 | \code{women} dataset that comes with R. We use the \code{transform} function 272 | from base R to derive the new columns.
273 | <<>>= 274 | data(women) 275 | women$id <- 1:15 276 | out <- women %L>% 277 | start_log(logger = cellwise$new(key="id")) %L>% 278 | transform(height = height*0.0254 ) %L>% 279 | transform(weight = weight*0.453592) %L>% 280 | transform(bmi = weight/height^2) %L>% 281 | dump_log() 282 | head( read.csv("cellwise.csv"), 3) 283 | @ 284 | The logging data consists of a step number, a timestamp, the location of the 285 | expression in the script (here: \code{NA}, since there is no script file), the 286 | expression that transformed the data, the record key, the variable, the old and 287 | the new value. 288 | 289 | 290 | The variable \code{out} contains the output of the calculation. 291 | <<>>= 292 | head(out,3) 293 | @ 294 | 295 | 296 | 297 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 298 | \section{Available loggers} 299 | \label{sect:loggers} 300 | 301 | \pkg{lumberjack} is extensible and users can provide their own loggers, 302 | for example to offload logging results to a data base or to define 303 | new ways to measure the difference between two data sets. Below 304 | we list loggers that we know of. 305 | 306 | \subsection{In the lumberjack package} 307 | 308 | \begin{itemize} 309 | \item \code{simple} Just check whether data has changed. 310 | \item \code{cellwise} Track changes per cell (incl. old value, new value) 311 | \item \code{filedump} Dump a file after each step (including the zeroth step.) 312 | \item \code{expression\_logger} Track the result of any expression. 313 | \end{itemize} 314 | 315 | Both \code{cellwise} and \code{simple} have been discussed before. The 316 | expression logger tracks the result of one or more expressions that will be 317 | evaluated after each data processing step. For example, suppose we want to 318 | follow the mean and standard deviation of a variable in the \code{women} dataset as it gets 319 | processed.
320 | <<>>= 321 | logger <- expression_logger$new(mean_h = mean(height), sd_h = sd(height)) 322 | out <- women %L>% 323 | start_log(logger) %L>% 324 | transform(height = height*2.54) %L>% 325 | transform(weight = weight*0.453592) %L>% 326 | dump_log() 327 | read.csv("expression.csv",stringsAsFactors = FALSE) 328 | @ 329 | 330 | 331 | \subsection{In other packages} 332 | 333 | \begin{itemize} 334 | \item \code{\href{https://CRAN.R-project.org/package=validate}{validate}::lbj\_rules} Track changes in data quality measured by validation rules. 335 | \item \code{\href{https://CRAN.R-project.org/package=validate}{validate}::lbj\_cells} Track changes in cell filling and cell counts. 336 | \item \code{\href{https://CRAN.R-project.org/package=daff}{daff}::lbj\_daff} Use data-diff to track changes in data frame-like objects. 337 | \end{itemize} 338 | 339 | 340 | 341 | 342 | 343 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 344 | \section{Properties of the lumberjack pipe operator} 345 | 346 | There are several `forward pipe' operators in the R community, including 347 | \href{https://cran.r-project.org/package=magrittr}{magrittr}, 348 | \href{https://cran.r-project.org/package=pipeR}{pipeR} and 349 | \href{https://github.com/piccolbo/yapo}{yapo}. All have different behavior. 350 | The lumberjack operator behaves as a simplified version of the \pkg{magrittr} pipe 351 | operator. Here are some examples. 352 | <<>>= 353 | # pass the first argument to a function 354 | 1:3 %L>% mean() 355 | 356 | # pass arguments using "." 357 | TRUE %L>% mean(c(1,NA,3), na.rm = .) 358 | 359 | # pass arguments to an expression, using "." 360 | 1:3 %L>% { 3 * .} 361 | 362 | # in a more complicated expression, return "." explicitly 363 | women %L>% { .$height <- 2*.$height; . } %L>% head(3) 364 | @ 365 | 366 | The main differences with \pkg{magrittr} are that 367 | \begin{itemize} 368 | \item there is no assignment-pipe like \code{\%<>\%}.
369 | \item it does not allow you to define functions in the magrittr style: \code{a <- . \%>\% sin(.)} 370 | \item you cannot do things like \code{pi \%>\% sin} and expect an answer. 371 | \end{itemize} 372 | 373 | \section{Extending lumberjack} 374 | There are many ways to register changes in data. That is why \pkg{lumberjack} 375 | is extensible with new loggers. 376 | 377 | \subsection{The lumberjack logging API} 378 | 379 | In short, a logger is a \emph{reference object} with the following 380 | \emph{mandatory} elements: 381 | 382 | \begin{enumerate} 383 | \item A method \code{\$add(meta, input, output)}. This is a function that 384 | computes the difference between \code{input} and \code{output} and adds it to a 385 | log. The \code{meta} argument is a \code{list} with the following elements: 386 | \begin{itemize} 387 | \item \code{expr} The expression used to turn \code{input} into \code{output}. 388 | \item \code{src} The same expression, but turned into a string. 389 | \item \code{file} The name of the file that was run. This element is only 390 | available when code is run from a script. 391 | \item \code{lines} A named \code{integer} vector containing the first and last 392 | line of the expression in the source file. This element is only available 393 | when code is run from a script. 394 | \end{itemize} 395 | \item A method \code{\$dump(...)}. This function writes the current logging info 396 | somewhere. Often this will be a file, but it really can be any place where R 397 | can send data. It is \emph{recommended} that \code{dump} has the argument 398 | \code{file} if it writes anything to file. \code{\$dump} \emph{must} have the 399 | \code{...} argument because when a user calls \code{dump\_log(...)} the extra 400 | arguments are passed to \code{\$dump}. 401 | \item A slot called \code{\$label}.
The label is set by \code{start\_log} and 402 | is used to keep track of logging streams when multiple datasets are tracked 403 | with instances of the same logger type. \code{start\_log} will try to create a 404 | label if none is provided. If it fails to create a label, it will be set to the 405 | empty string \code{""}. 406 | \end{enumerate} 407 | The following element is \emph{optional}: 408 | \begin{enumerate} 409 | \item A method \code{\$stop()} called by \code{stop\_log()} before removing the 410 | logger from the data. 411 | \end{enumerate} 412 | 413 | 414 | There are several systems in R to build such a reference object. We recommend 415 | using \href{https://cran.r-project.org/package=R6}{R6} classes or 416 | \href{http://adv-r.had.co.nz/R5.html}{reference classes}. Below an example for 417 | each system is given. The example loggers only register whether something has 418 | ever changed. A \code{dump} results in a simple message on screen. 419 | 420 | \subsection{R6 classes} 421 | An introduction to R6 classes can be found 422 | \href{https://cran.r-project.org/package=R6/vignettes/Introduction.html}{here}. 423 | 424 | Let us define the `trivial' logger. 425 | <<>>= 426 | library(R6) 427 | trivial <- R6Class("trivial", 428 | public = list( 429 | changed = NULL 430 | , label=NULL 431 | , initialize = function(){ 432 | self$changed <- FALSE 433 | } 434 | , add = function(meta, input, output){ 435 | self$changed <- self$changed | !identical(input, output) 436 | } 437 | , dump = function(){ 438 | msg <- if(self$changed) "" else "not " 439 | cat(sprintf("The data has %schanged\n",msg)) 440 | } 441 | ) 442 | ) 443 | @ 444 | Here is how to use it.
445 | <<>>= 446 | library(lumberjack) 447 | out <- women %L>% 448 | start_log(trivial$new()) %L>% 449 | identity() %L>% 450 | dump_log(stop=TRUE) 451 | 452 | 453 | out <- women %L>% 454 | start_log(trivial$new()) %L>% 455 | head() %L>% 456 | dump_log(stop=TRUE) 457 | @ 458 | 459 | \subsection{Reference classes} 460 | Reference classes (RC) come with the R recommended \pkg{methods} package. An 461 | introduction can be found \href{http://adv-r.had.co.nz/R5.html}{here}. Here is 462 | how to define the trivial logger as a reference class. 463 | <<>>= 464 | library(methods) 465 | trivial <- setRefClass("trivial", 466 | fields = list( 467 | changed = "logical", label="character" 468 | ), 469 | methods = list( 470 | initialize = function(){ 471 | .self$changed = FALSE 472 | .self$label = "" 473 | } 474 | , add = function(meta, input, output){ 475 | .self$changed <- .self$changed | !identical(input,output) 476 | } 477 | , dump = function(){ 478 | msg <- if( .self$changed ) "" else "not " 479 | cat(sprintf("The data has %schanged\n",msg)) 480 | } 481 | ) 482 | ) 483 | @ 484 | And here is how to use it. 485 | <<>>= 486 | library(lumberjack) 487 | out <- women %L>% 488 | start_log(trivial()) %L>% 489 | identity() %L>% 490 | dump_log(stop=TRUE) 491 | 492 | 493 | out <- women %L>% 494 | start_log(trivial()) %L>% 495 | head() %L>% 496 | dump_log(stop=TRUE) 497 | 498 | @ 499 | 500 | Observe that there are subtle differences between R6 and Reference classes (RC). 501 | \begin{itemize} 502 | \item In R6 the object is referred to with \code{self}, in RC this is done with \code{.self}. 503 | \item An R6 object is initialized with \code{classname\$new()}, an RC object 504 | is initialized with \code{classname()}. 505 | \end{itemize} 506 | 507 | 508 | \subsection{Advice for package authors} 509 | 510 | If you have a package that has interesting functionality that can be offered 511 | also inside a logger, you might consider exporting a logger object that works 512 | with \pkg{lumberjack}.
To keep things uniform, we give the following advice. 513 | 514 | \paragraph{Documenting logging objects.} 515 | Most package authors use 516 | \href{https://cran.r-project.org/package=roxygen2}{roxygen2} to generate 517 | documentation. Below is an example of how to document the class and its 518 | methods. To show how to document arguments, a new \code{allcaps} argument is 519 | added to the dump function. 520 | 521 | \begin{verbatim} 522 | #' The trivial logger. 523 | #' 524 | #' The trivial logger only registers whether something has changed at all. 525 | #' A `dump` leads to an informative message on the console. 526 | #' 527 | #' @section Creating a logger: 528 | #' \code{trivial$new()} 529 | #' 530 | #' @section Dump options: 531 | #' \code{$dump(allcaps)} 532 | #' \tabular{ll}{ 533 | #' \code{allcaps}\tab \code{[logical]} print message in capitals? 534 | #' } 535 | #' 536 | #' 537 | #' @docType class 538 | #' @format An \code{R6} class object. 539 | #' 540 | #' @examples 541 | #' out <- women %L>% 542 | #' start_log(trivial$new()) %L>% 543 | #' head() %L>% 544 | #' dump_log(stop=TRUE) 545 | #' 546 | #' 547 | #' @export 548 | trivial <- R6Class("trivial", 549 | public = list( 550 | changed = NULL, label = NULL 551 | , initialize = function(){ 552 | self$changed <- FALSE 553 | } 554 | , add = function(meta, input, output){ 555 | self$changed <- self$changed | !identical(input, output) 556 | } 557 | , dump = function(allcaps=FALSE){ 558 | msg <- if(self$changed) "" else "not " 559 | msg <- sprintf("The data has %schanged\n",msg) 560 | if (allcaps) msg <- toupper(msg) 561 | cat(msg) 562 | }) 563 | ) 564 | \end{verbatim} 565 | 566 | 567 | \paragraph{Adding lumberjack to the DESCRIPTION of your package} 568 | 569 | Once you have exported a logger, it is a good idea to add the line 570 | \begin{verbatim} 571 | Enhances: lumberjack 572 | \end{verbatim} 573 | to the \code{DESCRIPTION} file. It can then be found by other users via lumberjack's 574 | CRAN webpage.
575 | 576 | 577 | 578 | 579 | \end{document} 580 | -------------------------------------------------------------------------------- /todo.txt: -------------------------------------------------------------------------------- 1 | - verbosity level for run(?) 2 | - pause/resume? 3 | - logger that tracks side effects 4 | 5 | -------------------------------------------------------------------------------- /using_lumberjack.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function finish { 4 | cd ${oldwd} 5 | } 6 | trap finish EXIT 7 | 8 | oldwd=`pwd` 9 | 10 | cd pkg/vignettes 11 | 12 | R -e "Sweave('using_lumberjack.Rnw')" 13 | pdflatex using_lumberjack.tex 14 | pdflatex using_lumberjack.tex 15 | pdflatex using_lumberjack.tex 16 | 17 | evince using_lumberjack.pdf & 18 | 19 | 20 | 21 | --------------------------------------------------------------------------------