├── .Rbuildignore ├── .gitignore ├── R ├── refactor-package.R ├── zz.R ├── lintr_wrappers.R ├── utils.R ├── detect_similar_code.R ├── inspect_commented_code.R ├── check_files_parse.R ├── fetch_namespace_names.R ├── identify_hybrid_scripts.R ├── find_package_funs.R ├── refactor_impl.r └── refactor.r ├── refactor.Rproj ├── man ├── use_namespace_check.Rd ├── refactor-package.Rd ├── identify_hybrid_scripts.Rd ├── fetch_namespace_names.Rd ├── check_files_parse.Rd ├── find_pkg_funs.Rd ├── use_lintr_template_on_file.Rd ├── detect_similar_code.Rd └── refactor.Rd ├── DESCRIPTION ├── NAMESPACE ├── README.Rmd ├── inst ├── lint_file_template.R └── lint_dir_template.R └── README.md /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^refactor\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rdata 4 | .httr-oauth 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /R/refactor-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | # The following block is used by usethis to automatically manage 5 | # roxygen namespace tags. Modify with care! 6 | ## usethis namespace: start 7 | ## usethis namespace: end 8 | NULL 9 | -------------------------------------------------------------------------------- /R/zz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | op <- options() 3 | op.refactor <- list( 4 | refactor.value = TRUE, 5 | refactor.env = FALSE, 6 | refactor.time = FALSE, 7 | refactor.waldo = TRUE 8 | ) 9 | toset <- !(names(op.refactor ) %in% names(op)) 10 | if(any(toset)) options(op.refactor[toset]) 11 | 12 | invisible(NULL) 13 | } 14 | -------------------------------------------------------------------------------- /refactor.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/use_namespace_check.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fetch_namespace_names.R 3 | \name{use_namespace_check} 4 | \alias{use_namespace_check} 5 | \title{Use namespace check} 6 | \usage{ 7 | use_namespace_check() 8 | } 9 | \value{ 10 | Returns \code{NULL} invisibly, called for side effects. 11 | } 12 | \description{ 13 | Wrapper around \code{fetch_namespace_names()} that opens a new RStudio source editot 14 | tab with code to be pasted at the top of the main script to enumerate required 15 | packages and test if they are installed. 16 | } 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: refactor 2 | Title: Tools for Refactoring Code 3 | Version: 0.0.0.9000 4 | Authors@R: 5 | person(given = "Antoine", 6 | family = "Fabri", 7 | role = c("aut", "cre"), 8 | email = "antoine.fabri@gmail.com") 9 | Description: Tools for refactoring code. 10 | License: GPL-3 11 | Encoding: UTF-8 12 | Language: en 13 | LazyData: true 14 | Roxygen: list(markdown = TRUE) 15 | RoxygenNote: 7.2.3 16 | Imports: 17 | waldo, pryr 18 | URL: https://github.com/moodymudskipper/refactor 19 | BugReports: https://github.com/moodymudskipper/refactor/issues 20 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%ignore_original%") 4 | export("%ignore_refactored%") 5 | export("%refactor%") 6 | export("%refactor_chunk%") 7 | export("%refactor_chunk_and_value%") 8 | export("%refactor_chunk_and_value_efficiently%") 9 | export("%refactor_chunk_efficiently%") 10 | export("%refactor_value%") 11 | export("%refactor_value_efficiently%") 12 | export(check_files_parse) 13 | export(detect_similar_code) 14 | export(fetch_namespace_names) 15 | export(find_pkg_funs) 16 | export(identify_hybrid_scripts) 17 | export(use_lintr_template_on_dir) 18 | export(use_lintr_template_on_file) 19 | export(use_namespace_check) 20 | -------------------------------------------------------------------------------- /man/refactor-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/refactor-package.R 3 | \docType{package} 4 | \name{refactor-package} 5 | \alias{refactor} 6 | \alias{refactor-package} 7 | \title{refactor: Tools for Refactoring Code} 8 | \description{ 9 | Tools for refactoring code. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/moodymudskipper/refactor} 15 | \item Report bugs at \url{https://github.com/moodymudskipper/refactor/issues} 16 | } 17 | 18 | } 19 | \author{ 20 | \strong{Maintainer}: Antoine Fabri \email{antoine.fabri@gmail.com} 21 | 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/identify_hybrid_scripts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/identify_hybrid_scripts.R 3 | \name{identify_hybrid_scripts} 4 | \alias{identify_hybrid_scripts} 5 | \title{Identify hybrid scripts} 6 | \usage{ 7 | identify_hybrid_scripts(path = ".", recursive = TRUE) 8 | } 9 | \arguments{ 10 | \item{path}{A string. The path to a file or the folder to explore 11 | By default explores the working directory.} 12 | 13 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory} 14 | } 15 | \value{ 16 | Returns the path invisibly, called for side effects. 17 | } 18 | \description{ 19 | Identify scripts who contain both function definitions and other object definitions. 20 | } 21 | -------------------------------------------------------------------------------- /man/fetch_namespace_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fetch_namespace_names.R 3 | \name{fetch_namespace_names} 4 | \alias{fetch_namespace_names} 5 | \title{Fetch namespace names} 6 | \usage{ 7 | fetch_namespace_names(path = ".", recursive = TRUE) 8 | } 9 | \arguments{ 10 | \item{path}{A string. The path to a file or the folder to explore 11 | By default explores the working directory.} 12 | 13 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory} 14 | } 15 | \value{ 16 | A character vector of package names 17 | } 18 | \description{ 19 | Scans the code and finds all namespaced call of the form \code{pkg::fun} and returns 20 | a vector of unique package names. 21 | } 22 | -------------------------------------------------------------------------------- /man/check_files_parse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check_files_parse.R 3 | \name{check_files_parse} 4 | \alias{check_files_parse} 5 | \title{Check that files parse correctly} 6 | \usage{ 7 | check_files_parse(path = ".", recursive = TRUE) 8 | } 9 | \arguments{ 10 | \item{path}{A string. The path to a file or the folder to explore 11 | By default explores the working directory.} 12 | 13 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory} 14 | } 15 | \value{ 16 | Returns the path invisibly, called for side effects. 17 | } 18 | \description{ 19 | This identifies files that contain non syntactic code, including files that 20 | have an R extension despite not being an R script. 21 | } 22 | -------------------------------------------------------------------------------- /man/find_pkg_funs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/find_package_funs.R 3 | \name{find_pkg_funs} 4 | \alias{find_pkg_funs} 5 | \title{Find all uses of a package's functions} 6 | \usage{ 7 | find_pkg_funs(pkg, path = ".", recursive = TRUE, exclude = NULL) 8 | } 9 | \arguments{ 10 | \item{pkg}{A string. The name of the target package} 11 | 12 | \item{path}{A string. The path to a file or the folder to explore 13 | By default explores the working directory.} 14 | 15 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory} 16 | 17 | \item{exclude}{A character vector of function names to dismiss.} 18 | } 19 | \value{ 20 | Returns its input invisibly, called for side effects 21 | } 22 | \description{ 23 | This will show false positives, but guarantees that we don't miss any instance. 24 | } 25 | -------------------------------------------------------------------------------- /man/use_lintr_template_on_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lintr_wrappers.R 3 | \name{use_lintr_template_on_file} 4 | \alias{use_lintr_template_on_file} 5 | \alias{use_lintr_template_on_dir} 6 | \title{Use lintr template} 7 | \usage{ 8 | use_lintr_template_on_file(path = NULL) 9 | 10 | use_lintr_template_on_dir(path = NULL) 11 | } 12 | \arguments{ 13 | \item{path}{Path to a R script or a directory. By default \code{use_lint_template_on_file()} 14 | considers the active document and \code{use_lint_template_on_dir()} considers the 15 | project folder as returned by \code{here::here()}} 16 | } 17 | \value{ 18 | Returns \code{NULL} invisibly. Called for side effects. 19 | } 20 | \description{ 21 | This opens up an untitled script in RStudio containing calls to \code{lintr::lint()} 22 | or \code{lintr::lint_dir()} with various linters, sorted by category and rough 23 | order of importance. 24 | } 25 | -------------------------------------------------------------------------------- /man/detect_similar_code.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/detect_similar_code.R 3 | \name{detect_similar_code} 4 | \alias{detect_similar_code} 5 | \title{Detect similar code blocks} 6 | \usage{ 7 | detect_similar_code(paths = ".", recursive = TRUE, pattern = NULL) 8 | } 9 | \arguments{ 10 | \item{paths}{Paths to scripts or folders containing scripts} 11 | 12 | \item{recursive}{Whether folders should be explorer recursively} 13 | 14 | \item{pattern}{A regular expression used to filter files} 15 | } 16 | \value{ 17 | The \code{paths} argument invisibly. Called for side effects 18 | } 19 | \description{ 20 | This is a wrapper around \code{dupree::dupree()}. It analyses provided files looking for 21 | similar code. It is a bit slow so it can be impractical on big projects to run 22 | it with default (all files contained in working directory recursively), in 23 | this case it is wiser to run it on one or more specific \code{paths} or/and 24 | or to filter the files using the \code{pattern} argument. 25 | } 26 | -------------------------------------------------------------------------------- /R/lintr_wrappers.R: -------------------------------------------------------------------------------- 1 | #' Use lintr template 2 | #' 3 | #' This opens up an untitled script in RStudio containing calls to `lintr::lint()` 4 | #' or `lintr::lint_dir()` with various linters, sorted by category and rough 5 | #' order of importance. 6 | #' 7 | #' @param path Path to a R script or a directory. By default `use_lint_template_on_file()` 8 | #' considers the active document and `use_lint_template_on_dir()` considers the 9 | #' project folder as returned by `here::here()` 10 | #' 11 | #' @return Returns `NULL` invisibly. Called for side effects. 12 | #' @export 13 | use_lintr_template_on_file <- function(path = NULL) { 14 | if (is.null(path)) path <- rstudioapi::documentPath(rstudioapi::documentId(FALSE)) 15 | template_path <- system.file("lint_file_template.R", package = "refactor") 16 | lines <- readLines(template_path) 17 | lines[[3]] <- sprintf('path <- "%s"', path) 18 | rstudioapi::documentNew(lines) 19 | invisible(NULL) 20 | } 21 | 22 | #' @rdname use_lintr_template_on_file 23 | #' @export 24 | use_lintr_template_on_dir <- function(path = NULL) { 25 | if (is.null(path)) path <- here::here() 26 | template_path <- system.file("lint_dir_template.R", package = "refactor") 27 | lines <- readLines(template_path) 28 | lines[[3]] <- sprintf('linted_dir <- "%s"', path) 29 | rstudioapi::documentNew(lines) 30 | invisible(NULL) 31 | } 32 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | gfn <- getFromNamespace 2 | promise_code <- gfn("promise_code", "pryr") 3 | promise_env <- gfn("promise_env", "pryr") 4 | promise_evaled <- gfn("promise_evaled", "pryr") 5 | is_promise2 <- gfn("is_promise2", "pryr") 6 | 7 | clone_env <- function(env, deep = FALSE) { 8 | # create new environment with same parent 9 | clone <- new.env(parent = parent.env(env)) 10 | for(obj in ls(env, all.names = TRUE)) { 11 | promise_lgl <- is_unevaled_promise(as.symbol(obj), env = env) 12 | if(promise_lgl) { 13 | # fetch promise expression and env 14 | promise_expr <- promise_code(obj, env) 15 | promise_env <- promise_env(obj, env) 16 | 17 | # Assign this expression as a promise (delayed assignment) in our 18 | # cloned environment 19 | eval(bquote( 20 | delayedAssign(obj, .(promise_expr), eval.env = promise_env, assign.env = clone))) 21 | } else { 22 | obj_val <- get(obj, envir = env) 23 | if(is.environment(obj_val) && deep) { 24 | assign(obj, clone_env(obj_val, deep = TRUE),envir= clone) 25 | } else { 26 | assign(obj, obj_val, envir= clone) 27 | } 28 | } 29 | } 30 | attributes(clone) <- attributes(env) 31 | clone 32 | } 33 | 34 | is_unevaled_promise <- function(name, env) { 35 | is_promise2(name, env) && !promise_evaled(name, env) 36 | } 37 | 38 | identical2 <- function(target, current, ...) { 39 | isTRUE(all.equal(target, current, ...)) 40 | } 41 | -------------------------------------------------------------------------------- /R/detect_similar_code.R: -------------------------------------------------------------------------------- 1 | #' Detect similar code blocks 2 | #' 3 | #' This is a wrapper around `dupree::dupree()`. It analyses provided files looking for 4 | #' similar code. It is a bit slow so it can be impractical on big projects to run 5 | #' it with default (all files contained in working directory recursively), in 6 | #' this case it is wiser to run it on one or more specific `paths` or/and 7 | #' or to filter the files using the `pattern` argument. 8 | #' 9 | #' @param paths Paths to scripts or folders containing scripts 10 | #' @param recursive Whether folders should be explorer recursively 11 | #' @param pattern A regular expression used to filter files 12 | #' 13 | #' @return The `paths` argument invisibly. Called for side effects 14 | #' @export 15 | detect_similar_code <- function(paths = ".", recursive = TRUE, pattern = NULL) { 16 | paths_are_dirs <- sapply(paths, dir.exists) 17 | dirs <- paths[paths_are_dirs] 18 | paths_from_dirs <- unlist(lapply(dirs, list.files, recursive = recursive)) 19 | paths <- c(paths[!paths_are_dirs], paths_from_dirs) 20 | if (!is.null(pattern)) paths <- grep(pattern, paths, value = TRUE) 21 | dups <- dupree::dupree(paths) 22 | dups <- as.data.frame(dups) 23 | dups <- transform( 24 | dups, 25 | message = paste0("duplicate #", 1:nrow(dups), " (score: ", round(score,2),")")) 26 | dups <- 27 | rbind( 28 | setNames(dups[c("file_a", "line_a", "message")], c("file", "line", "message")), 29 | setNames(dups[c("file_b", "line_b", "message")], c("file", "line", "message")) 30 | ) 31 | 32 | dups <- transform(dups[order(dups$message),], type ="info", column = 1) 33 | rstudioapi::sourceMarkers("dupree", dups) 34 | } 35 | -------------------------------------------------------------------------------- /R/inspect_commented_code.R: -------------------------------------------------------------------------------- 1 | 2 | rleid <- function(x) { 3 | x <- rle(x)$lengths 4 | rep(seq_along(x), times = x) 5 | } 6 | 7 | inspect_commented_code <- function() { 8 | files <- list.files(here::here(), all.files = TRUE, full.names = TRUE, recursive = TRUE) 9 | files <- grep("\\.r$", files, ignore.case = TRUE, value = TRUE) 10 | 11 | inspect_block <- function(block) { 12 | n <- nrow(block) 13 | for (start in 1:n) { 14 | not_a_good_start_comment <- 15 | grepl("^ *$", block$code[start]) || 16 | grepl(":$", block$code[start]) || 17 | grepl(": ", block$code[start]) # or would yield false positive 18 | if (not_a_good_start_comment) next 19 | for (end in start:n) 20 | code <- block$code[start:end] 21 | parsed_lgl <- tryCatch(!is.symbol(parse(text = code)[[1]]), error = function(e) FALSE) 22 | if (parsed_lgl) { 23 | # keep only first line of commented code 24 | return(block[start, ]) 25 | } 26 | } 27 | return(NULL) 28 | } 29 | 30 | inspect_blocks <- function(file) { 31 | code <- readLines(file) 32 | line <- seq_along(code) 33 | 34 | commented_lgl <- startsWith(code, "#") & !startsWith(code, "#'") 35 | block_id <- rleid(commented_lgl) 36 | blocks_df <- data.frame(line, message = code, code = sub("^#+", "", code), block_id)[commented_lgl, ] 37 | blocks <- split(blocks_df, blocks_df$block_id) 38 | 39 | df <- do.call(rbind, lapply(blocks, inspect_block)) 40 | if (is.null(df)) return(df) 41 | df$block_id <- NULL 42 | df$code <- NULL 43 | df$file = file 44 | df 45 | } 46 | 47 | commented_code_df <- do.call(rbind, lapply(files, inspect_blocks)) 48 | commented_code_df <- transform(commented_code_df, type = "info", column = 1) 49 | rstudioapi::sourceMarkers("commented code", commented_code_df) 50 | } 51 | -------------------------------------------------------------------------------- /R/check_files_parse.R: -------------------------------------------------------------------------------- 1 | #' Check that files parse correctly 2 | #' 3 | #' This identifies files that contain non syntactic code, including files that 4 | #' have an R extension despite not being an R script. 5 | #' 6 | #' @param path A string. The path to a file or the folder to explore 7 | #' By default explores the working directory. 8 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory 9 | #' 10 | #' @return Returns the path invisibly, called for side effects. 11 | #' @export 12 | check_files_parse <- function(path = ".", recursive = TRUE) { 13 | if (dir.exists(path)) { 14 | all_scripts <- list.files( 15 | path, 16 | pattern = "\\.[rR]$", 17 | full.names = TRUE, 18 | recursive = recursive 19 | ) 20 | } else { 21 | if (!file.exists(path)) { 22 | stop("wrong file") 23 | } 24 | all_scripts <- path 25 | } 26 | 27 | errors <- sapply(all_scripts, function(file) { 28 | error <- tryCatch(parse(file), error = function(e) e$message) 29 | if (!is.character(error)) error <- "" 30 | if (startsWith(error, "invalid multibyte character in parse")) { 31 | is_rdata <- !inherits(try(load(file, envir = new.env())), "try-error") 32 | if(is_rdata) return("`.RData` file stored as `.R`") 33 | is_rds <- !inherits(try(readRDS(file)), "try-error") 34 | if(is_rdata) return("`.RDS` file stored as `.R`") 35 | } 36 | error 37 | }) 38 | errors <- errors[errors != ""] 39 | if(!length(errors)) { 40 | message("All R scripts contain syntactic codes") 41 | return(invisible(path)) 42 | } 43 | markers <- data.frame( 44 | type = "error", 45 | file = names(errors), 46 | message = errors, 47 | line= ifelse( 48 | endsWith(errors, "^"), 49 | as.numeric(sub("^.*\n(\\d+): +.+\n +\\^$", "\\1", errors)), 50 | 1), 51 | column=1 52 | ) 53 | 54 | rstudioapi::sourceMarkers("Check that files parse", markers) 55 | invisible(path) 56 | } 57 | -------------------------------------------------------------------------------- /R/fetch_namespace_names.R: -------------------------------------------------------------------------------- 1 | #' Fetch namespace names 2 | #' 3 | #' Scans the code and finds all namespaced call of the form `pkg::fun` and returns 4 | #' a vector of unique package names. 5 | #' 6 | #' @param path A string. The path to a file or the folder to explore 7 | #' By default explores the working directory. 8 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory 9 | #' 10 | #' @return A character vector of package names 11 | #' @export 12 | fetch_namespace_names <- function(path = ".", recursive = TRUE) { 13 | if (dir.exists(path)) { 14 | all_scripts <- list.files( 15 | path, 16 | pattern = "\\.[rR]$", 17 | full.names = TRUE, 18 | recursive = recursive 19 | ) 20 | } else { 21 | if (!file.exists(path)) { 22 | stop("wrong file") 23 | } 24 | all_scripts <- path 25 | } 26 | 27 | code <- sapply(all_scripts, parse) 28 | namespaces <- list() 29 | collect_namespaced_calls <- function(call) { 30 | if(!is.call(call) && !is.expression(call)) { 31 | return() 32 | } 33 | if(rlang::is_call(call, "::")) { 34 | namespaces <<- c(namespaces, call[[2]]) 35 | return() 36 | } 37 | lapply(as.list(call), collect_namespaced_calls) 38 | invisible() 39 | } 40 | lapply(code, collect_namespaced_calls) 41 | sort(as.character(unique(namespaces))) 42 | } 43 | 44 | #' Use namespace check 45 | #' 46 | #' Wrapper around `fetch_namespace_names()` that opens a new RStudio source editot 47 | #' tab with code to be pasted at the top of the main script to enumerate required 48 | #' packages and test if they are installed. 49 | #' 50 | #' @return Returns `NULL` invisibly, called for side effects. 51 | #' @export 52 | use_namespace_check <- function() { 53 | pkgs <- fetch_namespace_names() 54 | #pkgs <- capture.output(dput(pkgs)) 55 | #pkgs <- paste(pkgs, collapse = "\n") 56 | code <- sprintf("requireNamespace(\"%s\")", pkgs) 57 | rstudioapi::documentNew(code, "r") 58 | invisible(NULL) 59 | } 60 | 61 | -------------------------------------------------------------------------------- /R/identify_hybrid_scripts.R: -------------------------------------------------------------------------------- 1 | #' Identify hybrid scripts 2 | #' 3 | #' Identify scripts who contain both function definitions and other object definitions. 4 | #' 5 | #' @param path A string. The path to a file or the folder to explore 6 | #' By default explores the working directory. 7 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory 8 | #' 9 | #' @return Returns the path invisibly, called for side effects. 10 | #' @export 11 | identify_hybrid_scripts <- function(path = ".", recursive = TRUE) { 12 | if (dir.exists(path)) { 13 | all_scripts <- list.files( 14 | path, 15 | pattern = "\\.[rR]$", 16 | full.names = TRUE, 17 | recursive = recursive 18 | ) 19 | } else { 20 | if (!file.exists(path)) { 21 | stop("wrong file") 22 | } 23 | all_scripts <- path 24 | } 25 | 26 | is_function_call <- function(x) { 27 | is.call(x) && 28 | list(x[[1]]) %in% c(quote(`<-`), quote(`=`)) && 29 | is.call(x[[3]]) && 30 | identical(x[[c(3, 1)]], quote(`function`)) 31 | } 32 | 33 | get_file_markers <- function(file) { 34 | code <- parse(file) 35 | calls_are_fun_defs <- sapply(as.list(code), is_function_call) 36 | 37 | if(all(calls_are_fun_defs) || all(!calls_are_fun_defs)) { 38 | return(NULL) 39 | } 40 | 41 | data <- getParseData(code) 42 | level1 <- data$id[data$parent == 0] 43 | level2 <- data$id[data$parent %in% level1] 44 | lines <- unique(data[data$id %in% level2, "line1"]) 45 | if(calls_are_fun_defs[[1]]) { 46 | msg <- "The script starts with a function definition but contains other object definitions" 47 | line <- lines[which(!calls_are_fun_defs)[[1]]] 48 | } else { 49 | msg <- "The script starts with a non function object definition but contains functions" 50 | line <- lines[which(calls_are_fun_defs)[[1]]] 51 | } 52 | markers <- data.frame( 53 | type = "info", 54 | file = file, 55 | line = line, 56 | column = 1, 57 | message = msg 58 | ) 59 | markers 60 | } 61 | 62 | markers <- lapply(all_scripts, get_file_markers) 63 | markers <- do.call(rbind, markers) 64 | if(is.null(markers)) { 65 | message("All scripts contain either only function definitions or no function definition") 66 | return(invisible(path)) 67 | } 68 | rstudioapi::sourceMarkers("Hybrid scripts", markers) 69 | invisible(path) 70 | } 71 | -------------------------------------------------------------------------------- /R/find_package_funs.R: -------------------------------------------------------------------------------- 1 | is_infix <- function(x) { 2 | startsWith(x, "%") & endsWith(x, "%") 3 | } 4 | 5 | #' Find all uses of a package's functions 6 | #' 7 | #' This will show false positives, but guarantees that we don't miss any instance. 8 | #' 9 | #' @param pkg A string. The name of the target package 10 | #' @param path A string. The path to a file or the folder to explore 11 | #' By default explores the working directory. 12 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory 13 | #' @param exclude A character vector of function names to dismiss. 14 | #' 15 | #' @return Returns its input invisibly, called for side effects 16 | #' @export 17 | find_pkg_funs <- function(pkg, path = ".", recursive = TRUE, exclude = NULL 18 | # , include_s3_generics = FALSE 19 | ) { 20 | # fetch package functions and their origin 21 | imports <- getNamespaceImports(pkg) 22 | imports <- Map(function(fun, pkg) data.frame(fun, pkg), imports, names(imports)) 23 | imports <- do.call(rbind, imports) 24 | row.names(imports) <- NULL 25 | exports <- data.frame(fun = getNamespaceExports(pkg)) 26 | exports <- exports[!exports$fun %in% exclude, , drop = FALSE] 27 | pkg_funs <- merge(exports, imports, all.x = TRUE) 28 | 29 | # ns <- asNamespace(pkg) 30 | # s3_methods <- Filter(function(x) isS3method(x, envir = ns), ls(ns)) 31 | # s3_generics <- sub("^([^.]+)\\..*$", "\\1", s3_methods) 32 | # s3 <- data.frame(method = s3_methods, generic = s3_generics) 33 | # s3 <- subset(s3, !generic %in% ls(ns)) 34 | 35 | 36 | # fetch parse data 37 | if(dir.exists(path)) { 38 | files <- list.files(path, full.names = TRUE, recursive = recursive, pattern = "\\.[rR]$") 39 | } else { 40 | if (!file.exists(path)) stop(sprintf("Invalid value for `path`, '%s' doesn't exist", path)) 41 | files <- path 42 | } 43 | 44 | parse_data <- lapply(files, function(file) { 45 | data <- getParseData(parse(file)) 46 | data <- data[! data$token %in% c("SYMBOL_SUB", "SYMBOL_PACKAGE"),] 47 | i_namespaced <- which(data$text %in% c("::", ":::")) + 1 48 | if(length(i_namespaced)) data <- data[-i_namespaced,] 49 | transform(data, file = file) 50 | }) 51 | parse_data <- do.call(rbind, parse_data) 52 | parse_data <- parse_data[c("line1", "col1", "text", "file")] 53 | 54 | # merge datasets 55 | merged <- merge(parse_data, pkg_funs, by.x = "text", by.y = "fun") 56 | 57 | if(!nrow(merged)) { 58 | message(sprintf("No potential function calls from {%s} were found in the code", pkg)) 59 | return(invisible(NULL)) 60 | } 61 | 62 | markers <- data.frame( 63 | type = "info", 64 | file = merged$file, 65 | line = merged$line1, 66 | column = merged$col1, 67 | message = ifelse( 68 | is.na(merged$pkg), 69 | ifelse( 70 | is_infix(merged$text), 71 | sprintf("Found `%s`, do we want `library(%s, include.only = '%s')` ?", merged$text, pkg, merged$text), 72 | sprintf("Found `%s`, do we want `%s::%s` ?", merged$text, pkg, merged$text) 73 | ), 74 | ifelse( 75 | is_infix(merged$text), 76 | sprintf("Found `%s`, do we want `library(%s, include.only = '%s')` (or more directly `library(%s, include.only = '%s')`)?", merged$text, pkg, merged$text, merged$pkg, merged$text), 77 | sprintf("Found `%s`, do we want `%s::%s` (or more directly `%s::%s`) ?", merged$text, pkg, merged$text, merged$pkg, merged$text) 78 | ) 79 | ) 80 | ) 81 | 82 | rstudioapi::sourceMarkers("Functions that might come from", markers) 83 | invisible(pkg) 84 | } 85 | 86 | -------------------------------------------------------------------------------- /man/refactor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/refactor.r 3 | \name{refactor} 4 | \alias{refactor} 5 | \alias{\%refactor\%} 6 | \alias{\%refactor_chunk\%} 7 | \alias{\%refactor_value\%} 8 | \alias{\%refactor_chunk_and_value\%} 9 | \alias{\%refactor_chunk_efficiently\%} 10 | \alias{\%refactor_value_efficiently\%} 11 | \alias{\%refactor_chunk_and_value_efficiently\%} 12 | \alias{\%ignore_original\%} 13 | \alias{\%ignore_refactored\%} 14 | \title{Refactor Code} 15 | \usage{ 16 | original \%refactor\% refactored 17 | 18 | original \%refactor_chunk\% refactored 19 | 20 | original \%refactor_value\% refactored 21 | 22 | original \%refactor_chunk_and_value\% refactored 23 | 24 | original \%refactor_chunk_efficiently\% refactored 25 | 26 | original \%refactor_value_efficiently\% refactored 27 | 28 | original \%refactor_chunk_and_value_efficiently\% refactored 29 | 30 | original \%ignore_original\% refactored 31 | 32 | original \%ignore_refactored\% refactored 33 | } 34 | \arguments{ 35 | \item{original}{original expression} 36 | 37 | \item{refactored}{refactored expression} 38 | } 39 | \description{ 40 | These operators are used to refactor code and differ in the difference of 41 | behavior they allow between refactored and original code. 42 | } 43 | \details{ 44 | \itemize{ 45 | \item 46 | } 47 | 48 | Both original and refactored expressions are run. By default the function will fail if 49 | the outputs are different. \verb{\%ignore_original\%} and \verb{\%ignore_refactored\%} do as 50 | heir names suggest. 51 | 52 | Options can be set to alter the behavior of \verb{\%refactor\%}: 53 | \itemize{ 54 | \item if \code{refactor.value} is \code{TRUE} (the default), the sameness of the outputs of 55 | \code{original} and \code{refactored} is tested 56 | \item if \code{refactor.env} is \code{TRUE} (default is \code{FALSE}), the sameness of the modifications 57 | to the local environment made by \code{original} and \code{refactored} is tested 58 | \item if \code{refactor.time} is \code{TRUE} (default is \code{FALSE}), the improved execution speed of 59 | the refactored solution is tested 60 | \item if \code{refactor.waldo} is \code{TRUE} (the default), the \code{waldo::compare} will be used 61 | to compare objects or environments in case of failure. 'waldo' is sometimes 62 | slow and if we set this option to \code{FALSE}, \code{dplyr::all_equal()} would be used instead. 63 | } 64 | 65 | \verb{\%refactor_*\%} functions are variants that are not affected by options other than 66 | \code{refactor.waldo}: 67 | \itemize{ 68 | \item \verb{\%refactor_chunk\%} behaves like \verb{\%refactor\%} with \code{options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)}, 69 | it's convenient to refactor chunks of code that modify the local environment. 70 | \item \verb{\%refactor_value\%} behaves like \verb{\%refactor\%} with \code{options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)}, 71 | it's convenient to refactor the body of a function that returns a useful value. 72 | \item \verb{\%refactor_chunk_and_value\%} behaves like \verb{\%refactor\%} with \code{options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)}, 73 | it's convenient to refactor the body of a function that returns a closure. 74 | \item \verb{\%refactor_chunk_efficiently\%}, \verb{\%refactor_value_efficiently\%} and \verb{\%refactor_chunk_and_value_efficiently\%} are variants of the above 75 | which also check the improved execution speed of the refactored solution 76 | } 77 | 78 | 2 additional functions are used to avoid akward commenting of code, when the original 79 | and refactored code have different behaviors. 80 | \itemize{ 81 | \item \verb{\%ignore_original\%} and \verb{\%ignore_refactored\%} are useful when original and 82 | refactored code give different results (possibly because one of them is wrong) 83 | and we want to keep both codes around without commenting. 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # refactor 17 | 18 | {refactor} helps you test your refactored code with real live data. It's 19 | a complement to unit tests, useful in the dirtier stage of refactoring, 20 | when we're not quite sure if our unit tests are good enough or if we don't want to 21 | write them yet because there are too many things changing. 22 | 23 | {refactor} lets you run both the original and refactored 24 | version of your code and checks whether the output is the same and if it runs 25 | as fast. 26 | 27 | As you encounter failures you might improve your unit tests, and 28 | when you're comfortable with your work you can remove the original version 29 | 30 | ## Installation 31 | 32 | Install with: 33 | 34 | ``` r 35 | remotes::install_github("moodymudskipper/refactor") 36 | ``` 37 | 38 | ## Examples 39 | 40 | ```{r} 41 | library(refactor) 42 | ``` 43 | 44 | `%refactor%` by default checks that the output value is consistent between 45 | the original and refactored expressions. 46 | 47 | They'll often be used on the body of a function but can be used on any expression. 48 | 49 | Here I intend to correct an inefficient use of the `apply()` function, 50 | but used `pmax` incorrectly: 51 | 52 | ```{r, error=TRUE} 53 | fun1 <- function(data) { 54 | apply(data, 1, max) 55 | } %refactor% { 56 | pmax(data) 57 | } 58 | fun1(cars) 59 | ``` 60 | 61 | Now using it correctly: 62 | 63 | ```{r} 64 | fun2 <- function(data) { 65 | apply(data, 1, max) 66 | } %refactor% { 67 | do.call(pmax, data) 68 | } 69 | fun2(cars) 70 | ``` 71 | 72 | We can use the option `refactor.env` to test that the local environment isn't 73 | changed in different ways by the original and refactored expression. 74 | 75 | 76 | ```{r, error=TRUE} 77 | options("refactor.env" = TRUE) 78 | { 79 | # original code 80 | data <- cars 81 | i <- 1 82 | apply(data, i, max) 83 | } %refactor% { 84 | # refactored code 85 | do.call(pmax, cars) 86 | } 87 | ``` 88 | 89 | We can use the option `refactor.time` to test that the refactored solution is 90 | faster. 91 | 92 | ```{r, error=TRUE} 93 | # use bigger data so execution time differences are noticeable 94 | cars2 <- do.call(rbind, replicate(1000,cars, F)) 95 | 96 | options("refactor.time" = TRUE) 97 | fun3 <- function(data) { 98 | do.call(pmax, data) 99 | } %refactor% { 100 | apply(data, 1, max) 101 | } 102 | fun3(cars2) 103 | 104 | ``` 105 | 106 | ## Other functions 107 | 108 | It's often easier to use the functions below: 109 | 110 | * `%refactor_chunk%` behaves like `%refactor%` with `options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)`, 111 | it's convenient to refactor chunks of code that modify the local environment. 112 | * `%refactor_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)`, 113 | it's convenient to refactor the body of a function that returns a useful value. 114 | * `%refactor_chunk_and_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)`, 115 | it's convenient to refactor the body of a function that returns a closure. 116 | * `%refactor_chunk_efficiently%`, `%refactor_value_efficiently%` and `%refactor_chunk_and_value_efficiently%` are variants of the above 117 | which also check the improved execution speed of the refactored solution 118 | * `%ignore_original%` and `%ignore_refactored%` are useful when original and 119 | refactored code give different results (possibly because one of them is wrong) 120 | and we want to keep both codes around without commenting. 121 | 122 | ## Additional functions 123 | 124 | We provide a few helper for refactoring tasks, check out the doc! 125 | 126 | ## Caveats 127 | 128 | We don't control that side effects are the same on both sides, with the exception 129 | of modifications to the local environment. This means 130 | the following for instance might be different in your refactored code 131 | and you won't be warned about it : 132 | 133 | * modified environments (other than local) 134 | * written files 135 | * printed output 136 | * messages 137 | * warnings 138 | * errors 139 | 140 | We might be able to support some of those though. 141 | 142 | More importantly since both side are run, side effects will be run twice and in some case 143 | this might change the behavior of the program, so use cautiously. 144 | -------------------------------------------------------------------------------- /inst/lint_file_template.R: -------------------------------------------------------------------------------- 1 | library(lintr) 2 | 3 | path <- "" 4 | 5 | #### spot signs of non robust code --------------------------------------------- 6 | 7 | # Check that no absolute paths are used (e.g. "/var", "C:\System", "~/docs"). 8 | lint(path, linters = absolute_path_linter()) 9 | 10 | # checks that closures have the proper usage using checkUsage. Note this runs eval on the code, so do not use with untrusted code. 11 | lint(path, linters = object_usage_linter()) 12 | 13 | # Avoid the symbols T and F (for TRUE and FALSE). 14 | lint(path, linters = T_and_F_symbol_linter()) 15 | 16 | # check for 1:length(...), 1:nrow(...), 1:ncol(...), 1:NROW(...) and 1:NCOL(...) expressions. 17 | # These often cause bugs when the right hand side is zero. It is safer to use seq_len or seq_along instead. 18 | lint(path, linters = seq_linter()) 19 | 20 | # that checks for x == NA 21 | lint(path, linters = equals_na_linter()) 22 | 23 | # Report the use of undesirable functions 24 | lint(path, linters = undesirable_function_linter(undesirable)) 25 | 26 | # Check for overly complicated expressions. See ?lintr::cyclocomp 27 | lint(path, linters = cyclocomp_linter(25)) 28 | 29 | # Check that each step in a pipeline is on a new line, or the entire pipe fits on one line. 30 | lint(path, linters = pipe_continuation_linter()) 31 | 32 | # Report the use of undesirable operators, e.g. `:::` or `<<-` and suggest an alternative. 33 | lint(path, linters = undesirable_operator_linter()) 34 | 35 | # Check that there is no commented code outside roxygen blocks 36 | lint(path, linters = commented_code_linter()) 37 | 38 | # Check that the source contains no TODO or FIXME comments (case-insensitive). 39 | lint(path, linters = todo_comment_linter("todo")) 40 | lint(path, linters = todo_comment_linter("fixme")) 41 | 42 | #### object names -------------------------------------------------------------- 43 | 44 | # Check that object names conform to a naming style. 45 | lint(path, linters = object_name_linter(styles = "snake_case")) 46 | 47 | # check that object names are not too long. 48 | lint(path, linters = object_length_linter(length = 30)) 49 | 50 | #### pure style ---------------------------------------------------------------- 51 | 52 | # assignment_linter: checks that '<-' is always used for assignment 53 | lint(path, linters = assignment_linter()) 54 | 55 | # Check that the c function is not used without arguments nor with a single constant. 56 | lint(path, linters = unneeded_concatenation_linter()) 57 | 58 | # check that all commas are followed by spaces, but do not have spaces before them. 59 | lint(path, linters = commas_linter()) 60 | 61 | # check that all infix operators have spaces around them. 62 | lint(path, linters = infix_spaces_linter()) 63 | 64 | # check that only spaces are used for indentation, not tabs. 65 | lint(path, linters = no_tab_linter()) 66 | 67 | # check the line length of both comments and code is less than length. 68 | lint(path, linters = line_length_linter(80)) 69 | 70 | # check that opening curly braces are never on their own line and are always followed by a newline. 71 | lint(path, linters = open_curly_linter(allow_single_line = FALSE)) 72 | 73 | # check that closed curly braces should always be on their own line unless they follow an else. 74 | lint(path, linters = closed_curly_linter(allow_single_line = FALSE)) 75 | 76 | # check that all left parentheses have a space before them unless they are in a function call. 77 | lint(path, linters = spaces_left_parentheses_linter()) 78 | 79 | # check that all left parentheses in a function call do not have spaces before them. 80 | lint(path, linters = function_left_parentheses_linter()) 81 | 82 | # check that there is a space between right parenthesis and an opening curly brace. 83 | lint(path, linters = paren_brace_linter()) 84 | 85 | # check that parentheses and square brackets do not have spaces directly inside them. 86 | lint(path, linters = spaces_inside_linter()) 87 | 88 | # Check that no semicolons terminate statements. 89 | lint(path, linters = semicolon_terminator_linter()) 90 | 91 | # checks that only single quotes are used to delimit string constants. 92 | lint(path, linters = single_quotes_linter()) 93 | 94 | # check there are no trailing blank lines. 95 | lint(path, linters = trailing_blank_lines_linter()) 96 | 97 | # check there are no trailing whitespace characters. 98 | lint(path, linters = trailing_whitespace_linter()) 99 | 100 | #### Zealous linters ----------------------------------------------------------- 101 | # These are not very important or yield a lot of false positives 102 | 103 | # Check that the '[[' operator is used when extracting a single element from an object, not '[' (subsetting) nor '$' (interactive use). 104 | lint(path, linters = extraction_operator_linter()) 105 | 106 | # Check that integers are explicitly typed using the form 1L instead of 1. 107 | lint(path, linters = implicit_integer_linter()) 108 | 109 | # Check that file.path() is used to construct safe and portable paths. 110 | lint(path, linters = nonportable_path_linter()) 111 | -------------------------------------------------------------------------------- /R/refactor_impl.r: -------------------------------------------------------------------------------- 1 | refactor_impl <- function(original, refactored, refactor.value, refactor.time, refactor.env, refactor.waldo, src_env) { 2 | 3 | ## record env before modifications 4 | original_env <- clone_env(src_env) 5 | original_var_nms <- names(original_env) 6 | 7 | 8 | if(refactor.time) { 9 | original_time <- system.time(original_value <- eval(original, src_env))[["elapsed"]] 10 | } else { 11 | original_value <- eval(original, src_env) 12 | } 13 | 14 | ## record env after modifications made by original code 15 | new_env1 <- clone_env(src_env) 16 | new_var_nms <- names(new_env1) 17 | 18 | ## reinitiate env 19 | rm(list=setdiff(new_var_nms, original_var_nms), envir = src_env) 20 | for (var_nm in intersect(new_var_nms, original_var_nms)) { 21 | promise_lgl <- is_unevaled_promise(var_nm, src_env) 22 | if(promise_lgl) { 23 | promise_expr <- pryr:::promise_code(var_nm, src_env) 24 | promise_env <- pryr:::promise_env(var_nm, src_env) 25 | # Assign this expression as a promise (delayed assignment) in our 26 | # cloned environment 27 | eval(bquote( 28 | delayedAssign(var_nm, .(promise_expr), eval.env = promise_env, assign.env = src_env))) 29 | } else { 30 | src_env[[var_nm]] <- original_env[[var_nm]] 31 | } 32 | } 33 | 34 | if(refactor.time) { 35 | refactored_time <- system.time(refactored_value <- eval(refactored, src_env))[["elapsed"]] 36 | } else { 37 | refactored_value <- eval(refactored, src_env) 38 | } 39 | 40 | ## record env after modifications made by refactored code 41 | new_env2 <- clone_env(src_env) 42 | 43 | if(refactor.value && !identical2(original_value, refactored_value)) { 44 | stop("The refactored expression returns a different value from the original one.\n\n", 45 | if(refactor.waldo) 46 | paste(waldo::compare( 47 | original_value, refactored_value, x_arg = "original", y_arg = "refactored"), 48 | collapse = "\n\n") 49 | else 50 | paste(all.equal(original_value, refactored_value), collapse = "\n\n"), 51 | call. = FALSE) 52 | } 53 | 54 | if(refactor.env) { 55 | vars1 <- ls(envir = new_env1, all.names = TRUE) 56 | vars2 <- ls(envir = new_env2, all.names = TRUE) 57 | if(!identical2(vars1, vars2)) { 58 | setdiff1 <- setdiff(vars1, vars2) 59 | msg1 <- paste0( 60 | "Some variables defined in the original code, were not found in the ", 61 | "refactored code: ", toString(setdiff1), 62 | "\nDo you need `rm(", toString(setdiff1), ")`") 63 | setdiff2 <- setdiff(vars2, vars1) 64 | msg2 <- paste0( 65 | "Some variables defined in the refactored code, were not found in the ", 66 | "original code: ", toString(setdiff1), 67 | "\nDo you need `rm(", toString(setdiff2), ")`") 68 | stop(paste(c(msg1, msg2), collapse = "\n"), call. = FALSE) 69 | } 70 | for(var_nm in vars1) { 71 | promise_lgl <- is_unevaled_promise(var_nm, env = new_env1) 72 | if(promise_lgl) { 73 | if(! is_unevaled_promise(var_nm, new_env2)) 74 | stop("`", var_nm, "` is an unevaled promise in the original code, but is evaluated", 75 | "in the refactored code") 76 | 77 | promise_info1 <- list( 78 | code = pryr:::promise_code(var_nm, new_env1), 79 | env = pryr:::promise_env(var_nm, new_env1)) 80 | 81 | promise_info2 <- list( 82 | code = pryr:::promise_code(var_nm, new_env2), 83 | env = pryr:::promise_env(var_nm, new_env2)) 84 | 85 | if(!identical(promise_info1, promise_info2)) { 86 | stop("The promise `var_nm` is different in original and refactored code \n", 87 | paste(waldo::compare( 88 | promise_info1, promise_info2, x_arg = "original", y_arg = "refactored"), 89 | collapse = "\n\n"), 90 | call. = FALSE) 91 | } 92 | } else { 93 | if(is_unevaled_promise(var_nm, new_env2)) 94 | stop("`", var_nm, "` is an unevaled promise in the refactored code, but is evaluated", 95 | "in the original code") 96 | val1 <- new_env1[[var_nm]] 97 | val2 <- new_env2[[var_nm]] 98 | if(!identical2(val1, val2)) { 99 | stop("The variable `", var_nm, "` is bound to a different value ", 100 | "after the original and refactored code\n", 101 | if(refactor.waldo) 102 | paste(waldo::compare( 103 | val1, val2, x_arg = "original", y_arg = "refactored"), 104 | collapse = "\n\n") 105 | else 106 | paste(all.equal(val1, val2), collapse = "\n\n"), 107 | call. = FALSE) 108 | } 109 | 110 | } 111 | } 112 | } 113 | 114 | if(refactor.time && refactored_time > original_time) { 115 | stop("The refactored code ran slower than the original code.\n", 116 | paste(waldo::compare( 117 | original_time, refactored_time, x_arg = "original time (s)", y_arg = "refactored time (s)"), 118 | collapse = "\n\n"), 119 | call. = FALSE) 120 | } 121 | return(original_value) 122 | } 123 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # refactor 5 | 6 | {refactor} helps you test your refactored code with real live data. It’s 7 | a complement to unit tests, useful in the dirtier stage of refactoring, 8 | when we’re not quite sure if our unit tests are good enough or if we 9 | don’t want to write them yet because there are too many things changing. 10 | 11 | {refactor} lets you run both the original and refactored version of your 12 | code and checks whether the output is the same and if it runs as fast. 13 | 14 | As you encounter failures you might improve your unit tests, and when 15 | you’re comfortable with your work you can remove the original version 16 | 17 | ## Installation 18 | 19 | Install with: 20 | 21 | ``` r 22 | remotes::install_github("moodymudskipper/refactor") 23 | ``` 24 | 25 | ## Examples 26 | 27 | ``` r 28 | library(refactor) 29 | ``` 30 | 31 | `%refactor%` by default checks that the output value is consistent 32 | between the original and refactored expressions. 33 | 34 | They’ll often be used on the body of a function but can be used on any 35 | expression. 36 | 37 | Here I intend to correct an inefficient use of the `apply()` function, 38 | but used `pmax` incorrectly: 39 | 40 | ``` r 41 | fun1 <- function(data) { 42 | apply(data, 1, max) 43 | } %refactor% { 44 | pmax(data) 45 | } 46 | fun1(cars) 47 | #> Error: The refactored expression returns a different value from the original one. 48 | #> 49 | #> `original` is a double vector (4, 10, 7, 22, 16, ...) 50 | #> `refactored` is an S3 object of class , a list 51 | ``` 52 | 53 | Now using it correctly: 54 | 55 | ``` r 56 | fun2 <- function(data) { 57 | apply(data, 1, max) 58 | } %refactor% { 59 | do.call(pmax, data) 60 | } 61 | fun2(cars) 62 | #> [1] 4 10 7 22 16 10 18 26 34 17 28 14 20 24 28 26 34 34 46 63 | #> [20] 26 36 60 80 20 26 54 32 40 32 40 50 42 56 76 84 36 46 68 64 | #> [39] 32 48 52 56 64 66 54 70 92 93 120 85 65 | ``` 66 | 67 | We can use the option `refactor.env` to test that the local environment 68 | isn’t changed in different ways by the original and refactored 69 | expression. 70 | 71 | ``` r 72 | options("refactor.env" = TRUE) 73 | { 74 | # original code 75 | data <- cars 76 | i <- 1 77 | apply(data, i, max) 78 | } %refactor% { 79 | # refactored code 80 | do.call(pmax, cars) 81 | } 82 | #> Error: Some variables defined in the original code, were not found in the refactored code: data, i 83 | #> Do you need `rm(data, i)` 84 | #> Some variables defined in the refactored code, were not found in the original code: data, i 85 | #> Do you need `rm()` 86 | ``` 87 | 88 | We can use the option `refactor.time` to test that the refactored 89 | solution is faster. 90 | 91 | ``` r 92 | # use bigger data so execution time differences are noticeable 93 | cars2 <- do.call(rbind, replicate(1000,cars, F)) 94 | 95 | options("refactor.time" = TRUE) 96 | fun3 <- function(data) { 97 | do.call(pmax, data) 98 | } %refactor% { 99 | apply(data, 1, max) 100 | } 101 | fun3(cars2) 102 | #> Error: The refactored code ran slower than the original code. 103 | #> `original time (s)`: 0.00 104 | #> `refactored time (s)`: 0.03 105 | ``` 106 | 107 | ## Other functions 108 | 109 | It’s often easier to use the functions below: 110 | 111 | - `%refactor_chunk%` behaves like `%refactor%` with 112 | `options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)`, 113 | it’s convenient to refactor chunks of code that modify the local 114 | environment. 115 | - `%refactor_value%` behaves like `%refactor%` with 116 | `options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)`, 117 | it’s convenient to refactor the body of a function that returns a 118 | useful value. 119 | - `%refactor_chunk_and_value%` behaves like `%refactor%` with 120 | `options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)`, 121 | it’s convenient to refactor the body of a function that returns a 122 | closure. 123 | - `%refactor_chunk_efficiently%`, `%refactor_value_efficiently%` and 124 | `%refactor_chunk_and_value_efficiently%` are variants of the above 125 | which also check the improved execution speed of the refactored 126 | solution 127 | - `%ignore_original%` and `%ignore_refactored%` are useful when original 128 | and refactored code give different results (possibly because one of 129 | them is wrong) and we want to keep both codes around without 130 | commenting. 131 | 132 | ## Additional functions 133 | 134 | We provide a few helper for refactoring tasks, check out the doc! 135 | 136 | ## Caveats 137 | 138 | We don’t control that side effects are the same on both sides, with the 139 | exception of modifications to the local environment. This means the 140 | following for instance might be different in your refactored code and 141 | you won’t be warned about it : 142 | 143 | - modified environments (other than local) 144 | - written files 145 | - printed output 146 | - messages 147 | - warnings 148 | - errors 149 | 150 | We might be able to support some of those though. 151 | 152 | More importantly since both side are run, side effects will be run twice 153 | and in some case this might change the behavior of the program, so use 154 | cautiously. 155 | -------------------------------------------------------------------------------- /inst/lint_dir_template.R: -------------------------------------------------------------------------------- 1 | library(lintr) 2 | 3 | linted_dir <- here::here() 4 | 5 | #### spot signs of non robust code --------------------------------------------- 6 | 7 | # Check that no absolute paths are used (e.g. "/var", "C:\System", "~/docs"). 8 | lint_dir(linted_dir, linters = absolute_path_linter()) 9 | 10 | # checks that closures have the proper usage using checkUsage. Note this runs eval on the code, so do not use with untrusted code. 11 | lint_dir(linted_dir, linters = object_usage_linter()) 12 | 13 | # Avoid the symbols T and F (for TRUE and FALSE). 14 | lint_dir(linted_dir, linters = T_and_F_symbol_linter()) 15 | 16 | # check for 1:length(...), 1:nrow(...), 1:ncol(...), 1:NROW(...) and 1:NCOL(...) expressions. 17 | # These often cause bugs when the right hand side is zero. It is safer to use seq_len or seq_along instead. 18 | lint_dir(linted_dir, linters = seq_linter()) 19 | 20 | # that checks for x == NA 21 | lint_dir(linted_dir, linters = equals_na_linter()) 22 | 23 | # Report the use of undesirable functions 24 | lint_dir(linted_dir, linters = undesirable_function_linter()) 25 | 26 | # Check for overly complicated expressions. See ?lintr::cyclocomp 27 | lint_dir(linted_dir, linters = cyclocomp_linter(25)) 28 | 29 | # Check that each step in a pipeline is on a new line, or the entire pipe fits on one line. 30 | lint_dir(linted_dir, linters = pipe_continuation_linter()) 31 | 32 | # Report the use of undesirable operators, e.g. `:::` or `<<-` and suggest an alternative. 33 | lint_dir(linted_dir, linters = undesirable_operator_linter()) 34 | 35 | # Check that there is no commented code outside roxygen blocks 36 | lint_dir(linted_dir, linters = commented_code_linter()) 37 | 38 | # Check that the source contains no TODO or FIXME comments (case-insensitive). 39 | lint_dir(linted_dir, linters = todo_comment_linter("todo")) 40 | lint_dir(linted_dir, linters = todo_comment_linter("fixme")) 41 | 42 | #### object names -------------------------------------------------------------- 43 | 44 | # Check that object names conform to a naming style. 45 | lint_dir(linted_dir, linters = object_name_linter(styles = "snake_case")) 46 | 47 | # check that object names are not too long. 48 | lint_dir(linted_dir, linters = object_length_linter(length = 30)) 49 | 50 | #### pure style ---------------------------------------------------------------- 51 | 52 | # automate most of the style 53 | styler::style_dir(linted_dir) 54 | 55 | # assignment_linter: checks that '<-' is always used for assignment 56 | lint_dir(linted_dir, linters = assignment_linter()) 57 | 58 | # Check that the c function is not used without arguments nor with a single constant. 59 | lint_dir(linted_dir, linters = unneeded_concatenation_linter()) 60 | 61 | # check that all commas are followed by spaces, but do not have spaces before them. 62 | lint_dir(linted_dir, linters = commas_linter()) 63 | 64 | # check that all infix operators have spaces around them. 65 | lint_dir(linted_dir, linters = infix_spaces_linter()) 66 | 67 | # check that only spaces are used for indentation, not tabs. 68 | lint_dir(linted_dir, linters = no_tab_linter()) 69 | 70 | # check the line length of both comments and code is less than length. 71 | lint_dir(linted_dir, linters = line_length_linter(80)) 72 | 73 | # check that opening curly braces are never on their own line and are always followed by a newline. 74 | lint_dir(linted_dir, linters = open_curly_linter(allow_single_line = FALSE)) 75 | 76 | # check that closed curly braces should always be on their own line unless they follow an else. 77 | lint_dir(linted_dir, linters = closed_curly_linter(allow_single_line = FALSE)) 78 | 79 | # check that all left parentheses have a space before them unless they are in a function call. 80 | lint_dir(linted_dir, linters = spaces_left_parentheses_linter()) 81 | 82 | # check that all left parentheses in a function call do not have spaces before them. 83 | lint_dir(linted_dir, linters = function_left_parentheses_linter()) 84 | 85 | # check that there is a space between right parenthesis and an opening curly brace. 86 | lint_dir(linted_dir, linters = paren_brace_linter()) 87 | 88 | # check that parentheses and square brackets do not have spaces directly inside them. 89 | lint_dir(linted_dir, linters = spaces_inside_linter()) 90 | 91 | # Check that no semicolons terminate statements. 92 | lint_dir(linted_dir, linters = semicolon_terminator_linter()) 93 | 94 | # checks that only single quotes are used to delimit string constants. 95 | lint_dir(linted_dir, linters = single_quotes_linter()) 96 | 97 | # check there are no trailing blank lines. 98 | lint_dir(linted_dir, linters = trailing_blank_lines_linter()) 99 | 100 | # check there are no trailing whitespace characters. 101 | lint_dir(linted_dir, linters = trailing_whitespace_linter()) 102 | 103 | #### Zealous linters ----------------------------------------------------------- 104 | # These are not very important or yield a lot of false positives 105 | 106 | # Check that the '[[' operator is used when extracting a single element from an object, not '[' (subsetting) nor '$' (interactive use). 107 | lint_dir(linted_dir, linters = extraction_operator_linter()) 108 | 109 | # Check that integers are explicitly typed using the form 1L instead of 1. 110 | lint_dir(linted_dir, linters = implicit_integer_linter()) 111 | 112 | # Check that file.path() is used to construct safe and portable paths. 113 | lint_dir(linted_dir, linters = nonportable_path_linter()) 114 | -------------------------------------------------------------------------------- /R/refactor.r: -------------------------------------------------------------------------------- 1 | #' Refactor Code 2 | #' 3 | #' These operators are used to refactor code and differ in the difference of 4 | #' behavior they allow between refactored and original code. 5 | #' 6 | #' * 7 | #' Both original and refactored expressions are run. By default the function will fail if 8 | #' the outputs are different. `%ignore_original%` and `%ignore_refactored%` do as 9 | #' heir names suggest. 10 | #' 11 | #' Options can be set to alter the behavior of `%refactor%`: 12 | #' 13 | #' * if `refactor.value` is `TRUE` (the default), the sameness of the outputs of 14 | #' `original` and `refactored` is tested 15 | #' * if `refactor.env` is `TRUE` (default is `FALSE`), the sameness of the modifications 16 | #' to the local environment made by `original` and `refactored` is tested 17 | #' * if `refactor.time` is `TRUE` (default is `FALSE`), the improved execution speed of 18 | #' the refactored solution is tested 19 | #' * if `refactor.waldo` is `TRUE` (the default), the `waldo::compare` will be used 20 | #' to compare objects or environments in case of failure. 'waldo' is sometimes 21 | #' slow and if we set this option to `FALSE`, `dplyr::all_equal()` would be used instead. 22 | #' 23 | #' `%refactor_*%` functions are variants that are not affected by options other than 24 | #' `refactor.waldo`: 25 | #' 26 | #' * `%refactor_chunk%` behaves like `%refactor%` with `options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)`, 27 | #' it's convenient to refactor chunks of code that modify the local environment. 28 | #' * `%refactor_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)`, 29 | #' it's convenient to refactor the body of a function that returns a useful value. 30 | #' * `%refactor_chunk_and_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)`, 31 | #' it's convenient to refactor the body of a function that returns a closure. 32 | #' * `%refactor_chunk_efficiently%`, `%refactor_value_efficiently%` and `%refactor_chunk_and_value_efficiently%` are variants of the above 33 | #' which also check the improved execution speed of the refactored solution 34 | #' 35 | #' 2 additional functions are used to avoid akward commenting of code, when the original 36 | #' and refactored code have different behaviors. 37 | #' 38 | #' * `%ignore_original%` and `%ignore_refactored%` are useful when original and 39 | #' refactored code give different results (possibly because one of them is wrong) 40 | #' and we want to keep both codes around without commenting. 41 | #' 42 | #' @param original original expression 43 | #' @param refactored refactored expression 44 | #' @name refactor 45 | #' @export 46 | `%refactor%` <- function(original, refactored) { 47 | refactor_impl( 48 | original = substitute(original), 49 | refactored = substitute(refactored), 50 | refactor.value = getOption("refactor.value"), 51 | refactor.time = getOption("refactor.time"), 52 | refactor.env = getOption("refactor.env"), 53 | refactor.waldo = getOption("refactor.waldo"), 54 | src_env = parent.frame()) 55 | } 56 | 57 | #' @export 58 | #' @rdname refactor 59 | `%refactor_chunk%` <- function(original, refactored) { 60 | refactor_impl( 61 | original = substitute(original), 62 | refactored = substitute(refactored), 63 | refactor.value = FALSE, 64 | refactor.time = FALSE, 65 | refactor.env = TRUE, 66 | refactor.waldo = getOption("refactor.waldo"), 67 | src_env = parent.frame()) 68 | } 69 | 70 | #' @export 71 | #' @rdname refactor 72 | `%refactor_value%` <- function(original, refactored) { 73 | refactor_impl( 74 | original = substitute(original), 75 | refactored = substitute(refactored), 76 | refactor.value = TRUE, 77 | refactor.time = FALSE, 78 | refactor.env = FALSE, 79 | refactor.waldo = getOption("refactor.waldo"), 80 | src_env = parent.frame()) 81 | } 82 | 83 | #' @export 84 | #' @rdname refactor 85 | `%refactor_chunk_and_value%` <- function(original, refactored) { 86 | refactor_impl( 87 | original = substitute(original), 88 | refactored = substitute(refactored), 89 | refactor.value = TRUE, 90 | refactor.time = FALSE, 91 | refactor.env = TRUE, 92 | refactor.waldo = getOption("refactor.waldo"), 93 | src_env = parent.frame()) 94 | } 95 | 96 | #' @export 97 | #' @rdname refactor 98 | `%refactor_chunk_efficiently%` <- function(original, refactored) { 99 | refactor_impl( 100 | original = substitute(original), 101 | refactored = substitute(refactored), 102 | refactor.value = FALSE, 103 | refactor.time = TRUE, 104 | refactor.env = TRUE, 105 | refactor.waldo = getOption("refactor.waldo"), 106 | src_env = parent.frame()) 107 | } 108 | 109 | #' @export 110 | #' @rdname refactor 111 | `%refactor_value_efficiently%` <- function(original, refactored) { 112 | refactor_impl( 113 | original = substitute(original), 114 | refactored = substitute(refactored), 115 | refactor.value = TRUE, 116 | refactor.time = TRUE, 117 | refactor.env = FALSE, 118 | refactor.waldo = getOption("refactor.waldo"), 119 | src_env = parent.frame()) 120 | } 121 | 122 | #' @export 123 | #' @rdname refactor 124 | `%refactor_chunk_and_value_efficiently%` <- function(original, refactored) { 125 | refactor_impl( 126 | original = substitute(original), 127 | refactored = substitute(refactored), 128 | refactor.value = TRUE, 129 | refactor.time = TRUE, 130 | refactor.env = TRUE, 131 | refactor.waldo = getOption("refactor.waldo"), 132 | src_env = parent.frame()) 133 | } 134 | 135 | #' @export 136 | #' @rdname refactor 137 | `%ignore_original%` <- function(original, refactored) { 138 | refactored <- substitute(refactored) 139 | eval.parent(refactored) 140 | } 141 | 142 | #' @export 143 | #' @rdname refactor 144 | `%ignore_refactored%` <- function(original, refactored) { 145 | refactored <- substitute(original) 146 | eval.parent(original) 147 | } 148 | 149 | 150 | 151 | --------------------------------------------------------------------------------