├── .Rbuildignore
├── .gitignore
├── R
    ├── refactor-package.R
    ├── zz.R
    ├── lintr_wrappers.R
    ├── utils.R
    ├── detect_similar_code.R
    ├── inspect_commented_code.R
    ├── check_files_parse.R
    ├── fetch_namespace_names.R
    ├── identify_hybrid_scripts.R
    ├── find_package_funs.R
    ├── refactor_impl.r
    └── refactor.r
├── refactor.Rproj
├── man
    ├── use_namespace_check.Rd
    ├── refactor-package.Rd
    ├── identify_hybrid_scripts.Rd
    ├── fetch_namespace_names.Rd
    ├── check_files_parse.Rd
    ├── find_pkg_funs.Rd
    ├── use_lintr_template_on_file.Rd
    ├── detect_similar_code.Rd
    └── refactor.Rd
├── DESCRIPTION
├── NAMESPACE
├── README.Rmd
├── inst
    ├── lint_file_template.R
    └── lint_dir_template.R
└── README.md


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^refactor\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .Rdata
4 | .httr-oauth
5 | .DS_Store
6 | 


--------------------------------------------------------------------------------
/R/refactor-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 
4 | # The following block is used by usethis to automatically manage
5 | # roxygen namespace tags. Modify with care!
6 | ## usethis namespace: start
7 | ## usethis namespace: end
8 | NULL
9 | 


--------------------------------------------------------------------------------
/R/zz.R:
--------------------------------------------------------------------------------
 1 | .onLoad <- function(libname, pkgname) {
 2 |   op <- options()
 3 |   op.refactor <- list(
 4 |     refactor.value = TRUE,
 5 |     refactor.env = FALSE,
 6 |     refactor.time = FALSE,
 7 |     refactor.waldo = TRUE
 8 |   )
 9 |   toset <- !(names(op.refactor ) %in% names(op))
10 |   if(any(toset)) options(op.refactor[toset])
11 | 
12 |   invisible(NULL)
13 | }
14 | 


--------------------------------------------------------------------------------
/refactor.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/man/use_namespace_check.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_namespace_names.R
 3 | \name{use_namespace_check}
 4 | \alias{use_namespace_check}
 5 | \title{Use namespace check}
 6 | \usage{
 7 | use_namespace_check()
 8 | }
 9 | \value{
10 | Returns \code{NULL} invisibly, called for side effects.
11 | }
12 | \description{
13 | Wrapper around \code{fetch_namespace_names()} that opens a new RStudio source editot
14 | tab with code to be pasted at the top of the main script to enumerate required
15 | packages and test if they are installed.
16 | }
17 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: refactor
 2 | Title: Tools for Refactoring Code
 3 | Version: 0.0.0.9000
 4 | Authors@R: 
 5 |     person(given = "Antoine",
 6 |            family = "Fabri",
 7 |            role = c("aut", "cre"),
 8 |            email = "antoine.fabri@gmail.com")
 9 | Description: Tools for refactoring code.
10 | License: GPL-3
11 | Encoding: UTF-8
12 | Language: en
13 | LazyData: true
14 | Roxygen: list(markdown = TRUE)
15 | RoxygenNote: 7.2.3
16 | Imports: 
17 |     waldo, pryr
18 | URL: https://github.com/moodymudskipper/refactor
19 | BugReports: https://github.com/moodymudskipper/refactor/issues
20 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export("%ignore_original%")
 4 | export("%ignore_refactored%")
 5 | export("%refactor%")
 6 | export("%refactor_chunk%")
 7 | export("%refactor_chunk_and_value%")
 8 | export("%refactor_chunk_and_value_efficiently%")
 9 | export("%refactor_chunk_efficiently%")
10 | export("%refactor_value%")
11 | export("%refactor_value_efficiently%")
12 | export(check_files_parse)
13 | export(detect_similar_code)
14 | export(fetch_namespace_names)
15 | export(find_pkg_funs)
16 | export(identify_hybrid_scripts)
17 | export(use_lintr_template_on_dir)
18 | export(use_lintr_template_on_file)
19 | export(use_namespace_check)
20 | 


--------------------------------------------------------------------------------
/man/refactor-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/refactor-package.R
 3 | \docType{package}
 4 | \name{refactor-package}
 5 | \alias{refactor}
 6 | \alias{refactor-package}
 7 | \title{refactor: Tools for Refactoring Code}
 8 | \description{
 9 | Tools for refactoring code.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://github.com/moodymudskipper/refactor}
15 |   \item Report bugs at \url{https://github.com/moodymudskipper/refactor/issues}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Antoine Fabri \email{antoine.fabri@gmail.com}
21 | 
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/man/identify_hybrid_scripts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/identify_hybrid_scripts.R
 3 | \name{identify_hybrid_scripts}
 4 | \alias{identify_hybrid_scripts}
 5 | \title{Identify hybrid scripts}
 6 | \usage{
 7 | identify_hybrid_scripts(path = ".", recursive = TRUE)
 8 | }
 9 | \arguments{
10 | \item{path}{A string. The path to a file or the folder to explore
11 | By default explores the working directory.}
12 | 
13 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory}
14 | }
15 | \value{
16 | Returns the path invisibly, called for side effects.
17 | }
18 | \description{
19 | Identify scripts who contain both function definitions and other object definitions.
20 | }
21 | 


--------------------------------------------------------------------------------
/man/fetch_namespace_names.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_namespace_names.R
 3 | \name{fetch_namespace_names}
 4 | \alias{fetch_namespace_names}
 5 | \title{Fetch namespace names}
 6 | \usage{
 7 | fetch_namespace_names(path = ".", recursive = TRUE)
 8 | }
 9 | \arguments{
10 | \item{path}{A string. The path to a file or the folder to explore
11 | By default explores the working directory.}
12 | 
13 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory}
14 | }
15 | \value{
16 | A character vector of package names
17 | }
18 | \description{
19 | Scans the code and finds all namespaced call of the form \code{pkg::fun} and returns
20 | a vector of unique package names.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/check_files_parse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/check_files_parse.R
 3 | \name{check_files_parse}
 4 | \alias{check_files_parse}
 5 | \title{Check that files parse correctly}
 6 | \usage{
 7 | check_files_parse(path = ".", recursive = TRUE)
 8 | }
 9 | \arguments{
10 | \item{path}{A string. The path to a file or the folder to explore
11 | By default explores the working directory.}
12 | 
13 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory}
14 | }
15 | \value{
16 | Returns the path invisibly, called for side effects.
17 | }
18 | \description{
19 | This identifies files that contain non syntactic code, including files that
20 | have an R extension despite not being an R script.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/find_pkg_funs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/find_package_funs.R
 3 | \name{find_pkg_funs}
 4 | \alias{find_pkg_funs}
 5 | \title{Find all uses of a package's functions}
 6 | \usage{
 7 | find_pkg_funs(pkg, path = ".", recursive = TRUE, exclude = NULL)
 8 | }
 9 | \arguments{
10 | \item{pkg}{A string. The name of the target package}
11 | 
12 | \item{path}{A string. The path to a file or the folder to explore
13 | By default explores the working directory.}
14 | 
15 | \item{recursive}{A boolean. Passed to \code{list.files()} if \code{path} is a directory}
16 | 
17 | \item{exclude}{A character vector of function names to dismiss.}
18 | }
19 | \value{
20 | Returns its input invisibly, called for side effects
21 | }
22 | \description{
23 | This will show false positives, but guarantees that we don't miss any instance.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/use_lintr_template_on_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/lintr_wrappers.R
 3 | \name{use_lintr_template_on_file}
 4 | \alias{use_lintr_template_on_file}
 5 | \alias{use_lintr_template_on_dir}
 6 | \title{Use lintr template}
 7 | \usage{
 8 | use_lintr_template_on_file(path = NULL)
 9 | 
10 | use_lintr_template_on_dir(path = NULL)
11 | }
12 | \arguments{
13 | \item{path}{Path to a R script or a directory. By default \code{use_lint_template_on_file()}
14 | considers the active document and \code{use_lint_template_on_dir()} considers the
15 | project folder as returned by \code{here::here()}}
16 | }
17 | \value{
18 | Returns \code{NULL} invisibly. Called for side effects.
19 | }
20 | \description{
21 | This opens up an untitled script in RStudio containing calls to \code{lintr::lint()}
22 | or \code{lintr::lint_dir()} with various linters, sorted by category and rough
23 | order of importance.
24 | }
25 | 


--------------------------------------------------------------------------------
/man/detect_similar_code.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/detect_similar_code.R
 3 | \name{detect_similar_code}
 4 | \alias{detect_similar_code}
 5 | \title{Detect similar code blocks}
 6 | \usage{
 7 | detect_similar_code(paths = ".", recursive = TRUE, pattern = NULL)
 8 | }
 9 | \arguments{
10 | \item{paths}{Paths to scripts or folders containing scripts}
11 | 
12 | \item{recursive}{Whether folders should be explorer recursively}
13 | 
14 | \item{pattern}{A regular expression used to filter files}
15 | }
16 | \value{
17 | The \code{paths} argument invisibly. Called for side effects
18 | }
19 | \description{
20 | This is a wrapper around \code{dupree::dupree()}. It analyses provided files looking for
21 | similar code. It is a bit slow so it can be impractical on big projects to run
22 | it with default (all files contained in working directory recursively), in
23 | this case it is wiser to run it on one or more specific \code{paths} or/and
24 | or to filter the files using the \code{pattern} argument.
25 | }
26 | 


--------------------------------------------------------------------------------
/R/lintr_wrappers.R:
--------------------------------------------------------------------------------
 1 | #' Use lintr template
 2 | #'
 3 | #' This opens up an untitled script in RStudio containing calls to `lintr::lint()`
 4 | #' or `lintr::lint_dir()` with various linters, sorted by category and rough
 5 | #' order of importance.
 6 | #'
 7 | #' @param path Path to a R script or a directory. By default `use_lint_template_on_file()`
 8 | #'   considers the active document and `use_lint_template_on_dir()` considers the
 9 | #'   project folder as returned by `here::here()`
10 | #'
11 | #' @return Returns `NULL` invisibly. Called for side effects.
12 | #' @export
13 | use_lintr_template_on_file <- function(path = NULL) {
14 |   if (is.null(path)) path <- rstudioapi::documentPath(rstudioapi::documentId(FALSE))
15 |   template_path <- system.file("lint_file_template.R", package = "refactor")
16 |   lines <- readLines(template_path)
17 |   lines[[3]] <- sprintf('path <- "%s"', path)
18 |   rstudioapi::documentNew(lines)
19 |   invisible(NULL)
20 | }
21 | 
22 | #' @rdname use_lintr_template_on_file
23 | #' @export
24 | use_lintr_template_on_dir <- function(path = NULL) {
25 |   if (is.null(path)) path <- here::here()
26 |   template_path <- system.file("lint_dir_template.R", package = "refactor")
27 |   lines <- readLines(template_path)
28 |   lines[[3]] <- sprintf('linted_dir <- "%s"', path)
29 |   rstudioapi::documentNew(lines)
30 |   invisible(NULL)
31 | }
32 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | gfn <- getFromNamespace
 2 | promise_code <- gfn("promise_code", "pryr")
 3 | promise_env <- gfn("promise_env", "pryr")
 4 | promise_evaled <- gfn("promise_evaled", "pryr")
 5 | is_promise2 <- gfn("is_promise2", "pryr")
 6 | 
 7 | clone_env <- function(env, deep = FALSE) {
 8 |   # create new environment with same parent
 9 |   clone <- new.env(parent = parent.env(env))
10 |   for(obj in ls(env, all.names = TRUE)) {
11 |     promise_lgl <- is_unevaled_promise(as.symbol(obj), env = env)
12 |     if(promise_lgl) {
13 |       # fetch promise expression and env
14 |       promise_expr <- promise_code(obj, env)
15 |       promise_env  <- promise_env(obj, env)
16 | 
17 |       # Assign this expression as a promise (delayed assignment) in our
18 |       # cloned environment
19 |       eval(bquote(
20 |         delayedAssign(obj, .(promise_expr), eval.env = promise_env, assign.env = clone)))
21 |     } else {
22 |       obj_val <- get(obj, envir = env)
23 |       if(is.environment(obj_val) && deep) {
24 |         assign(obj, clone_env(obj_val, deep = TRUE),envir= clone)
25 |       } else  {
26 |         assign(obj, obj_val, envir= clone)
27 |       }
28 |     }
29 |   }
30 |   attributes(clone) <- attributes(env)
31 |   clone
32 | }
33 | 
34 | is_unevaled_promise <- function(name, env) {
35 |   is_promise2(name, env) && !promise_evaled(name, env)
36 | }
37 | 
38 | identical2 <- function(target, current, ...) {
39 |   isTRUE(all.equal(target, current, ...))
40 | }
41 | 


--------------------------------------------------------------------------------
/R/detect_similar_code.R:
--------------------------------------------------------------------------------
 1 | #' Detect similar code blocks
 2 | #'
 3 | #' This is a wrapper around `dupree::dupree()`. It analyses provided files looking for
 4 | #'   similar code. It is a bit slow so it can be impractical on big projects to run
 5 | #'   it with default (all files contained in working directory recursively), in
 6 | #'   this case it is wiser to run it on one or more specific `paths` or/and
 7 | #'   or to filter the files using the `pattern` argument.
 8 | #'
 9 | #' @param paths Paths to scripts or folders containing scripts
10 | #' @param recursive Whether folders should be explorer recursively
11 | #' @param pattern A regular expression used to filter files
12 | #'
13 | #' @return The `paths` argument invisibly. Called for side effects
14 | #' @export
15 | detect_similar_code <- function(paths = ".", recursive = TRUE, pattern = NULL) {
16 |   paths_are_dirs <- sapply(paths, dir.exists)
17 |   dirs <- paths[paths_are_dirs]
18 |   paths_from_dirs <- unlist(lapply(dirs, list.files, recursive = recursive))
19 |   paths <- c(paths[!paths_are_dirs], paths_from_dirs)
20 |   if (!is.null(pattern)) paths <- grep(pattern, paths, value = TRUE)
21 |   dups <- dupree::dupree(paths)
22 |   dups <- as.data.frame(dups)
23 |   dups <- transform(
24 |     dups,
25 |     message = paste0("duplicate #", 1:nrow(dups), " (score: ", round(score,2),")"))
26 |   dups <-
27 |     rbind(
28 |       setNames(dups[c("file_a", "line_a", "message")], c("file", "line", "message")),
29 |       setNames(dups[c("file_b", "line_b", "message")], c("file", "line", "message"))
30 |     )
31 | 
32 |   dups <- transform(dups[order(dups$message),], type ="info", column = 1)
33 |   rstudioapi::sourceMarkers("dupree", dups)
34 | }
35 | 


--------------------------------------------------------------------------------
/R/inspect_commented_code.R:
--------------------------------------------------------------------------------
 1 | 
 2 | rleid <- function(x) {
 3 |   x <- rle(x)$lengths
 4 |   rep(seq_along(x), times = x)
 5 | }
 6 | 
 7 | inspect_commented_code <- function() {
 8 |   files <- list.files(here::here(), all.files = TRUE, full.names = TRUE, recursive = TRUE)
 9 |   files <- grep("\\.r$", files, ignore.case = TRUE, value = TRUE)
10 | 
11 |   inspect_block <- function(block) {
12 |     n <- nrow(block)
13 |     for (start in 1:n) {
14 |       not_a_good_start_comment <-
15 |         grepl("^ *$", block$code[start]) ||
16 |         grepl(":$", block$code[start]) ||
17 |         grepl(": ", block$code[start]) # or would yield false positive
18 |       if (not_a_good_start_comment) next
19 |       for (end in start:n)
20 |         code <- block$code[start:end]
21 |       parsed_lgl <- tryCatch(!is.symbol(parse(text = code)[[1]]), error = function(e) FALSE)
22 |       if (parsed_lgl) {
23 |         # keep only first line of commented code
24 |         return(block[start, ])
25 |       }
26 |     }
27 |     return(NULL)
28 |   }
29 | 
30 |   inspect_blocks <- function(file) {
31 |     code <- readLines(file)
32 |     line <- seq_along(code)
33 | 
34 |     commented_lgl <- startsWith(code, "#") & !startsWith(code, "#'")
35 |     block_id <- rleid(commented_lgl)
36 |     blocks_df <- data.frame(line, message = code, code = sub("^#+", "", code), block_id)[commented_lgl, ]
37 |     blocks <- split(blocks_df, blocks_df$block_id)
38 | 
39 |     df <- do.call(rbind, lapply(blocks, inspect_block))
40 |     if (is.null(df)) return(df)
41 |     df$block_id <- NULL
42 |     df$code <- NULL
43 |     df$file = file
44 |     df
45 |   }
46 | 
47 |   commented_code_df <- do.call(rbind, lapply(files, inspect_blocks))
48 |   commented_code_df <- transform(commented_code_df, type = "info", column = 1)
49 |   rstudioapi::sourceMarkers("commented code", commented_code_df)
50 | }
51 | 


--------------------------------------------------------------------------------
/R/check_files_parse.R:
--------------------------------------------------------------------------------
 1 | #' Check that files parse correctly
 2 | #'
 3 | #' This identifies files that contain non syntactic code, including files that
 4 | #' have an R extension despite not being an R script.
 5 | #'
 6 | #' @param path A string. The path to a file or the folder to explore
 7 | #'   By default explores the working directory.
 8 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory
 9 | #'
10 | #' @return Returns the path invisibly, called for side effects.
11 | #' @export
12 | check_files_parse <- function(path = ".", recursive = TRUE) {
13 |   if (dir.exists(path)) {
14 |     all_scripts <- list.files(
15 |       path,
16 |       pattern = "\\.[rR]$",
17 |       full.names = TRUE,
18 |       recursive = recursive
19 |     )
20 |   } else {
21 |     if (!file.exists(path)) {
22 |       stop("wrong file")
23 |     }
24 |     all_scripts <- path
25 |   }
26 | 
27 |   errors <- sapply(all_scripts, function(file) {
28 |     error <- tryCatch(parse(file), error = function(e) e$message)
29 |     if (!is.character(error)) error <- ""
30 |     if (startsWith(error, "invalid multibyte character in parse")) {
31 |       is_rdata <- !inherits(try(load(file, envir = new.env())), "try-error")
32 |       if(is_rdata) return("`.RData` file stored as `.R`")
33 |       is_rds <- !inherits(try(readRDS(file)), "try-error")
34 |       if(is_rdata) return("`.RDS` file stored as `.R`")
35 |     }
36 |     error
37 |   })
38 |   errors <- errors[errors != ""]
39 |   if(!length(errors)) {
40 |     message("All R scripts contain syntactic codes")
41 |     return(invisible(path))
42 |   }
43 |   markers <- data.frame(
44 |     type = "error",
45 |     file = names(errors),
46 |     message = errors,
47 |     line= ifelse(
48 |       endsWith(errors, "^"),
49 |       as.numeric(sub("^.*\n(\\d+): +.+\n +\\^$", "\\1", errors)),
50 |       1),
51 |     column=1
52 |   )
53 | 
54 |   rstudioapi::sourceMarkers("Check that files parse", markers)
55 |   invisible(path)
56 | }
57 | 


--------------------------------------------------------------------------------
/R/fetch_namespace_names.R:
--------------------------------------------------------------------------------
 1 | #' Fetch namespace names
 2 | #'
 3 | #' Scans the code and finds all namespaced call of the form `pkg::fun` and returns
 4 | #' a vector of unique package names.
 5 | #'
 6 | #' @param path A string. The path to a file or the folder to explore
 7 | #'   By default explores the working directory.
 8 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory
 9 | #'
10 | #' @return A character vector of package names
11 | #' @export
12 | fetch_namespace_names <- function(path = ".", recursive = TRUE) {
13 |   if (dir.exists(path)) {
14 |     all_scripts <- list.files(
15 |       path,
16 |       pattern = "\\.[rR]$",
17 |       full.names = TRUE,
18 |       recursive = recursive
19 |     )
20 |   } else {
21 |     if (!file.exists(path)) {
22 |       stop("wrong file")
23 |     }
24 |     all_scripts <- path
25 |   }
26 | 
27 |   code <- sapply(all_scripts, parse)
28 |   namespaces <- list()
29 |   collect_namespaced_calls <- function(call) {
30 |     if(!is.call(call) && !is.expression(call)) {
31 |       return()
32 |     }
33 |     if(rlang::is_call(call, "::")) {
34 |       namespaces <<- c(namespaces, call[[2]])
35 |       return()
36 |     }
37 |     lapply(as.list(call), collect_namespaced_calls)
38 |     invisible()
39 |   }
40 |   lapply(code, collect_namespaced_calls)
41 |   sort(as.character(unique(namespaces)))
42 | }
43 | 
44 | #' Use namespace check
45 | #'
46 | #' Wrapper around `fetch_namespace_names()` that opens a new RStudio source editot
47 | #'   tab with code to be pasted at the top of the main script to enumerate required
48 | #'   packages and test if they are installed.
49 | #'
50 | #' @return Returns `NULL` invisibly, called for side effects.
51 | #' @export
52 | use_namespace_check <- function() {
53 |   pkgs <- fetch_namespace_names()
54 |   #pkgs <- capture.output(dput(pkgs))
55 |   #pkgs <- paste(pkgs, collapse = "\n")
56 |   code <- sprintf("requireNamespace(\"%s\")", pkgs)
57 |   rstudioapi::documentNew(code, "r")
58 |   invisible(NULL)
59 | }
60 | 
61 | 


--------------------------------------------------------------------------------
/R/identify_hybrid_scripts.R:
--------------------------------------------------------------------------------
 1 | #' Identify hybrid scripts
 2 | #'
 3 | #' Identify scripts who contain both function definitions and other object definitions.
 4 | #'
 5 | #' @param path A string. The path to a file or the folder to explore
 6 | #'   By default explores the working directory.
 7 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory
 8 | #'
 9 | #' @return Returns the path invisibly, called for side effects.
10 | #' @export
11 | identify_hybrid_scripts <- function(path = ".", recursive = TRUE) {
12 |   if (dir.exists(path)) {
13 |     all_scripts <- list.files(
14 |       path,
15 |       pattern = "\\.[rR]$",
16 |       full.names = TRUE,
17 |       recursive = recursive
18 |     )
19 |   } else {
20 |     if (!file.exists(path)) {
21 |       stop("wrong file")
22 |     }
23 |     all_scripts <- path
24 |   }
25 | 
26 |   is_function_call <- function(x) {
27 |     is.call(x) &&
28 |       list(x[[1]]) %in% c(quote(`<-`), quote(`=`)) &&
29 |       is.call(x[[3]]) &&
30 |       identical(x[[c(3, 1)]], quote(`function`))
31 |   }
32 | 
33 |   get_file_markers <- function(file) {
34 |     code <- parse(file)
35 |     calls_are_fun_defs <- sapply(as.list(code), is_function_call)
36 | 
37 |     if(all(calls_are_fun_defs) || all(!calls_are_fun_defs)) {
38 |       return(NULL)
39 |     }
40 | 
41 |     data <- getParseData(code)
42 |     level1 <- data$id[data$parent == 0]
43 |     level2 <- data$id[data$parent %in% level1]
44 |     lines <- unique(data[data$id %in% level2, "line1"])
45 |     if(calls_are_fun_defs[[1]]) {
46 |       msg <- "The script starts with a function definition but contains other object definitions"
47 |       line <- lines[which(!calls_are_fun_defs)[[1]]]
48 |     } else {
49 |       msg <- "The script starts with a non function object definition but contains functions"
50 |       line <- lines[which(calls_are_fun_defs)[[1]]]
51 |     }
52 |     markers <- data.frame(
53 |       type = "info",
54 |       file = file,
55 |       line = line,
56 |       column = 1,
57 |       message = msg
58 |     )
59 |     markers
60 |   }
61 | 
62 |   markers <- lapply(all_scripts, get_file_markers)
63 |   markers <- do.call(rbind, markers)
64 |   if(is.null(markers)) {
65 |     message("All scripts contain either only function definitions or no function definition")
66 |     return(invisible(path))
67 |   }
68 |   rstudioapi::sourceMarkers("Hybrid scripts", markers)
69 |   invisible(path)
70 | }
71 | 


--------------------------------------------------------------------------------
/R/find_package_funs.R:
--------------------------------------------------------------------------------
 1 | is_infix <- function(x) {
 2 |   startsWith(x, "%") & endsWith(x, "%")
 3 | }
 4 | 
 5 | #' Find all uses of a package's functions
 6 | #'
 7 | #' This will show false positives, but guarantees that we don't miss any instance.
 8 | #'
 9 | #' @param pkg A string. The name of the target package
10 | #' @param path A string. The path to a file or the folder to explore
11 | #'   By default explores the working directory.
12 | #' @param recursive A boolean. Passed to `list.files()` if `path` is a directory
13 | #' @param exclude A character vector of function names to dismiss.
14 | #'
15 | #' @return Returns its input invisibly, called for side effects
16 | #' @export
17 | find_pkg_funs <- function(pkg, path = ".", recursive = TRUE, exclude = NULL
18 |                           # , include_s3_generics = FALSE
19 |                           ) {
20 |   # fetch package functions and their origin
21 |   imports <- getNamespaceImports(pkg)
22 |   imports <- Map(function(fun, pkg) data.frame(fun, pkg), imports, names(imports))
23 |   imports <- do.call(rbind, imports)
24 |   row.names(imports) <- NULL
25 |   exports <- data.frame(fun = getNamespaceExports(pkg))
26 |   exports <- exports[!exports$fun %in% exclude, , drop = FALSE]
27 |   pkg_funs <- merge(exports, imports, all.x = TRUE)
28 | 
29 |   # ns <- asNamespace(pkg)
30 |   # s3_methods  <- Filter(function(x) isS3method(x, envir = ns), ls(ns))
31 |   # s3_generics <- sub("^([^.]+)\\..*$", "\\1", s3_methods)
32 |   # s3 <- data.frame(method = s3_methods, generic = s3_generics)
33 |   # s3 <- subset(s3, !generic %in% ls(ns))
34 | 
35 | 
36 |   # fetch parse data
37 |   if(dir.exists(path)) {
38 |     files <- list.files(path, full.names = TRUE, recursive = recursive, pattern = "\\.[rR]$")
39 |   } else {
40 |     if (!file.exists(path)) stop(sprintf("Invalid value for `path`, '%s' doesn't exist", path))
41 |     files <- path
42 |   }
43 | 
44 |   parse_data <- lapply(files, function(file) {
45 |     data <- getParseData(parse(file))
46 |     data <- data[! data$token %in% c("SYMBOL_SUB", "SYMBOL_PACKAGE"),]
47 |     i_namespaced <- which(data$text %in% c("::", ":::")) + 1
48 |     if(length(i_namespaced)) data <- data[-i_namespaced,]
49 |     transform(data, file = file)
50 |     })
51 |   parse_data <- do.call(rbind, parse_data)
52 |   parse_data <- parse_data[c("line1", "col1", "text", "file")]
53 | 
54 |   # merge datasets
55 |   merged <- merge(parse_data, pkg_funs, by.x = "text", by.y = "fun")
56 | 
57 |   if(!nrow(merged)) {
58 |     message(sprintf("No potential function calls from {%s} were found in the code", pkg))
59 |     return(invisible(NULL))
60 |   }
61 | 
62 |   markers <- data.frame(
63 |     type = "info",
64 |     file = merged$file,
65 |     line = merged$line1,
66 |     column = merged$col1,
67 |     message = ifelse(
68 |       is.na(merged$pkg),
69 |       ifelse(
70 |         is_infix(merged$text),
71 |         sprintf("Found `%s`, do we want `library(%s, include.only = '%s')` ?", merged$text, pkg, merged$text),
72 |         sprintf("Found `%s`, do we want `%s::%s` ?", merged$text, pkg, merged$text)
73 |         ),
74 |       ifelse(
75 |         is_infix(merged$text),
76 |         sprintf("Found `%s`, do we want `library(%s, include.only = '%s')` (or more directly `library(%s, include.only = '%s')`)?", merged$text, pkg, merged$text, merged$pkg, merged$text),
77 |         sprintf("Found `%s`, do we want `%s::%s` (or more directly `%s::%s`) ?", merged$text, pkg, merged$text, merged$pkg, merged$text)
78 |         )
79 |       )
80 |   )
81 | 
82 |   rstudioapi::sourceMarkers("Functions that might come from", markers)
83 |   invisible(pkg)
84 | }
85 | 
86 | 


--------------------------------------------------------------------------------
/man/refactor.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/refactor.r
 3 | \name{refactor}
 4 | \alias{refactor}
 5 | \alias{\%refactor\%}
 6 | \alias{\%refactor_chunk\%}
 7 | \alias{\%refactor_value\%}
 8 | \alias{\%refactor_chunk_and_value\%}
 9 | \alias{\%refactor_chunk_efficiently\%}
10 | \alias{\%refactor_value_efficiently\%}
11 | \alias{\%refactor_chunk_and_value_efficiently\%}
12 | \alias{\%ignore_original\%}
13 | \alias{\%ignore_refactored\%}
14 | \title{Refactor Code}
15 | \usage{
16 | original \%refactor\% refactored
17 | 
18 | original \%refactor_chunk\% refactored
19 | 
20 | original \%refactor_value\% refactored
21 | 
22 | original \%refactor_chunk_and_value\% refactored
23 | 
24 | original \%refactor_chunk_efficiently\% refactored
25 | 
26 | original \%refactor_value_efficiently\% refactored
27 | 
28 | original \%refactor_chunk_and_value_efficiently\% refactored
29 | 
30 | original \%ignore_original\% refactored
31 | 
32 | original \%ignore_refactored\% refactored
33 | }
34 | \arguments{
35 | \item{original}{original expression}
36 | 
37 | \item{refactored}{refactored expression}
38 | }
39 | \description{
40 | These operators are used to refactor code and differ in the difference of
41 | behavior they allow between refactored and original code.
42 | }
43 | \details{
44 | \itemize{
45 | \item 
46 | }
47 | 
48 | Both original and refactored expressions are run. By default the function will fail if
49 | the outputs are different. \verb{\%ignore_original\%} and \verb{\%ignore_refactored\%} do as
50 | heir names suggest.
51 | 
52 | Options can be set to alter the behavior of \verb{\%refactor\%}:
53 | \itemize{
54 | \item if \code{refactor.value} is \code{TRUE} (the default), the sameness of the outputs of
55 | \code{original} and \code{refactored} is tested
56 | \item if \code{refactor.env} is \code{TRUE} (default is \code{FALSE}), the sameness of the modifications
57 | to the local environment made by \code{original} and \code{refactored} is tested
58 | \item if \code{refactor.time} is \code{TRUE} (default is \code{FALSE}), the improved execution speed of
59 | the refactored solution is tested
60 | \item if \code{refactor.waldo} is \code{TRUE} (the default), the \code{waldo::compare} will be used
61 | to compare objects or environments in case of failure. 'waldo' is sometimes
62 | slow and if we set this option to \code{FALSE}, \code{dplyr::all_equal()} would be used instead.
63 | }
64 | 
65 | \verb{\%refactor_*\%} functions are variants that are not affected by options other than
66 | \code{refactor.waldo}:
67 | \itemize{
68 | \item \verb{\%refactor_chunk\%} behaves like \verb{\%refactor\%} with \code{options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)},
69 | it's convenient to refactor chunks of code that modify the local environment.
70 | \item \verb{\%refactor_value\%} behaves like \verb{\%refactor\%} with \code{options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)},
71 | it's convenient to refactor the body of a function that returns a useful value.
72 | \item \verb{\%refactor_chunk_and_value\%} behaves like \verb{\%refactor\%} with \code{options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)},
73 | it's convenient to refactor the body of a function that returns a closure.
74 | \item \verb{\%refactor_chunk_efficiently\%}, \verb{\%refactor_value_efficiently\%} and \verb{\%refactor_chunk_and_value_efficiently\%} are variants of the above
75 | which also check the improved execution speed of the refactored solution
76 | }
77 | 
78 | 2 additional functions are used to avoid akward commenting of code, when the original
79 | and refactored code have different behaviors.
80 | \itemize{
81 | \item \verb{\%ignore_original\%} and \verb{\%ignore_refactored\%} are useful when original and
82 | refactored code give different results (possibly because one of them is wrong)
83 | and we want to keep both codes around without commenting.
84 | }
85 | }
86 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # refactor
 17 | 
 18 | {refactor} helps you test your refactored code with real live data. It's
 19 | a complement to unit tests, useful in the dirtier stage of refactoring,
 20 | when we're not quite sure if our unit tests are good enough or if we don't want to
 21 | write them yet because there are too many things changing.
 22 | 
 23 | {refactor} lets you run both the original and refactored 
 24 | version of your code and checks whether the output is the same and if it runs
 25 | as fast. 
 26 | 
 27 | As you encounter failures you might improve your unit tests, and
 28 | when you're comfortable with your work you can remove the original version
 29 | 
 30 | ## Installation
 31 | 
 32 | Install with:
 33 | 
 34 | ``` r
 35 | remotes::install_github("moodymudskipper/refactor")
 36 | ```
 37 | 
 38 | ## Examples
 39 | 
 40 | ```{r}
 41 | library(refactor)
 42 | ```
 43 | 
 44 | `%refactor%`  by default checks that the output value is consistent between
 45 | the original and refactored expressions.
 46 | 
 47 | They'll often be used on the body of a function but can be used on any expression.
 48 | 
 49 | Here I intend to correct an inefficient use of the `apply()` function,
 50 | but used `pmax` incorrectly:
 51 | 
 52 | ```{r, error=TRUE}
 53 | fun1 <- function(data) {
 54 |   apply(data, 1, max)
 55 | } %refactor% {
 56 |   pmax(data)
 57 | }
 58 | fun1(cars)
 59 | ```
 60 | 
 61 | Now using it correctly:
 62 | 
 63 | ```{r}
 64 | fun2 <- function(data) {
 65 |   apply(data, 1, max)
 66 | } %refactor% {
 67 |   do.call(pmax, data)
 68 | }
 69 | fun2(cars)
 70 | ```
 71 | 
 72 | We can use the option `refactor.env` to test that the local environment isn't 
 73 | changed in different ways by the original and refactored expression.
 74 | 
 75 | 
 76 | ```{r, error=TRUE}
 77 | options("refactor.env" = TRUE)
 78 | {
 79 |   # original code
 80 |   data <- cars
 81 |   i <- 1
 82 |   apply(data, i, max)
 83 | } %refactor% {
 84 |   # refactored code
 85 |   do.call(pmax, cars)
 86 | }
 87 | ```
 88 | 
 89 | We can use the option `refactor.time` to test that the refactored solution is
 90 | faster.
 91 | 
 92 | ```{r, error=TRUE}
 93 | # use bigger data so execution time differences are noticeable
 94 | cars2 <- do.call(rbind, replicate(1000,cars, F))
 95 | 
 96 | options("refactor.time" = TRUE)
 97 | fun3 <- function(data) {
 98 |   do.call(pmax, data)
 99 | } %refactor% {
100 |   apply(data, 1, max)
101 | }
102 | fun3(cars2)
103 | 
104 | ```
105 | 
106 | ## Other functions
107 | 
108 | It's often easier to use the functions below:
109 | 
110 | * `%refactor_chunk%` behaves like `%refactor%` with `options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)`,
111 |   it's convenient to refactor chunks of code that modify the local environment.
112 | * `%refactor_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)`,
113 |   it's convenient to refactor the body of a function that returns a useful value.
114 | * `%refactor_chunk_and_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)`,
115 |   it's convenient to refactor the body of a function that returns a closure.
116 | * `%refactor_chunk_efficiently%`, `%refactor_value_efficiently%` and `%refactor_chunk_and_value_efficiently%` are variants of the above
117 |   which also check the improved execution speed of the refactored solution
118 | * `%ignore_original%` and `%ignore_refactored%` are useful when original and
119 |   refactored code give different results (possibly because one of them is wrong)
120 |   and we want to keep both codes around without commenting.
121 |   
122 | ## Additional functions
123 | 
124 | We provide a few helper for refactoring tasks, check out the doc!
125 | 
126 | ## Caveats
127 | 
128 | We don't control that side effects are the same on both sides, with the exception
129 | of modifications to the local environment. This means
130 | the following for instance might be different in your refactored code 
131 | and you won't be warned about it :
132 | 
133 | * modified environments (other than local)
134 | * written files
135 | * printed output
136 | * messages
137 | * warnings
138 | * errors
139 | 
140 | We might be able to support some of those though.
141 | 
142 | More importantly since both side are run, side effects will be run twice and in some case
143 | this might change the behavior of the program, so use cautiously.
144 | 


--------------------------------------------------------------------------------
/inst/lint_file_template.R:
--------------------------------------------------------------------------------
  1 | library(lintr)
  2 | 
  3 | path <- ""
  4 | 
  5 | #### spot signs of non robust code ---------------------------------------------
  6 | 
  7 | # Check that no absolute paths are used (e.g. "/var", "C:\System", "~/docs").
  8 | lint(path, linters = absolute_path_linter())
  9 | 
 10 | # checks that closures have the proper usage using checkUsage. Note this runs eval on the code, so do not use with untrusted code.
 11 | lint(path, linters = object_usage_linter())
 12 | 
 13 | # Avoid the symbols T and F (for TRUE and FALSE).
 14 | lint(path, linters = T_and_F_symbol_linter())
 15 | 
 16 | # check for 1:length(...), 1:nrow(...), 1:ncol(...), 1:NROW(...) and 1:NCOL(...) expressions.
 17 | # These often cause bugs when the right hand side is zero. It is safer to use seq_len or seq_along instead.
 18 | lint(path, linters = seq_linter())
 19 | 
 20 | # that checks for x == NA
 21 | lint(path, linters = equals_na_linter())
 22 | 
 23 | # Report the use of undesirable functions
 24 | lint(path, linters = undesirable_function_linter(undesirable))
 25 | 
 26 | # Check for overly complicated expressions. See ?lintr::cyclocomp
 27 | lint(path, linters = cyclocomp_linter(25))
 28 | 
 29 | # Check that each step in a pipeline is on a new line, or the entire pipe fits on one line.
 30 | lint(path, linters = pipe_continuation_linter())
 31 | 
 32 | # Report the use of undesirable operators, e.g. `:::` or `<<-` and suggest an alternative.
 33 | lint(path, linters = undesirable_operator_linter())
 34 | 
 35 | # Check that there is no commented code outside roxygen blocks
 36 | lint(path, linters = commented_code_linter())
 37 | 
 38 | # Check that the source contains no TODO or FIXME comments (case-insensitive).
 39 | lint(path, linters = todo_comment_linter("todo"))
 40 | lint(path, linters = todo_comment_linter("fixme"))
 41 | 
 42 | #### object names --------------------------------------------------------------
 43 | 
 44 | # Check that object names conform to a naming style.
 45 | lint(path, linters = object_name_linter(styles = "snake_case"))
 46 | 
 47 | # check that object names are not too long.
 48 | lint(path, linters = object_length_linter(length = 30))
 49 | 
 50 | #### pure style ----------------------------------------------------------------
 51 | 
 52 | # assignment_linter: checks that '<-' is always used for assignment
 53 | lint(path, linters = assignment_linter())
 54 | 
 55 | # Check that the c function is not used without arguments nor with a single constant.
 56 | lint(path, linters = unneeded_concatenation_linter())
 57 | 
 58 | # check that all commas are followed by spaces, but do not have spaces before them.
 59 | lint(path, linters = commas_linter())
 60 | 
 61 | # check that all infix operators have spaces around them.
 62 | lint(path, linters = infix_spaces_linter())
 63 | 
 64 | # check that only spaces are used for indentation, not tabs.
 65 | lint(path, linters = no_tab_linter())
 66 | 
 67 | # check the line length of both comments and code is less than length.
 68 | lint(path, linters = line_length_linter(80))
 69 | 
 70 | # check that opening curly braces are never on their own line and are always followed by a newline.
 71 | lint(path, linters = open_curly_linter(allow_single_line = FALSE))
 72 | 
 73 | # check that closed curly braces should always be on their own line unless they follow an else.
 74 | lint(path, linters = closed_curly_linter(allow_single_line = FALSE))
 75 | 
 76 | # check that all left parentheses have a space before them unless they are in a function call.
 77 | lint(path, linters = spaces_left_parentheses_linter())
 78 | 
 79 | # check that all left parentheses in a function call do not have spaces before them.
 80 | lint(path, linters = function_left_parentheses_linter())
 81 | 
 82 | # check that there is a space between right parenthesis and an opening curly brace.
 83 | lint(path, linters = paren_brace_linter())
 84 | 
 85 | # check that parentheses and square brackets do not have spaces directly inside them.
 86 | lint(path, linters = spaces_inside_linter())
 87 | 
 88 | # Check that no semicolons terminate statements.
 89 | lint(path, linters = semicolon_terminator_linter())
 90 | 
 91 | # checks that only single quotes are used to delimit string constants.
 92 | lint(path, linters = single_quotes_linter())
 93 | 
 94 | # check there are no trailing blank lines.
 95 | lint(path, linters = trailing_blank_lines_linter())
 96 | 
 97 | # check there are no trailing whitespace characters.
 98 | lint(path, linters = trailing_whitespace_linter())
 99 | 
100 | #### Zealous linters -----------------------------------------------------------
101 | # These are not very important or yield a lot of false positives
102 | 
103 | # Check that the '[[' operator is used when extracting a single element from an object, not '[' (subsetting) nor '$' (interactive use).
104 | lint(path, linters = extraction_operator_linter())
105 | 
106 | # Check that integers are explicitly typed using the form 1L instead of 1.
107 | lint(path, linters = implicit_integer_linter())
108 | 
109 | # Check that file.path() is used to construct safe and portable paths.
110 | lint(path, linters = nonportable_path_linter())
111 | 


--------------------------------------------------------------------------------
/R/refactor_impl.r:
--------------------------------------------------------------------------------
  1 | refactor_impl <- function(original, refactored, refactor.value, refactor.time, refactor.env, refactor.waldo, src_env) {
  2 | 
  3 |   ## record env before modifications
  4 |   original_env  <- clone_env(src_env)
  5 |   original_var_nms  <- names(original_env)
  6 | 
  7 | 
  8 |   if(refactor.time) {
  9 |     original_time     <- system.time(original_value   <- eval(original, src_env))[["elapsed"]]
 10 |   } else {
 11 |     original_value   <- eval(original, src_env)
 12 |   }
 13 | 
 14 |   ## record env after modifications made by original code
 15 |   new_env1    <- clone_env(src_env)
 16 |   new_var_nms <- names(new_env1)
 17 | 
 18 |   ## reinitiate env
 19 |   rm(list=setdiff(new_var_nms, original_var_nms), envir = src_env)
 20 |   for (var_nm in intersect(new_var_nms, original_var_nms)) {
 21 |     promise_lgl <- is_unevaled_promise(var_nm, src_env)
 22 |     if(promise_lgl) {
 23 |       promise_expr <- pryr:::promise_code(var_nm, src_env)
 24 |       promise_env  <- pryr:::promise_env(var_nm, src_env)
 25 |       # Assign this expression as a promise (delayed assignment) in our
 26 |       # cloned environment
 27 |       eval(bquote(
 28 |         delayedAssign(var_nm, .(promise_expr), eval.env = promise_env, assign.env = src_env)))
 29 |     } else {
 30 |       src_env[[var_nm]] <- original_env[[var_nm]]
 31 |     }
 32 |   }
 33 | 
 34 |   if(refactor.time) {
 35 |     refactored_time   <- system.time(refactored_value <- eval(refactored, src_env))[["elapsed"]]
 36 |   } else {
 37 |     refactored_value <- eval(refactored, src_env)
 38 |   }
 39 | 
 40 |   ## record env after modifications made by refactored code
 41 |   new_env2   <- clone_env(src_env)
 42 | 
 43 |   if(refactor.value && !identical2(original_value, refactored_value)) {
 44 |     stop("The refactored expression returns a different value from the original one.\n\n",
 45 |          if(refactor.waldo)
 46 |            paste(waldo::compare(
 47 |              original_value, refactored_value, x_arg = "original", y_arg = "refactored"),
 48 |              collapse = "\n\n")
 49 |          else
 50 |            paste(all.equal(original_value, refactored_value), collapse = "\n\n"),
 51 |          call. = FALSE)
 52 |   }
 53 | 
 54 |   if(refactor.env) {
 55 |     vars1 <- ls(envir = new_env1, all.names = TRUE)
 56 |     vars2 <- ls(envir = new_env2, all.names = TRUE)
 57 |     if(!identical2(vars1, vars2)) {
 58 |       setdiff1 <- setdiff(vars1, vars2)
 59 |       msg1 <- paste0(
 60 |         "Some variables defined in the original code, were not found in the ",
 61 |         "refactored code: ", toString(setdiff1),
 62 |         "\nDo you need `rm(", toString(setdiff1), ")`")
 63 |       setdiff2 <- setdiff(vars2, vars1)
 64 |       msg2 <- paste0(
 65 |         "Some variables defined in the refactored code, were not found in the ",
 66 |         "original code: ", toString(setdiff1),
 67 |         "\nDo you need `rm(", toString(setdiff2), ")`")
 68 |       stop(paste(c(msg1, msg2), collapse = "\n"), call. = FALSE)
 69 |     }
 70 |     for(var_nm in vars1) {
 71 |       promise_lgl <- is_unevaled_promise(var_nm, env = new_env1)
 72 |       if(promise_lgl) {
 73 |         if(! is_unevaled_promise(var_nm, new_env2))
 74 |           stop("`", var_nm, "` is an unevaled promise in the original code, but is evaluated",
 75 |                "in the refactored code")
 76 | 
 77 |         promise_info1 <- list(
 78 |           code = pryr:::promise_code(var_nm, new_env1),
 79 |           env = pryr:::promise_env(var_nm, new_env1))
 80 | 
 81 |         promise_info2 <- list(
 82 |           code = pryr:::promise_code(var_nm, new_env2),
 83 |           env = pryr:::promise_env(var_nm, new_env2))
 84 | 
 85 |         if(!identical(promise_info1, promise_info2)) {
 86 |           stop("The promise `var_nm` is different in original and refactored code \n",
 87 |                paste(waldo::compare(
 88 |                  promise_info1, promise_info2, x_arg = "original", y_arg = "refactored"),
 89 |                  collapse = "\n\n"),
 90 |                call. = FALSE)
 91 |         }
 92 |       } else {
 93 |         if(is_unevaled_promise(var_nm, new_env2))
 94 |           stop("`", var_nm, "` is an unevaled promise in the refactored code, but is evaluated",
 95 |                "in the original code")
 96 |         val1 <- new_env1[[var_nm]]
 97 |         val2 <- new_env2[[var_nm]]
 98 |         if(!identical2(val1, val2)) {
 99 |           stop("The variable `", var_nm, "` is bound to a  different value ",
100 |                "after the original and refactored code\n",
101 |                if(refactor.waldo)
102 |                  paste(waldo::compare(
103 |                    val1, val2, x_arg = "original", y_arg = "refactored"),
104 |                    collapse = "\n\n")
105 |                else
106 |                  paste(all.equal(val1, val2), collapse = "\n\n"),
107 |                call. = FALSE)
108 |         }
109 | 
110 |       }
111 |     }
112 |   }
113 | 
114 |   if(refactor.time && refactored_time > original_time) {
115 |     stop("The refactored code ran slower than the original code.\n",
116 |          paste(waldo::compare(
117 |            original_time, refactored_time, x_arg = "original time (s)", y_arg = "refactored time (s)"),
118 |            collapse = "\n\n"),
119 |          call. = FALSE)
120 |   }
121 |   return(original_value)
122 | }
123 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # refactor
  5 | 
  6 | {refactor} helps you test your refactored code with real live data. It’s
  7 | a complement to unit tests, useful in the dirtier stage of refactoring,
  8 | when we’re not quite sure if our unit tests are good enough or if we
  9 | don’t want to write them yet because there are too many things changing.
 10 | 
 11 | {refactor} lets you run both the original and refactored version of your
 12 | code and checks whether the output is the same and if it runs as fast.
 13 | 
 14 | As you encounter failures you might improve your unit tests, and when
 15 | you’re comfortable with your work you can remove the original version
 16 | 
 17 | ## Installation
 18 | 
 19 | Install with:
 20 | 
 21 | ``` r
 22 | remotes::install_github("moodymudskipper/refactor")
 23 | ```
 24 | 
 25 | ## Examples
 26 | 
 27 | ``` r
 28 | library(refactor)
 29 | ```
 30 | 
 31 | `%refactor%` by default checks that the output value is consistent
 32 | between the original and refactored expressions.
 33 | 
 34 | They’ll often be used on the body of a function but can be used on any
 35 | expression.
 36 | 
 37 | Here I intend to correct an inefficient use of the `apply()` function,
 38 | but used `pmax` incorrectly:
 39 | 
 40 | ``` r
 41 | fun1 <- function(data) {
 42 |   apply(data, 1, max)
 43 | } %refactor% {
 44 |   pmax(data)
 45 | }
 46 | fun1(cars)
 47 | #> Error: The refactored expression returns a different value from the original one.
 48 | #> 
 49 | #> `original` is a double vector (4, 10, 7, 22, 16, ...)
 50 | #> `refactored` is an S3 object of class <data.frame>, a list
 51 | ```
 52 | 
 53 | Now using it correctly:
 54 | 
 55 | ``` r
 56 | fun2 <- function(data) {
 57 |   apply(data, 1, max)
 58 | } %refactor% {
 59 |   do.call(pmax, data)
 60 | }
 61 | fun2(cars)
 62 | #>  [1]   4  10   7  22  16  10  18  26  34  17  28  14  20  24  28  26  34  34  46
 63 | #> [20]  26  36  60  80  20  26  54  32  40  32  40  50  42  56  76  84  36  46  68
 64 | #> [39]  32  48  52  56  64  66  54  70  92  93 120  85
 65 | ```
 66 | 
 67 | We can use the option `refactor.env` to test that the local environment
 68 | isn’t changed in different ways by the original and refactored
 69 | expression.
 70 | 
 71 | ``` r
 72 | options("refactor.env" = TRUE)
 73 | {
 74 |   # original code
 75 |   data <- cars
 76 |   i <- 1
 77 |   apply(data, i, max)
 78 | } %refactor% {
 79 |   # refactored code
 80 |   do.call(pmax, cars)
 81 | }
 82 | #> Error: Some variables defined in the original code, were not found in the refactored code: data, i
 83 | #> Do you need `rm(data, i)`
 84 | #> Some variables defined in the refactored code, were not found in the original code: data, i
 85 | #> Do you need `rm()`
 86 | ```
 87 | 
 88 | We can use the option `refactor.time` to test that the refactored
 89 | solution is faster.
 90 | 
 91 | ``` r
 92 | # use bigger data so execution time differences are noticeable
 93 | cars2 <- do.call(rbind, replicate(1000,cars, F))
 94 | 
 95 | options("refactor.time" = TRUE)
 96 | fun3 <- function(data) {
 97 |   do.call(pmax, data)
 98 | } %refactor% {
 99 |   apply(data, 1, max)
100 | }
101 | fun3(cars2)
102 | #> Error: The refactored code ran slower than the original code.
103 | #>   `original time (s)`: 0.00
104 | #> `refactored time (s)`: 0.03
105 | ```
106 | 
107 | ## Other functions
108 | 
109 | It’s often easier to use the functions below:
110 | 
111 | - `%refactor_chunk%` behaves like `%refactor%` with
112 |   `options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)`,
113 |   it’s convenient to refactor chunks of code that modify the local
114 |   environment.
115 | - `%refactor_value%` behaves like `%refactor%` with
116 |   `options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)`,
117 |   it’s convenient to refactor the body of a function that returns a
118 |   useful value.
119 | - `%refactor_chunk_and_value%` behaves like `%refactor%` with
120 |   `options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)`,
121 |   it’s convenient to refactor the body of a function that returns a
122 |   closure.
123 | - `%refactor_chunk_efficiently%`, `%refactor_value_efficiently%` and
124 |   `%refactor_chunk_and_value_efficiently%` are variants of the above
125 |   which also check the improved execution speed of the refactored
126 |   solution
127 | - `%ignore_original%` and `%ignore_refactored%` are useful when original
128 |   and refactored code give different results (possibly because one of
129 |   them is wrong) and we want to keep both codes around without
130 |   commenting.
131 | 
132 | ## Additional functions
133 | 
134 | We provide a few helper for refactoring tasks, check out the doc!
135 | 
136 | ## Caveats
137 | 
138 | We don’t control that side effects are the same on both sides, with the
139 | exception of modifications to the local environment. This means the
140 | following for instance might be different in your refactored code and
141 | you won’t be warned about it :
142 | 
143 | - modified environments (other than local)
144 | - written files
145 | - printed output
146 | - messages
147 | - warnings
148 | - errors
149 | 
150 | We might be able to support some of those though.
151 | 
152 | More importantly since both side are run, side effects will be run twice
153 | and in some case this might change the behavior of the program, so use
154 | cautiously.
155 | 


--------------------------------------------------------------------------------
/inst/lint_dir_template.R:
--------------------------------------------------------------------------------
  1 | library(lintr)
  2 | 
  3 | linted_dir <- here::here()
  4 | 
  5 | #### spot signs of non robust code ---------------------------------------------
  6 | 
  7 | # Check that no absolute paths are used (e.g. "/var", "C:\System", "~/docs").
  8 | lint_dir(linted_dir, linters = absolute_path_linter())
  9 | 
 10 | # checks that closures have the proper usage using checkUsage. Note this runs eval on the code, so do not use with untrusted code.
 11 | lint_dir(linted_dir, linters = object_usage_linter())
 12 | 
 13 | # Avoid the symbols T and F (for TRUE and FALSE).
 14 | lint_dir(linted_dir, linters = T_and_F_symbol_linter())
 15 | 
 16 | # check for 1:length(...), 1:nrow(...), 1:ncol(...), 1:NROW(...) and 1:NCOL(...) expressions.
 17 | # These often cause bugs when the right hand side is zero. It is safer to use seq_len or seq_along instead.
 18 | lint_dir(linted_dir, linters = seq_linter())
 19 | 
 20 | # that checks for x == NA
 21 | lint_dir(linted_dir, linters = equals_na_linter())
 22 | 
 23 | # Report the use of undesirable functions
 24 | lint_dir(linted_dir, linters = undesirable_function_linter())
 25 | 
 26 | # Check for overly complicated expressions. See ?lintr::cyclocomp
 27 | lint_dir(linted_dir, linters = cyclocomp_linter(25))
 28 | 
 29 | # Check that each step in a pipeline is on a new line, or the entire pipe fits on one line.
 30 | lint_dir(linted_dir, linters = pipe_continuation_linter())
 31 | 
 32 | # Report the use of undesirable operators, e.g. `:::` or `<<-` and suggest an alternative.
 33 | lint_dir(linted_dir, linters = undesirable_operator_linter())
 34 | 
 35 | # Check that there is no commented code outside roxygen blocks
 36 | lint_dir(linted_dir, linters = commented_code_linter())
 37 | 
 38 | # Check that the source contains no TODO or FIXME comments (case-insensitive).
 39 | lint_dir(linted_dir, linters = todo_comment_linter("todo"))
 40 | lint_dir(linted_dir, linters = todo_comment_linter("fixme"))
 41 | 
 42 | #### object names --------------------------------------------------------------
 43 | 
 44 | # Check that object names conform to a naming style.
 45 | lint_dir(linted_dir, linters = object_name_linter(styles = "snake_case"))
 46 | 
 47 | # check that object names are not too long.
 48 | lint_dir(linted_dir, linters = object_length_linter(length = 30))
 49 | 
 50 | #### pure style ----------------------------------------------------------------
 51 | 
 52 | # automate most of the style
 53 | styler::style_dir(linted_dir)
 54 | 
 55 | # assignment_linter: checks that '<-' is always used for assignment
 56 | lint_dir(linted_dir, linters = assignment_linter())
 57 | 
 58 | # Check that the c function is not used without arguments nor with a single constant.
 59 | lint_dir(linted_dir, linters = unneeded_concatenation_linter())
 60 | 
 61 | # check that all commas are followed by spaces, but do not have spaces before them.
 62 | lint_dir(linted_dir, linters = commas_linter())
 63 | 
 64 | # check that all infix operators have spaces around them.
 65 | lint_dir(linted_dir, linters = infix_spaces_linter())
 66 | 
 67 | # check that only spaces are used for indentation, not tabs.
 68 | lint_dir(linted_dir, linters = no_tab_linter())
 69 | 
 70 | # check the line length of both comments and code is less than length.
 71 | lint_dir(linted_dir, linters = line_length_linter(80))
 72 | 
 73 | # check that opening curly braces are never on their own line and are always followed by a newline.
 74 | lint_dir(linted_dir, linters = open_curly_linter(allow_single_line = FALSE))
 75 | 
 76 | # check that closed curly braces should always be on their own line unless they follow an else.
 77 | lint_dir(linted_dir, linters = closed_curly_linter(allow_single_line = FALSE))
 78 | 
 79 | # check that all left parentheses have a space before them unless they are in a function call.
 80 | lint_dir(linted_dir, linters = spaces_left_parentheses_linter())
 81 | 
 82 | # check that all left parentheses in a function call do not have spaces before them.
 83 | lint_dir(linted_dir, linters = function_left_parentheses_linter())
 84 | 
 85 | # check that there is a space between right parenthesis and an opening curly brace.
 86 | lint_dir(linted_dir, linters = paren_brace_linter())
 87 | 
 88 | # check that parentheses and square brackets do not have spaces directly inside them.
 89 | lint_dir(linted_dir, linters = spaces_inside_linter())
 90 | 
 91 | # Check that no semicolons terminate statements.
 92 | lint_dir(linted_dir, linters = semicolon_terminator_linter())
 93 | 
 94 | # checks that only single quotes are used to delimit string constants.
 95 | lint_dir(linted_dir, linters = single_quotes_linter())
 96 | 
 97 | # check there are no trailing blank lines.
 98 | lint_dir(linted_dir, linters = trailing_blank_lines_linter())
 99 | 
100 | # check there are no trailing whitespace characters.
101 | lint_dir(linted_dir, linters = trailing_whitespace_linter())
102 | 
103 | #### Zealous linters -----------------------------------------------------------
104 | # These are not very important or yield a lot of false positives
105 | 
106 | # Check that the '[[' operator is used when extracting a single element from an object, not '[' (subsetting) nor '$' (interactive use).
107 | lint_dir(linted_dir, linters = extraction_operator_linter())
108 | 
109 | # Check that integers are explicitly typed using the form 1L instead of 1.
110 | lint_dir(linted_dir, linters = implicit_integer_linter())
111 | 
112 | # Check that file.path() is used to construct safe and portable paths.
113 | lint_dir(linted_dir, linters = nonportable_path_linter())
114 | 


--------------------------------------------------------------------------------
/R/refactor.r:
--------------------------------------------------------------------------------
  1 | #' Refactor Code
  2 | #'
  3 | #' These operators are used to refactor code and differ in the difference of
  4 | #' behavior they allow between refactored and original code.
  5 | #'
  6 | #' *
  7 | #' Both original and refactored expressions are run. By default the function will fail if
  8 | #' the outputs are different. `%ignore_original%` and `%ignore_refactored%` do as
  9 | #' heir names suggest.
 10 | #'
 11 | #' Options can be set to alter the behavior of `%refactor%`:
 12 | #'
 13 | #' * if `refactor.value` is `TRUE` (the default), the sameness of the outputs of
 14 | #'   `original` and `refactored` is tested
 15 | #' * if `refactor.env` is `TRUE` (default is `FALSE`), the sameness of the modifications
 16 | #'   to the local environment made by `original` and `refactored` is tested
 17 | #' * if `refactor.time` is `TRUE` (default is `FALSE`), the improved execution speed of
 18 | #'   the refactored solution is tested
 19 | #' * if `refactor.waldo` is `TRUE` (the default), the `waldo::compare` will be used
 20 | #'   to compare objects or environments in case of failure. 'waldo' is sometimes
 21 | #'   slow and if we set this option to `FALSE`, `dplyr::all_equal()` would be used instead.
 22 | #'
 23 | #' `%refactor_*%` functions are variants that are not affected by options other than
 24 | #'  `refactor.waldo`:
 25 | #'
 26 | #' * `%refactor_chunk%` behaves like `%refactor%` with `options(refactor.value = FALSE, refactor.env = TRUE, refactor.time = FALSE)`,
 27 | #'   it's convenient to refactor chunks of code that modify the local environment.
 28 | #' * `%refactor_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = FALSE, refactor.time = FALSE)`,
 29 | #'   it's convenient to refactor the body of a function that returns a useful value.
 30 | #' * `%refactor_chunk_and_value%` behaves like `%refactor%` with `options(refactor.value = TRUE, refactor.env = TRUE, refactor.time = FALSE)`,
 31 | #'   it's convenient to refactor the body of a function that returns a closure.
 32 | #' * `%refactor_chunk_efficiently%`, `%refactor_value_efficiently%` and `%refactor_chunk_and_value_efficiently%` are variants of the above
 33 | #'   which also check the improved execution speed of the refactored solution
 34 | #'
 35 | #'  2 additional functions are used to avoid akward commenting of code, when the original
 36 | #'  and refactored code have different behaviors.
 37 | #'
 38 | #'  * `%ignore_original%` and `%ignore_refactored%` are useful when original and
 39 | #'   refactored code give different results (possibly because one of them is wrong)
 40 | #'   and we want to keep both codes around without commenting.
 41 | #'
 42 | #' @param original original expression
 43 | #' @param refactored refactored expression
 44 | #' @name refactor
 45 | #' @export
 46 | `%refactor%` <- function(original, refactored) {
 47 |   refactor_impl(
 48 |     original = substitute(original),
 49 |     refactored =  substitute(refactored),
 50 |     refactor.value = getOption("refactor.value"),
 51 |     refactor.time = getOption("refactor.time"),
 52 |     refactor.env = getOption("refactor.env"),
 53 |     refactor.waldo = getOption("refactor.waldo"),
 54 |     src_env = parent.frame())
 55 | }
 56 | 
 57 | #' @export
 58 | #' @rdname refactor
 59 | `%refactor_chunk%` <- function(original, refactored) {
 60 |   refactor_impl(
 61 |     original = substitute(original),
 62 |     refactored =  substitute(refactored),
 63 |     refactor.value = FALSE,
 64 |     refactor.time = FALSE,
 65 |     refactor.env = TRUE,
 66 |     refactor.waldo = getOption("refactor.waldo"),
 67 |     src_env = parent.frame())
 68 | }
 69 | 
 70 | #' @export
 71 | #' @rdname refactor
 72 | `%refactor_value%` <- function(original, refactored) {
 73 |   refactor_impl(
 74 |     original = substitute(original),
 75 |     refactored =  substitute(refactored),
 76 |     refactor.value = TRUE,
 77 |     refactor.time = FALSE,
 78 |     refactor.env = FALSE,
 79 |     refactor.waldo = getOption("refactor.waldo"),
 80 |     src_env = parent.frame())
 81 | }
 82 | 
 83 | #' @export
 84 | #' @rdname refactor
 85 | `%refactor_chunk_and_value%` <- function(original, refactored) {
 86 |   refactor_impl(
 87 |     original = substitute(original),
 88 |     refactored =  substitute(refactored),
 89 |     refactor.value = TRUE,
 90 |     refactor.time = FALSE,
 91 |     refactor.env = TRUE,
 92 |     refactor.waldo = getOption("refactor.waldo"),
 93 |     src_env = parent.frame())
 94 | }
 95 | 
 96 | #' @export
 97 | #' @rdname refactor
 98 | `%refactor_chunk_efficiently%` <- function(original, refactored) {
 99 |   refactor_impl(
100 |     original = substitute(original),
101 |     refactored =  substitute(refactored),
102 |     refactor.value = FALSE,
103 |     refactor.time = TRUE,
104 |     refactor.env = TRUE,
105 |     refactor.waldo = getOption("refactor.waldo"),
106 |     src_env = parent.frame())
107 | }
108 | 
109 | #' @export
110 | #' @rdname refactor
111 | `%refactor_value_efficiently%` <- function(original, refactored) {
112 |   refactor_impl(
113 |     original = substitute(original),
114 |     refactored =  substitute(refactored),
115 |     refactor.value = TRUE,
116 |     refactor.time = TRUE,
117 |     refactor.env = FALSE,
118 |     refactor.waldo = getOption("refactor.waldo"),
119 |     src_env = parent.frame())
120 | }
121 | 
122 | #' @export
123 | #' @rdname refactor
124 | `%refactor_chunk_and_value_efficiently%` <- function(original, refactored) {
125 |   refactor_impl(
126 |     original = substitute(original),
127 |     refactored =  substitute(refactored),
128 |     refactor.value = TRUE,
129 |     refactor.time = TRUE,
130 |     refactor.env = TRUE,
131 |     refactor.waldo = getOption("refactor.waldo"),
132 |     src_env = parent.frame())
133 | }
134 | 
135 | #' @export
136 | #' @rdname refactor
137 | `%ignore_original%` <- function(original, refactored) {
138 |   refactored <- substitute(refactored)
139 |   eval.parent(refactored)
140 | }
141 | 
142 | #' @export
143 | #' @rdname refactor
144 | `%ignore_refactored%` <- function(original, refactored) {
145 |   refactored <- substitute(original)
146 |   eval.parent(original)
147 | }
148 | 
149 | 
150 | 
151 | 


--------------------------------------------------------------------------------