├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── check-standard.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── across2.R ├── checks.R ├── crossover.R ├── data-csat.R ├── data-csatraw.R ├── dplyover.R ├── meta_setup.R ├── over.R ├── over2.R ├── over_across_family.R ├── select_strings.R ├── select_values.R ├── selection_helpers.R ├── show_affix.R └── string_eval.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── data-raw └── csatraw.R ├── data ├── csat.rda └── csatraw.rda ├── dplyover.Rproj ├── man ├── across2.Rd ├── crossover.Rd ├── csat.Rd ├── csatraw.Rd ├── dplyover-package.Rd ├── figures │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon.ico │ ├── lifecycle-archived.svg │ ├── lifecycle-defunct.svg │ ├── lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-maturing.svg │ ├── lifecycle-questioning.svg │ ├── lifecycle-stable.svg │ ├── lifecycle-superseded.svg │ ├── logo.png │ └── logo_big.png ├── over.Rd ├── over2.Rd ├── over_across_family.Rd ├── rmd │ └── setup.Rmd ├── select_values.Rd ├── select_vars.Rd ├── selection_helpers.Rd ├── show_affix.Rd └── string_eval.Rd ├── pkgdown ├── extra.css └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── over.md │ └── over2.md │ ├── test-across2.R │ ├── test-crossover.R │ ├── test-over.R │ ├── test-over2.R │ ├── test-select_strings.R │ ├── test-select_values.R │ ├── test-show_affix.R │ └── 
test-string_eval.R └── vignettes ├── .gitignore ├── benchmark1.png ├── benchmark2.png ├── performance.Rmd ├── performance.rds ├── why_bench.rds └── why_dplyover.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^dplyover\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^data-raw$ 6 | ^codecov\.yml$ 7 | ^\.github$ 8 | ^doc$ 9 | ^Meta$ 10 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: windows-latest, r: 'release'} 26 | - {os: macOS-latest, r: 'release'} 27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | steps: 36 | - uses: actions/checkout@v2 37 | 38 | - uses: r-lib/actions/setup-r@v1 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | 42 | - uses: 
r-lib/actions/setup-pandoc@v1 43 | 44 | - name: Query dependencies 45 | run: | 46 | install.packages('remotes') 47 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 48 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 49 | shell: Rscript {0} 50 | 51 | - name: Cache R packages 52 | if: runner.os != 'Windows' 53 | uses: actions/cache@v2 54 | with: 55 | path: ${{ env.R_LIBS_USER }} 56 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 57 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 58 | 59 | - name: Install system dependencies 60 | if: runner.os == 'Linux' 61 | run: | 62 | while read -r cmd 63 | do 64 | eval sudo $cmd 65 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 66 | 67 | - name: Install dependencies 68 | run: | 69 | remotes::install_deps(dependencies = TRUE) 70 | remotes::install_cran("rcmdcheck") 71 | shell: Rscript {0} 72 | 73 | - name: Check 74 | env: 75 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 76 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 77 | shell: Rscript {0} 78 | 79 | - name: Upload check results 80 | if: failure() 81 | uses: actions/upload-artifact@main 82 | with: 83 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 84 | path: check 85 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | 7 | name: pkgdown 8 | 9 | jobs: 10 | pkgdown: 11 | runs-on: macOS-latest 12 | env: 13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - uses: r-lib/actions/setup-r@v1 18 | 19 | - uses: r-lib/actions/setup-pandoc@v1 20 | 21 | - name: Query 
dependencies 22 | run: | 23 | install.packages('remotes') 24 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 25 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 26 | shell: Rscript {0} 27 | 28 | - name: Cache R packages 29 | uses: actions/cache@v2 30 | with: 31 | path: ${{ env.R_LIBS_USER }} 32 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 33 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 34 | 35 | - name: Install dependencies 36 | run: | 37 | remotes::install_deps(dependencies = TRUE) 38 | install.packages("pkgdown", type = "binary") 39 | shell: Rscript {0} 40 | 41 | - name: Install package 42 | run: R CMD INSTALL . 43 | 44 | - name: Deploy package 45 | run: | 46 | git config --local user.email "actions@github.com" 47 | git config --local user.name "GitHub Actions" 48 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 49 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | branches: 8 | - main 9 | - master 10 | 11 | name: test-coverage 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: macOS-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - uses: actions/checkout@v2 20 | 21 | - uses: r-lib/actions/setup-r@v1 22 | 23 | - uses: r-lib/actions/setup-pandoc@v1 24 | 25 | - name: Query dependencies 26 | run: | 27 | install.packages('remotes') 28 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 29 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 30 | shell: Rscript {0} 31 | 32 | - name: Cache R packages 33 | uses: actions/cache@v2 34 | with: 
35 | path: ${{ env.R_LIBS_USER }} 36 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 37 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 38 | 39 | - name: Install dependencies 40 | run: | 41 | install.packages(c("remotes")) 42 | remotes::install_deps(dependencies = TRUE) 43 | remotes::install_cran("covr") 44 | shell: Rscript {0} 45 | 46 | - name: Test coverage 47 | run: covr::codecov() 48 | shell: Rscript {0} 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | 5 | to_do.R 6 | 7 | R/crossover_old.R 8 | R/cross_matrix.R 9 | R/dplyr_context_internals.R 10 | R/dplyr_data_mask.R 11 | R/across_new.R 12 | 13 | tests/testthat/test-across_new.R 14 | tests/testthat/test-crossover_old.R 15 | 16 | inst/doc 17 | doc 18 | docs 19 | Meta 20 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dplyover 2 | Title: Create columns by applying functions to vectors and/or columns in 'dplyr' 3 | Version: 0.0.8.9002 4 | Authors@R: 5 | person(given = "Tim", 6 | family = "Tiefenbach", 7 | role = c("aut", "cre"), 8 | email = "mailme@tim-tiefenbach.de", 9 | comment = c(ORCID = "0000-0001-9443-2434")) 10 | Description: Extension of 'dplyr’s functionality that builds a family of functions 11 | around dplyr::across(). 
12 | License: MIT + file LICENSE 13 | Encoding: UTF-8 14 | LazyData: true 15 | Roxygen: list(markdown = TRUE) 16 | RoxygenNote: 7.1.1 17 | URL: https://github.com/TimTeaFan/dplyover 18 | BugReports: https://github.com/TimTeaFan/dplyover/issues 19 | Suggests: 20 | testthat (>= 3.0.0), 21 | knitr, 22 | rmarkdown, 23 | lifecycle, 24 | covr, 25 | stringr, 26 | tidyr, 27 | bench, 28 | ggplot2 29 | Imports: 30 | dplyr (>= 1.0.0), 31 | rlang (>= 0.4.7), 32 | vctrs (>= 0.3.3), 33 | purrr, 34 | glue (>= 1.3.2), 35 | tibble (>= 2.1.3), 36 | tidyselect (>= 1.1.0) 37 | Depends: 38 | R (>= 3.2.0) 39 | Config/testthat/edition: 3 40 | VignetteBuilder: knitr 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: Tim Tiefenbach 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Tim Tiefenbach 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(.) 4 | export(across2) 5 | export(across2x) 6 | export(crossover) 7 | export(cut_names) 8 | export(dist_values) 9 | export(extract_names) 10 | export(over) 11 | export(over2) 12 | export(over2x) 13 | export(seq_range) 14 | export(show_prefix) 15 | export(show_suffix) 16 | importFrom(rlang,"%||%") 17 | importFrom(rlang,":=") 18 | importFrom(stats,na.omit) 19 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # dplyover 0.0.8.9002 2 | 3 | * fix `inspect_call` so that it works with the dev version of rlang::trace_back() 4 | 5 | # dplyover 0.0.8.9001 6 | 7 | * hotfix which fixed a bug preventing {dplyover} from running on R versions <= 4.0 8 | 9 | # dplyover 0.0.8.9000 10 | 11 | * first official release 12 | * over-across function family: `over()`, `across2()`, `crossover()` and variants 13 | * helper functions: selection helpers for values, strings and variables (see `?selection_helpers`) 14 | * helper functions for `across2` pre- and suffixes: `show_suffix()`, `show_prefix()` 15 | * datasets: `csat` and `csatraw` 16 | 17 | # dplyover 0.0.7.9000 18 | 19 | * pre-release with basic functionality 20 | -------------------------------------------------------------------------------- /R/checks.R: -------------------------------------------------------------------------------- 1 | # adapted from https://stackoverflow.com/a/60447909/9349302 2 | 
is.date <- function(x) {
  # TRUE when `x` inherits from "Date" or "POSIXt", FALSE otherwise.
  inherits(x, c("Date", "POSIXt"))
}

# Inspect the current call stack (via `rlang::trace_back()`).
#
# Arguments:
#   warn       if TRUE, check whether the `mutate()` call found above the
#              first dplyover frame was given `.keep = "used"` or
#              `.keep = "unused"`.
#   last_verb  if TRUE, also report the position of the last dplyr verb in
#              the stack.
#
# Returns a list with two elements:
#   warn       TRUE only if the `.keep` check above succeeded, else FALSE.
#   last_verb  index (into the trace) of the last frame whose function name
#              starts with one of the listed dplyr verbs, or NULL if
#              `last_verb = FALSE` or no such frame exists.
inspect_call <- function(warn = TRUE, last_verb = FALSE) {

  out <- list(warn = FALSE,
              last_verb = NULL)
  trace_bck <- rlang::trace_back()

  # The trace object comes in two layouts. When it has no `calls` element
  # (the case handled for the dev version of rlang, see NEWS 0.0.8.9002),
  # each row is turned into "<namespace><scope><function>". Otherwise the
  # head of every stored call is used directly.
  if (is.null(trace_bck$calls)) {
    call_fns <- purrr::map(purrr::transpose(trace_bck), function(trace) {
      paste0(trace$namespace,
             trace$scope,
             as.character(trace$call[1]))})
  } else {
    call_fns <- purrr::map(trace_bck$calls, function(call) `[[`(call, 1) )
  }

  # Only frames *before* the first dplyover frame are searched for `mutate`.
  # The original `call_fns[1:limit - 1]` parsed as `(1:limit) - 1` and only
  # worked because a zero index is dropped; `seq_len()` states the intent.
  # If no dplyover frame is found the whole stack is searched instead of
  # failing on `min(integer(0))`.
  own_frames <- which(grepl("^dplyover::", call_fns))
  n_before <- if (length(own_frames) > 0L) {
    min(own_frames) - 1L
  } else {
    length(call_fns)
  }
  mut_id <- which(grepl("^dplyr:::mutate", call_fns[seq_len(n_before)]))

  # last dplyr verb
  if (last_verb) {
    # Grouped alternation: the former `[mutate|summarise|...]` was a character
    # class and matched any dplyr internal starting with one of those letters.
    verb_ids <- which(grepl(
      "^dplyr:::(mutate|summarise|summarize|filter|select|arrange|transmute)",
      call_fns))
    if (length(verb_ids) > 0L) {
      out$last_verb <- max(verb_ids)
    }
  }

  # check keep
  if (warn && length(mut_id) > 0L) {

    # NOTE(review): the `- 2` offset presumably steps back from dplyr's
    # internal mutate method to the user-facing `mutate()` call - confirm.
    mut_pos <- max(mut_id) - 2L

    if (mut_pos >= 1L) {
      last_mut <- as.list(trace_bck$call[[mut_pos]])

      keep_arg <- grepl("^\\.keep$", names(last_mut), perl = TRUE)

      if (any(keep_arg)) {
        keep_val <- last_mut[keep_arg]

        if (keep_val %in% c("used", "unused")) {
          out$warn <- TRUE
        }
      }
    }
  }
  out
}

# this function is copied from dplyr
# see README section Acknowledgements as well as dplyr's license and copyright
#
# While `env` contains the binding `.__tidyeval_data_mask__.`, replace it by
# the parent of the environment stored under `.top_env`. With
# `recursive = FALSE` the result of the first such step is returned
# immediately; otherwise the loop continues until the marker binding is no
# longer found. `inherit` is passed on to `rlang::env_has()` and
# `rlang::env_get()`.
data_mask_top <- function(env, recursive = FALSE, inherit = FALSE) {
  while (rlang::env_has(env, ".__tidyeval_data_mask__.", inherit = inherit)) {
    env <- rlang::env_parent(rlang::env_get(env, ".top_env", inherit = inherit))
    if (!recursive) {
      return(env)
    }
  }

  env
}
60 | -------------------------------------------------------------------------------- /R/crossover.R: -------------------------------------------------------------------------------- 1 | #' Apply functions to a set of columns and a vector simultaneously in 'dplyr' 2 | #' 3 | #' @description 4 | 
#' `crossover()` combines the functionality of [dplyr::across()] with [over()] 5 | #' by iterating simultaneously over (i) a set of columns (`.xcols`) and (ii) 6 | #' a vector or list (`.y`). `crossover()` *always* applies the functions in 7 | #' `.fns` in a *nested* way to a combination of both inputs. There are, however, 8 | #' two different ways in which the functions in `.fns` are applied. 9 | #' 10 | #' When `.y` is a vector or list, each function in `.fns` is applied to 11 | #' *all pairwise combinations* between columns in `.xcols` and elements in 12 | #' `.y` (this resembles the behavior of `over2x()` and `across2x()`). 13 | #' 14 | #' `crossover()` has one trick up its sleeve, which sets it apart from the other 15 | #' functions in the <[`over-across family`][over_across_family]>: Its second input 16 | #' (`.y`) can be a function. This changes the original behavior slightly: First 17 | #' the function in `.y` is applied to all columns in `.xcols` to *generate* an 18 | #' input object which will be used as `.y` in the function calls in `.fns`. 19 | #' In this case each function is applied to all pairs between (i) columns in 20 | #' `.xcols` with (ii) the output elements that they generated through the 21 | #' function that was originally supplied to `.y`. Note that the underlying 22 | #' data must not be grouped if a function is supplied to `.y`. For examples see 23 | #' the example section below. 24 | #' 25 | #' @param .xcols <[`tidy-select`][dplyr_tidy_select]> Columns to transform. 26 | #' Because `crossover()` is used within functions like `summarise()` and 27 | #' `mutate()`, you can't select or compute upon grouping variables. 28 | #' 29 | #' @param .y An atomic vector or list to apply functions to. `crossover()` also 30 | #' accepts a function as `.y` argument. In this case each column in `.xcols` 31 | #' is looped over all the outputs that it generated with the function supplied 32 | #' to `.y`. 
Note: the underlying data must not be grouped if a function 33 | #' is supplied to `.y`. 34 | #' 35 | #' If a function is supplied, the following values are possible: 36 | #' 37 | #' - A bare function name, e.g. `unique` 38 | #' - An anonymous function, e.g. `function(x) unique(x)` 39 | #' - A purrr-style lambda, e.g. `~ unique(.x, fromLast = TRUE)` 40 | #' 41 | #' Note that additional arguments can only be specified with an anonymous 42 | #' function, a purrr-style lambda or with a pre-filled custom function. 43 | #' 44 | #' @param .fns Functions to apply to each column in `.xcols` and element in `.y`. 45 | #' 46 | #' Possible values are: 47 | #' 48 | #' - A function 49 | #' - A purrr-style lambda 50 | #' - A list of functions/lambdas 51 | #' 52 | #' Note that `NULL` is not accepted as argument to `.fns`. 53 | #' 54 | #' @param ... Additional arguments for the function calls in `.fns`. 55 | #' 56 | #' @param .names A glue specification that describes how to name the output 57 | #' columns. This can use: 58 | #' 59 | #' - `{xcol}` to stand for the selected column name, 60 | #' - `{y}` to stand for the selected vector element, and 61 | #' - `{fn}` to stand for the name of the function being applied. 62 | #' 63 | #' The default (`NULL`) is equivalent to `"{xcol}_{y}"` for the single function 64 | #' case and `"{xcol}_{y}_{fn}"` for the case where a list is used for `.fns`. 65 | #' 66 | #' Note that, depending on the nature of the underlying object in `.y`, 67 | #' specifying `{y}` will yield different results: 68 | #' 69 | #' - If `.y` is an unnamed atomic vector, `{y}` will represent each value. 70 | #' - If `.y` is a named list or atomic vector, `{y}` will represent each name. 71 | #' - If `.y` is an unnamed list, `{y}` will be the index number running from 1 to `length(y)`. 
72 | #' 73 | #' This standard behavior (interpretation of `{y}`) can be overridden by 74 | #' directly specifying: 75 | #' 76 | #' - `{y_val}` for `.y`'s values 77 | #' - `{y_nm}` for its names 78 | #' - `{y_idx}` for its index numbers 79 | #' 80 | #' Alternatively, a character vector of length equal to the number of columns to 81 | #' be created can be supplied to `.names`. Note that in this case, the glue 82 | #' specification described above is not supported. 83 | #' 84 | #' @param .names_fn Optionally, a function that is applied after the glue 85 | #' specification in `.names` has been evaluated. This is, for example, helpful 86 | #' in case the resulting names need to be further cleaned or trimmed. 87 | #' 88 | #' @returns 89 | #' `crossover()` returns a tibble with one column for each combination of 90 | #' columns in `.xcols`, elements in `.y` and functions in `.fns`. 91 | #' 92 | #' If a function is supplied as `.y` argument, `crossover()` returns a tibble with 93 | #' one column for each pair of output elements of `.y` and the column in `.xcols` 94 | #' that generated the output combined with each function in `.fns`. 95 | #' 96 | #' @seealso 97 | #' Other members of the <[`over-across function family`][over_across_family]>. 98 | #' 99 | #' @section Examples: 100 | #' 101 | #' ```{r, child = "man/rmd/setup.Rmd"} 102 | #' ``` 103 | #' 104 | #' For the basic functionality please refer to the examples in [over()] and 105 | #' [dplyr::across()]. 106 | #' 107 | #' ```{r, comment = "#>", collapse = TRUE} 108 | #' library(dplyr) 109 | #' 110 | #' # For better printing 111 | #' iris <- as_tibble(iris) 112 | #' ``` 113 | #' 114 | #' ## Creating many similar variables for multiple columns 115 | #' If `.y` is a vector or list, `crossover()` loops every combination between 116 | #' columns in `.xcols` and elements in `.y` over the functions in `.fns`. 
This 117 | #' is helpful in cases where we want to create a batch of similar variables with 118 | #' only slight changes in the arguments of the calling function. A good example 119 | #' are lagged variables. Below we create five lagged variables for each of 120 | #' 'Sepal.Length' and 'Sepal.Width'. To create nice names we use a named list 121 | #' as argument in `.fns` and specify the glue syntax in `.names`. 122 | #' 123 | #' ```{r, comment = "#>", collapse = TRUE} 124 | #' iris %>% 125 | #' transmute( 126 | #' crossover(starts_with("sepal"), 127 | #' 1:5, 128 | #' list(lag = ~ lag(.x, .y)), 129 | #' .names = "{xcol}_{fn}{y}")) %>% 130 | #' glimpse 131 | #' ``` 132 | #' 133 | #' ## Creating dummy variables for multiple variables (columns) 134 | #' The `.y` argument of `crossover()` can take a function instead of a list or vector. 135 | #' In the example below we select the columns 'type', 'product', 'csat' in `.xcols`. 136 | #' We supply the function [dist_values()] to `.y`, which is a cleaner variant of 137 | #' base R's `unique()`. This generates all distinct values for all three selected 138 | #' variables. Now, the function in `.fns`, `~ if_else(.y == .x, 1, 0)`, is applied 139 | #' to each pair of a distinct value in `.y` and the column in `.xcols` that generated 140 | #' this value. This basically creates a dummy variable for each value of each 141 | #' variable. Since some of the values contain whitespace characters, we can use the 142 | #' `.names_fn` argument to supply a *third* function that cleans the output names 143 | #' by replacing spaces with an underscore and setting all characters `tolower()`. 144 | #' 145 | #' ```{r, comment = "#>", collapse = TRUE} 146 | #' csat %>% 147 | #' transmute( 148 | #' crossover(.xcols = c(type, product, csat), 149 | #' .y = dist_values, 150 | #' .fns = ~ if_else(.y == .x, 1, 0), 151 | #' .names_fn = ~ gsub("\\s", "_", .x) %>% tolower(.) 
152 | #' )) %>% 153 | #' glimpse 154 | #' ``` 155 | #' 156 | #' 157 | #' @export 158 | 
crossover <- function(.xcols = dplyr::everything(), .y, .fns, ..., .names = NULL, .names_fn = NULL){

  # `dplyr::cur_data()` fails outside of a dplyr verb; turn that into a
  # crossover-specific error message.
  data <- tryCatch({
    dplyr::cur_data()
  }, error = function(e) {
    rlang::abort("`crossover()` must only be used inside dplyr verbs.")
  })

  # One-line text form of this very call; `sys.call()` must be evaluated
  # here, in the frame of `crossover()` itself.
  deparse_call <- deparse(sys.call(),
                          width.cutoff = 500L,
                          backtick = TRUE,
                          nlines = 1L,
                          control = NULL)

  # NOTE(review): `meta_setup()` is defined elsewhere in the package; it
  # presumably runs (or reuses) `crossover_setup()` for this call - confirm.
  setup <- meta_setup(grp_id = dplyr::cur_group_id(),
                      dep_call = deparse_call,
                      par_frame = parent.frame(),
                      setup_fn = "crossover_setup",
                      cols = rlang::enquo(.xcols),
                      y1 = .y,
                      fns = .fns,
                      names = .names,
                      names_fn = .names_fn)

  vars <- setup$vars
  y <- setup$y

  # nothing selected: return an empty one-row tibble
  if (length(vars) == 0L) {
    return(tibble::new_tibble(list(), nrow = 1L))
  }

  fns <- setup$fns
  names <- setup$names

  if (setup$each) {
    # `.y` was a function: `vars` holds every column name once per value it
    # generated, so the columns are repeated to line up with `y`.
    data <- data[unique(vars)]
    data_ls <- as.list(data)[vars]
    data <- tibble::new_tibble(data_ls, nrow = nrow(data))
  } else {
    data <- data[vars]
  }

  n_cols <- length(data)
  n_fns <- length(fns)
  seq_n_cols <- seq_len(n_cols)
  seq_fns <- seq_len(n_fns)

  k <- 1L

  if (setup$each) {
    # pairwise: column i is combined with element i of `y` only
    out <- vector("list", n_cols * n_fns)

    for (i in seq_n_cols) {
      col <- data[[i]]
      yi <- y[[i]]
      for (j in seq_fns) {
        fn <- fns[[j]]
        out[[k]] <- fn(col, yi, ...)
        k <- k + 1L
      }
    }

  } else {
    # nested: every column is combined with every element of `y`
    n_y <- length(y)
    seq_n_y <- seq_len(n_y)
    out <- vector("list", n_cols * n_y * n_fns)

    for (i in seq_n_cols) {
      col <- data[[i]]
      for(l in seq_n_y) {
        yl <- y[[l]]
        for (j in seq_fns) {
          fn <- fns[[j]]
          out[[k]] <- fn(col, yl, ...)
          k <- k + 1L
        }
      }
    }
  }

  # recycle all results to a common size and assemble the output tibble
  size <- vctrs::vec_size_common(!!!out)
  out <- vctrs::vec_recycle_common(!!!out, .size = size)
  names(out) <- names
  tibble::new_tibble(out, nrow = size)
}


# Setup helper for `crossover()`: resolves the selected columns, the `.y`
# input, the functions and the output names.
#
# Arguments:
#   cols      quosure with the tidy-select expression given as `.xcols`
#   y1        the `.y` input: a vector/list, or a function/formula that is
#             applied to each selected column to generate the values
#   fns       a function, a formula, or a list of functions/formulas
#   names     NULL, a glue specification, or a character vector of names
#   names_fn  NULL or a function/formula applied to every final name
#   each      flag, set to TRUE internally when `y1` is a function/formula
#
# Returns a list with the elements `vars` (column names), `y` (values),
# `fns` (list of functions), `names` (output column names) and `each`.
crossover_setup <- function(cols, y1, fns, names, names_fn, each = FALSE) {

  # setup: cols
  data <- dplyr::cur_data()
  cols <- rlang::quo_set_env(cols,
                             data_mask_top(rlang::quo_get_env(cols),
                                           recursive = FALSE,
                                           inherit = TRUE))
  vars <- names(tidyselect::eval_select(cols, data))

  # setup: .y

  # if .y is function:
  if (is.function(y1) || rlang::is_formula(y1)) {

    if (length(dplyr::cur_group()) > 0) {
      rlang::abort(c("Problem with `crossover()` input `.y`.",
                i = "If `.y` is a function the underlying data must not be grouped.",
                x = "`crossover()` was used on a grouped data.frame."))
    }

    # set flag `each`
    each <- TRUE

    # expand vars: one entry per value generated by the function in `.y`
    y1 <- rlang::as_function(y1)
    y1 <- purrr::map(dplyr::select(data, !! cols), y1) # replace with: data[[, vars]]
    vars <- unlist(purrr::imap(y1, ~ rep(.y, length(.x))))
    y1 <- unlist(y1, recursive = FALSE)

    if (!is.list(y1)) y1 <- unname(y1)
  }

  y1_nm <- names(y1)
  y1_idx <- as.character(seq_along(y1))
  # values are only usable in names if every element has length one
  y1_val <- if (is.data.frame(y1) && nrow(y1) != 1) {
    NULL
  } else if (is.list(y1) && is.vector(y1) &&
             any(purrr::map_lgl(y1, ~ length(.x) != 1))) {
    NULL
  } else {
    y1
  }

  # apply `.names` smart default
  if (is.function(fns) || rlang::is_formula(fns)) {
    names <- names %||% "{xcol}_{y}"
    fns <- list(`1` = fns)
  } else {
    names <- names %||% "{xcol}_{y}_{fn}"
  }

  if (!is.list(fns)) {
    rlang::abort(c("Problem with `crossover()` input `.fns`.",
                   i = "Input `.fns` must be a function, a formula, or a list of functions/formulas."))
  }

  # use index for unnamed lists
  if (is.list(y1) && !rlang::is_named(y1)) {
    names(y1) <- y1_idx
  }

  # TODO: Default needed when function in .y returns values?

  # handle formulas
  fns <- purrr::map(fns, rlang::as_function)

  # make sure fns has names, use number to replace unnamed
  if (is.null(names(fns))) {
    names_fns <- seq_along(fns)
  } else {
    names_fns <- names(fns)
    empties <- which(names_fns == "")
    if (length(empties)) {
      names_fns[empties] <- empties
    }
  }

  # setup control flow:
  # Number of columns `crossover()` will create. If `.y` was a function,
  # `vars` is already expanded to one entry per generated value, so the
  # count is `length(vars) * length(fns)`. Otherwise every column is
  # combined with every element of `y1`. (The former formula used
  # `length(y1)` in place of `length(vars)` and rejected correctly sized
  # `.names` vectors whenever the two lengths differed.)
  vars_no <- length(vars) * length(fns) * if (each) 1L else length(y1)
  maybe_glue <- any(grepl("{.*}", names, perl = TRUE))
  is_glue <- any(grepl("{(xcol|y|y_val|y_nm|y_idx|fn)}", names, perl = TRUE))

  # if .names use glue syntax:
  if (is_glue) {

    if (length(names) > 1) {
      rlang::abort(c("Problem with `crossover()` input `.names`.",
                     i = "Glue specification must be a character vector of length == 1.",
                     x = paste0("`.names` is of length: ", length(names), ".")))
    }

    # warn that default values are used if conditions not met
    if (is.null(y1_val) && grepl("{y_val}", names, perl = TRUE)) {
      rlang::warn("in `crossover()` `.names`: used 'y_idx' instead of 'y_val'. The latter only works with lists if all elements are length 1.")
    }

    if (is.null(y1_nm) && grepl("{y_nm}", names, perl = TRUE)) {
      rlang::warn("in `crossover()` `.names`: used 'y_idx' instead of 'y_nm', since the input object is unnamed.")
    }

    if (each) {
      # vectorised: `vars` and `y1` are aligned element by element
      names <- vctrs::vec_as_names(glue::glue(names,
                                              xcol = rep(vars, each = length(fns)),
                                              y = rep(names(y1) %||% y1, each = length(fns)),
                                              y_val = rep(y1_val %||% y1_idx, each = length(fns)),
                                              y_nm = rep(y1_nm %||% y1_idx, each = length(fns)),
                                              y_idx = rep(y1_idx, each = length(fns)),
                                              fn = rep(names_fns, length(y1))),
                                   repair = "check_unique")
    } else {
      # same nesting order as the loops in `crossover()`:
      # columns > elements of `.y` > functions
      n_cols <- length(vars)
      n_y1 <- length(y1)
      n_nm_fns <- length(names_fns)
      seq_n_col <- seq_len(n_cols)
      seq_n_y1 <- seq_len(n_y1)
      seq_nm_fns <- seq_len(n_nm_fns)
      k <- 1L
      out <- vector("character", n_cols* n_y1 * n_nm_fns)

      for (i in seq_n_col) {
        for(l in seq_n_y1) {
          for (j in seq_nm_fns) {
            out[[k]] <- glue::glue(names,
                                   xcol = vars[[i]],
                                   y = names(y1)[[l]] %||% y1[[l]],
                                   y_val = y1_val[[l]] %||% y1_idx[[l]],
                                   y_nm = y1_nm[[l]] %||% y1_idx[[l]],
                                   y_idx = y1_idx[[l]],
                                   fn = names_fns[[j]])
            k <- k + 1L
          }
        }
      }
      names <- vctrs::vec_as_names(out, repair = "check_unique")
    }


  # no correct glue syntax detected
  } else {
    # glue syntax might be wrong
    if (maybe_glue && length(names) == 1 && vars_no > 1) {
      rlang::abort(c("Problem with `crossover()` input `.names`.",
                     x = "Unrecognized glue specification `{...}` detected in `.names`.",
                     i = "`.names` only supports the following expressions: '{xcol}', '{y}', '{y_val}', '{y_nm}', '{y_idx}' or '{fn}'."
      ))
    }
    # check if non-glue names are unique
    vctrs::vec_as_names(names, repair = "check_unique")
    # check number of names
    if (length(names) != vars_no) {
      rlang::abort(c("Problem with `crossover()` input `.names`.",
                     i = "The number of elements in `.names` must equal the number of new columns.",
                     x = paste0(length(names), " elements provided to `.names`, but the number of new columns is ", vars_no, ".")
      ))
    }
  }

  # apply names_fn
  if (!is.null(names_fn)) {
    nm_f <- rlang::as_function(names_fn)
    names <- purrr::map_chr(names, nm_f)
  }

  value <- list(vars = vars, y = y1, fns = fns, names = names, each = each)
  value
}
413 | 414 | 415 | 416 | 417 | 418 | 419 | -------------------------------------------------------------------------------- /R/data-csat.R: -------------------------------------------------------------------------------- 1 | #' Customer Satisfaction Survey (recoded data) 2 | #' 3 | #' This data is randomly generated. It resembles data from a customer 4 | #' satisfaction survey using CSAT (Customer Satisfaction Score) for a 5 | #' contract-based product. The data has been recoded. The raw version of this data 6 | #' set can be found here <[`csatraw`][csatraw]>. 7 | #' 8 | #' @format A tibble with 150 rows and 15 variables: 9 | #' \describe{ 10 | #' \item{cust_id}{Customer identification number} 11 | #' \item{type}{Type of customer: "new", "existing" or "reactive"} 12 | #' \item{product}{The type of product: "basic", "advanced" or "premium"} 13 | #' \item{csat}{The overall Customer Satisfaction Score} 14 | #' \item{csat_open}{Follow-up question why the respondent gave this specific 15 | #' Customer Satisfaction rating. 
The open-ended answers have been coded into six 16 | #' categories (multiple answers possible).} 17 | #' \item{postal_contact, phone_contact, email_contact, website_contact, 18 | #' shop_contact}{When did the customer have last contact via given channel?} 19 | #' \item{postal_rating, phone_rating, email_rating, website_rating, 20 | #' shop_rating}{If customer had contact over the given channel: 21 | #' How satisfied was he?} 22 | #' } 23 | #' @examples 24 | #' csat 25 | "csat" 26 | -------------------------------------------------------------------------------- /R/data-csatraw.R: -------------------------------------------------------------------------------- 1 | #' Customer Satisfaction Survey (raw data) 2 | #' 3 | #' This data is randomly generated. It resembles raw data from a customer 4 | #' satisfaction survey using CSAT (Customer Satisfaction Score) for a 5 | #' contract-based product. The first three variables are given, all other 6 | #' variables come from a survey tool and are only named "item1" etc. 7 | #' A recoded version of this data set can be found here <[`csat`][csat]>. 8 | #' 9 | #' @format A tibble with 150 rows and 15 variables: 10 | #' \describe{ 11 | #' \item{cust_id}{Customer identification number} 12 | #' \item{type}{Type of customer: "new", "existing" or "reactive"} 13 | #' \item{product}{The type of product: "basic", "advanced" or "premium"} 14 | #' \item{item1}{The overall Customer Satisfaction Score\cr\cr Scale: Ranging from 1 = 15 | #' "Very unsatisfied" to 5 = "Very satisfied"} 16 | #' \item{item1_open}{Follow-up question why the respondent gave this specific 17 | #' Customer Satisfaction rating. 
The open-ended answers have been coded into six 18 | #' categories: 11 = "great product", 12 = "good service", 13 = "friendly staff", 19 | #' 21 = "too expensive", 22 = "unfriendly", 23 = "no response" (multiple answers 20 | #' possible).} 21 | #' \item{item2a, item3a, item4a, item5a, item6a}{When did the customer have last 22 | #' contact via postal mail (item2a), phone (item3a), email (item4a), website 23 | #' (item5a), a retail shop (item6a) ?\cr\cr Scale: 0 = "no contact", 1 = "more 24 | #' than 3 years ago", 2 = "within 1 to 3 years", 3 = "within the last year"} 25 | #' \item{item2b, item3b, item4b, item5b, item6b}{If customer had contact 26 | #' via postal mail (item2b), phone (item3b), email (item4b), website (item5b), 27 | #' a retail shop (item6b): How satisfied was he?\cr\cr 28 | #' Scale: Ranging from 1 = "Very unsatisfied", to 5 = "Very satisfied"} 29 | #' } 30 | #' @examples 31 | #' csatraw 32 | "csatraw" 33 | -------------------------------------------------------------------------------- /R/dplyover.R: -------------------------------------------------------------------------------- 1 | #' @description 2 | #' To learn more about dplyover, start with the vignette: 3 | #' `browseVignettes(package = "dplyover")` 4 | #' @importFrom rlang %||% 5 | #' @importFrom rlang := 6 | #' @importFrom stats na.omit 7 | "_PACKAGE" 8 | -------------------------------------------------------------------------------- /R/meta_setup.R: -------------------------------------------------------------------------------- 1 | # deprase call (similar to dplyr:::key_deparse) 2 | # this function is copied from dplyr 3 | # see README section Acknowledgements as well as dplyr's license and copyright 4 | deparse_call <- function(call) { 5 | deparse(call, 6 | width.cutoff = 500L, 7 | backtick = TRUE, 8 | nlines = 1L, 9 | control = NULL) 10 | } 11 | 12 | # environment where last value of across2 pre/suf error is stored 13 | .last <- rlang::new_environment() 14 | 15 | ## meta setup use by 
## Meta setup used by all major dplyover functions (tests passing).
#> This setup is rather dodgy and currently being overhauled,
#> see the new_meta_setup branch!
#> And yes, we shouldn't write something into par_frame, since dplyover does
#> not create this environment.
#
# Runs `setup_fn` once per call and caches the result in `par_frame` under the
# binding `.__dplyover_setup__.`, so that later groups can reuse it.
#
# Arguments:
#   grp_id    current group id (callers in this file pass dplyr::cur_group_id())
#   dep_call  deparsed call of the calling dplyover function (a string)
#   par_frame environment in which the cache is stored
#   setup_fn  function (or its name) that builds the setup; called via do.call()
#   ...       arguments forwarded to `setup_fn`
#
# Returns the setup object: either the cached one or a freshly computed one.
meta_setup <- function(grp_id, dep_call, par_frame, setup_fn, ...) {

  # display name of the calling function, e.g. "over(...)" becomes "over()"
  call_nm <- sub("([a-z0-9]+).*", "\\1()", dep_call)

  # collect (and thereby evaluate) the arguments destined for `setup_fn`
  dots <- rlang::list2(...)

  wrong_setup <- FALSE

  # meta setup: is there already a cache binding visible from `par_frame`?
  # NOTE(review): exists() is called with its default `inherits = TRUE`, so a
  # binding in an enclosing environment would also count -- confirm intended.
  setup_exists <- exists(".__dplyover_setup__.", envir = par_frame)

  # if setup already exists and this is not the first group: try to reuse it
  if (setup_exists && grp_id > 1L) {
    # get data
    parent_setup <- get(".__dplyover_setup__.", envir = par_frame)
    # get call number: position of the smallest entry in `call_his`
    call_no <- which.min(parent_setup$call_his)
    call_id <- paste0("call", call_no)
    # update "call_his": record the current group id for that call
    par_frame[[".__dplyover_setup__."]][["call_his"]][call_no] <- grp_id
    # check call and get data from existing call
    if (identical(parent_setup$call_lang[call_no], dep_call)) {
      return(parent_setup[[call_id]]$setup)
    }
    # otherwise continue: the registered call text does not match this call
    wrong_setup <- TRUE
  }
  # if this is a new call to over or if setup went wrong
  if (!setup_exists || wrong_setup) {

    # new setup
    # for the first group of calls whose name does not start with "over",
    # `inspect_call()` (defined elsewhere) decides whether to warn about `.keep`
    if (grp_id == 1 && !grepl("^over", call_nm, perl = TRUE)) {
      call_info <- inspect_call()
      if (call_info[["warn"]]){
        rlang::warn(glue::glue("`{call_nm}` does not support the `.keep` argument in `dplyr::mutate()` when set to 'used' or 'unused'."))
      }
    }
    # (re)initialise the cache and register this call as its only entry
    par_frame$`.__dplyover_setup__.` <- list()
    par_frame[[".__dplyover_setup__."]][["call_his"]] <- grp_id
    par_frame[[".__dplyover_setup__."]][["call_lang"]] <- dep_call
    call_id <- paste0("call", grp_id)
    # existing setup, but new call
  } else {
    parent_setup <- get(".__dplyover_setup__.", envir = par_frame)
    # register new call: append group id 1 and the deparsed call text
    par_frame[[".__dplyover_setup__."]][["call_his"]] <- c(parent_setup$call_his, 1)
    par_frame[[".__dplyover_setup__."]][["call_lang"]] <- c(parent_setup$call_lang, dep_call)
    # get number of current call
    # NOTE(review): `parent_setup` was fetched before the append above, so
    # which.min() sees the history WITHOUT the new entry -- verify that this
    # yields the intended slot when several calls share one `par_frame`.
    call_id <- paste0("call", which.min(parent_setup$call_his))
  }

  # in both cases: run `setup_fn` and write its result into par_frame
  par_frame[[".__dplyover_setup__."]][[call_id]][["setup"]] <-
    setup <- do.call(setup_fn, dots)

  setup
}
23 | #' 24 | #' Possible values are: 25 | #' 26 | #' - A function 27 | #' - A purrr-style lambda 28 | #' - A list of functions/lambdas 29 | #' 30 | #' For examples see the example section below. 31 | #' 32 | #' Note that, unlike `across()`, `over()` does not accept `NULL` as a 33 | #' value to `.fns`. 34 | #' 35 | #' @param ... Additional arguments for the function calls in `.fns`. 36 | #' 37 | #' @param .names A glue specification that describes how to name the output 38 | #' columns. This can use `{x}` to stand for the selected vector element, and 39 | #' `{fn}` to stand for the name of the function being applied. The default 40 | #' (`NULL`) is equivalent to `"{x}"` for the single function case and 41 | #' `"{x}_{fn}"` for the case where a list is used for `.fns`. 42 | #' 43 | #' Note that, depending on the nature of the underlying object in `.x`, 44 | #' specifying `{x}` will yield different results: 45 | #' 46 | #' - If `.x` is an unnamed atomic vector, `{x}` will represent each value. 47 | #' - If `.x` is a named list or atomic vector, `{x}` will represent each name. 48 | #' - If `.x` is an unnamed list, `{x}` will be the index number running from 1 to `length(x)`. 49 | #' 50 | #' This standard behavior (interpretation of `{x}`) can be overwritten by 51 | #' directly specifying: 52 | #' 53 | #' - `{x_val}` for `.x`'s values 54 | #' - `{x_nm}` for its names 55 | #' - `{x_idx}` for its index numbers 56 | #' 57 | #' Alternatively, a character vector of length equal to the number of columns to 58 | #' be created can be supplied to `.names`. Note that in this case, the glue 59 | #' specification described above is not supported. 60 | #' 61 | #' @param .names_fn Optionally, a function that is applied after the glue 62 | #' specification in `.names` has been evaluated. This is, for example, helpful 63 | #' in case the resulting names need to be further cleaned or trimmed. 
64 | #' 65 | #' @returns 66 | #' A tibble with one column for each element in `.x` and each function in `.fns`. 67 | #' 68 | #' @section Note: 69 | #' Similar to `dplyr::across()` `over()` works only inside dplyr verbs. 70 | #' 71 | #' @seealso 72 | #' [over2()] to apply a function to two objects. 73 | #' 74 | #' All members of the <[`over-across function family`][over_across_family]>. 75 | #' 76 | #' @section Examples: 77 | #' 78 | #' ```{r, child = "man/rmd/setup.Rmd"} 79 | #' ``` 80 | #' 81 | #' It has two main use cases. They differ in how the elements in `.x` 82 | #' are used. Let's first attach `dplyr`: 83 | #' 84 | #' ```{r, comment = "#>", collapse = TRUE} 85 | #' library(dplyr) 86 | #' 87 | #' # For better printing 88 | #' iris <- as_tibble(iris) 89 | #' ``` 90 | #' 91 | #' 92 | #' #### (1) The General Use Case 93 | #' Here the values in `.x` are used as inputs to one or more functions in `.fns`. 94 | #' This is useful, when we want to create several new variables based on the same 95 | #' function with varying arguments. A good example is creating a bunch of lagged 96 | #' variables. 97 | #' 98 | #' ```{r, comment = "#>", collapse = TRUE} 99 | #' tibble(x = 1:25) %>% 100 | #' mutate(over(c(1:3), 101 | #' ~ lag(x, .x))) 102 | #' ``` 103 | #' 104 | #' Lets create a dummy variable for each unique value in 'Species': 105 | #' ```{r, comment = "#>", collapse = TRUE} 106 | #' iris %>% 107 | #' mutate(over(unique(Species), 108 | #' ~ if_else(Species == .x, 1, 0)), 109 | #' .keep = "none") 110 | #' ``` 111 | #' 112 | #' With `over()` it is also possible to create several dummy variables with 113 | #' different thresholds. 
We can use the `.names` argument to control the output 114 | #' names: 115 | #' 116 | #' ```{r, comment = "#>", collapse = TRUE} 117 | #' iris %>% 118 | #' mutate(over(seq(4, 7, by = 1), 119 | #' ~ if_else(Sepal.Length < .x, 1, 0), 120 | #' .names = "Sepal.Length_{x}"), 121 | #' .keep = "none") 122 | #' ``` 123 | #' 124 | #' A similar approach can be used with dates. Below we loop over a date 125 | #' sequence to check whether the date falls within a given start and end 126 | #' date. We can use the `.names_fn` argument to clean the resulting output 127 | #' names: 128 | #' 129 | #' ```{r, comment = "#>", collapse = TRUE} 130 | #' # some dates 131 | #' dat_tbl <- tibble(start = seq.Date(as.Date("2020-01-01"), 132 | #' as.Date("2020-01-15"), 133 | #' by = "days"), 134 | #' end = start + 10) 135 | #' 136 | #' dat_tbl %>% 137 | #' mutate(over(seq(as.Date("2020-01-01"), 138 | #' as.Date("2020-01-21"), 139 | #' by = "weeks"), 140 | #' ~ .x >= start & .x <= end, 141 | #' .names = "day_{x}", 142 | #' .names_fn = ~ gsub("-", "", .x))) 143 | #' ``` 144 | #' 145 | #' `over()` can summarise data in wide format. In the example below, we want to 146 | #' know for each group of customers (`new`, `existing`, `reactive`), what 147 | #' percentage of the respondents gave which rating on a five-point Likert scale 148 | #' (`item1`). A usual approach in the tidyverse would be to use 149 | #' `count %>% group_by %>% mutate`, which yields the same result in the usually 150 | #' preferred long format. 
Sometimes, however, we might want this kind of summary 151 | #' in the wide format, and in this case `over()` comes in handy: 152 | #' 153 | #' ```{r, comment = "#>", collapse = TRUE} 154 | #' csatraw %>% 155 | #' group_by(type) %>% 156 | #' summarise(over(c(1:5), 157 | #' ~ mean(item1 == .x))) 158 | #' ``` 159 | #' 160 | #' Instead of a vector we can provide a named list of vectors to calculate the 161 | #' top two and bottom two categories on the fly: 162 | #' 163 | #' ```{r, comment = "#>", collapse = TRUE} 164 | #' csatraw %>% 165 | #' group_by(type) %>% 166 | #' summarise(over(list(bot2 = c(1:2), 167 | #' mid = 3, 168 | #' top2 = c(4:5)), 169 | #' ~ mean(item1 %in% .x))) 170 | #' ``` 171 | #' 172 | #' `over()` can also loop over columns of a data.frame. In the example below we 173 | #' want to create four different dummy variables of `item1`: (i) the top and (ii) 174 | #' bottom category as well as (iii) the top two and (iv) the bottom two categories. 175 | #' We can create a lookup `data.frame` and use all columns but the first as input to 176 | #' `over()`. In the function call we make use of base R's `match()`, where `.x` 177 | #' represents the new values and `recode_df[, 1]` refers to the old values. 178 | #' 179 | #' ```{r, comment = "#>", collapse = TRUE} 180 | #' 181 | #' recode_df <- data.frame(old = c(1, 2, 3, 4, 5), 182 | #' top1 = c(0, 0, 0, 0, 1), 183 | #' top2 = c(0, 0, 0, 1, 1), 184 | #' bot1 = c(1, 0, 0, 0, 0), 185 | #' bot2 = c(1, 1, 0, 0, 0)) 186 | #' 187 | #' csatraw %>% 188 | #' mutate(over(recode_df[,-1], 189 | #' ~ .x[match(item1, recode_df[, 1])], 190 | #' .names = "item1_{x}")) %>% 191 | #' select(starts_with("item1")) 192 | #' ``` 193 | #' 194 | #' `over()` works nicely with comma separated values stored in character vectors. 195 | #' In the example below, the column `csat_open` contains one or more comma 196 | #' separated reasons why a specific customer satisfaction rating was given. 
197 | #' We can easily create a column for each response category with the help of 198 | #' `dist_values` - a wrapper around `unique` which can split vector elements 199 | #' using a separator: 200 | #' 201 | #' ```{r, comment = "#>", collapse = TRUE} 202 | #' csat %>% 203 | #' mutate(over(dist_values(csat_open, .sep = ", "), 204 | #' ~ as.integer(grepl(.x, csat_open)), 205 | #' .names = "rsp_{x}", 206 | #' .names_fn = ~ gsub("\\s", "_", .x)), 207 | #' .keep = "none") %>% glimpse 208 | #' ``` 209 | #' 210 | #' 211 | #' #### (2) A Very Specific Use Case 212 | #' Here strings are supplied to `.x` to construct column names (sharing the 213 | #' same stem). This allows us to dynamically use more than one column in the 214 | #' function calls in `.fns`. To work properly, the strings need to be 215 | #' turned into symbols and evaluated. For this {dplyover} provides a genuine 216 | #' helper function `.()` that evaluates strings and helps to declutter the 217 | #' otherwise rather verbose code. `.()` supports glue syntax and takes a string 218 | #' as argument. 219 | #' 220 | #' Below are a few examples using two columns in the function calls in `.fns`. 221 | #' For the two-column case [across2()] provides a more intuitive API that is 222 | #' closer to the original `dplyr::across`. Using `.()` inside `over` is really 223 | #' useful for cases with more than two columns. 
#'
#' Consider the following example of a purrr-style formula in `.fns` using `.()`:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>%
#'   mutate(over(c("Sepal", "Petal"),
#'               ~ .("{.x}.Width") + .("{.x}.Length")
#'               ))
#' ```
#'
#' The above syntax is equal to the more verbose:
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>%
#'   mutate(over(c("Sepal", "Petal"),
#'               ~ eval(sym(paste0(.x, ".Width"))) +
#'                 eval(sym(paste0(.x, ".Length")))
#'               ))
#' ```
#'
#' `.()` also works with anonymous functions:
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>%
#'   summarise(over(c("Sepal", "Petal"),
#'                  function(x) mean(.("{x}.Width"))
#'                  ))
#' ```
#'
#' A named list of functions:
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>%
#'   mutate(over(c("Sepal", "Petal"),
#'               list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
#'                    sum = ~ .("{.x}.Width") + .("{.x}.Length"))
#'               ),
#'          .keep = "none")
#' ```
#'
#' Again, use the `.names` argument to control the output names:
#' ```{r, comment = "#>", collapse = TRUE}
#' iris %>%
#'   mutate(over(c("Sepal", "Petal"),
#'               list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
#'                    sum = ~ .("{.x}.Width") + .("{.x}.Length")),
#'               .names = "{fn}_{x}"),
#'          .keep = "none")
#' ```
#' @export
over <- function(.x, .fns, ..., .names = NULL, .names_fn = NULL) {

  # asking dplyr for the current group id fails outside of dplyr verbs;
  # turn that failure into an `over()`-specific error
  grp_id <- tryCatch(
    dplyr::cur_group_id(),
    error = function(e) {
      rlang::abort("`over()` must only be used inside dplyr verbs.")
    }
  )

  # The arguments are passed on as unevaluated promises; `meta_setup()` is the
  # place where they get evaluated, so this call is kept exactly as it is.
  setup <- meta_setup(dep_call = deparse_call(sys.call()),
                      grp_id = grp_id,
                      par_frame = parent.frame(),
                      setup_fn = "over_setup",
                      x1 = .x,
                      fns = .fns,
                      names = .names,
                      names_fn = .names_fn)

  x <- setup$x
  fns <- setup$fns

  # nothing to loop over: return a tibble without columns
  if (length(x) == 0L) {
    return(tibble::new_tibble(list(), nrow = 1L))
  }

  n_fns <- length(fns)
  res <- vector("list", length(x) * n_fns)

  # results are stored element by element, functions varying fastest
  for (i in seq_len(length(x))) {
    xi <- x[[i]]
    for (j in seq_len(n_fns)) {
      fn <- fns[[j]]
      res[[(i - 1L) * n_fns + j]] <- fn(xi, ...)
    }
  }

  # recycle all results to one common size before building the tibble
  common_size <- vctrs::vec_size_common(!!!res)
  res <- vctrs::vec_recycle_common(!!!res, .size = common_size)
  names(res) <- setup$names
  tibble::new_tibble(res, nrow = common_size)
}


# Prepare the inputs of `over()`: normalises `fns` to a list of functions and
# builds the output column names from `names` (glue specification or literal
# names) and `names_fn`. Returns a list with elements `x`, `fns` and `names`.
over_setup <- function(x1, fns, names, names_fn) {

  # name variants of `x1` that can be referred to in the glue specification
  x1_nm <- names(x1)
  x1_idx <- as.character(seq_along(x1))

  # values are unusable as labels for data frames with more or less than one
  # row and for lists containing elements that are not of length one
  no_values <- (is.data.frame(x1) && nrow(x1) != 1) ||
    (is.list(x1) && is.vector(x1) &&
       any(purrr::map_lgl(x1, ~ length(.x) != 1)))
  x1_val <- if (no_values) NULL else x1

  # smart default for `.names`, depending on single function vs. list
  single_fn <- is.function(fns) || rlang::is_formula(fns)
  names <- names %||% (if (single_fn) "{x}" else "{x}_{fn}")
  if (single_fn) {
    fns <- list(`1` = fns)
  }

  if (!is.list(fns)) {
    rlang::abort(c("Problem with `over()` input `.fns`.",
                   i = "Input `.fns` must be a function, a formula, or a list of functions/formulas."))
  }

  # unnamed lists are labelled by their index
  if (is.list(x1) && !rlang::is_named(x1)) {
    names(x1) <- x1_idx
  }

  # turn formulas into functions
  fns <- purrr::map(fns, rlang::as_function)

  # every function needs a label; missing ones are replaced by their position
  names_fns <- names(fns)
  if (is.null(names_fns)) {
    names_fns <- seq_along(fns)
  } else {
    blank <- which(names_fns == "")
    if (length(blank)) {
      names_fns[blank] <- blank
    }
  }

  # control flow flags
  n_fns <- length(fns)
  vars_no <- length(x1) * n_fns
  maybe_glue <- any(grepl("{.*}", names, perl = TRUE))
  is_glue <- any(grepl("{(x|x_val|x_nm|x_idx|fn)}", names, perl = TRUE))

  if (is_glue) {
    # `.names` is a glue specification

    if (length(names) > 1) {
      rlang::abort(c("Problem with `over()` input `.names`.",
                     i = "Glue specification must be a character vector of length == 1.",
                     x = paste0("`.names` is of length: ", length(names), ".")))
    }

    # tell the user when a fallback label is used
    if (is.null(x1_val) && grepl("{x_val}", names, perl = TRUE)) {
      rlang::warn("in `over()` `.names`: used 'x_idx' instead of 'x_val'. The latter only works with lists if all elements are length 1.")
    }

    if (is.null(x1_nm) && grepl("{x_nm}", names, perl = TRUE)) {
      rlang::warn("in `over()` `.names`: used 'x_idx' instead of 'x_nm', since the input object is unnamed.")
    }

    names <- vctrs::vec_as_names(
      glue::glue(names,
                 x = rep(names(x1) %||% x1, each = n_fns),
                 x_val = rep(x1_val %||% x1_idx, each = n_fns),
                 x_nm = rep(x1_nm %||% x1_idx, each = n_fns),
                 x_idx = rep(x1_idx, each = n_fns),
                 fn = rep(names_fns, length(x1))),
      repair = "check_unique")

  } else {
    # `.names` holds literal names (or a glue specification we do not know)

    if (maybe_glue && length(names) == 1 && vars_no > 1) {
      rlang::abort(c("Problem with `over()` input `.names`.",
                     x = "Unrecognized glue specification `{...}` detected in `.names`.",
                     i = "`.names` only supports the following expressions: '{x}', '{x_val}', '{x_nm}', '{x_idx}' or '{fn}'."
      ))
    }
    # literal names must be unique ...
    vctrs::vec_as_names(names, repair = "check_unique")
    # ... and there must be exactly one per new column
    if (length(names) != vars_no) {
      rlang::abort(c("Problem with `over()` input `.names`.",
                     i = "The number of elements in `.names` must equal the number of new columns.",
                     x = paste0(length(names), " elements provided to `.names`, but the number of new columns is ", vars_no, ".")
      ))
    }
  }

  # post-process the names with `.names_fn`, if supplied
  if (!is.null(names_fn)) {
    names <- purrr::map_chr(names, rlang::as_function(names_fn))
  }

  list(x = x1, fns = fns, names = names)
}
13 | #' 14 | #' The functions in the over-across function family create columns by applying 15 | #' one or several functions to: 16 | #' 17 | #' ### basic functions 18 | #' - [dplyr::across()]: a set of columns 19 | #' - [over()]: a vector (list or atomic vector) 20 | #' 21 | #' ### variants 22 | #' - [over2()] two vectors of the same length (pairwise) 23 | #' - [over2x()] two vectors (nested) 24 | #' - [across2()] two sets of columns (pairwise) 25 | #' - [across2x()] two sets of columns (nested) 26 | #' - [crossover()] a set of columns and a vector (nested) 27 | #' 28 | #' @name over_across_family 29 | NULL 30 | -------------------------------------------------------------------------------- /R/select_strings.R: -------------------------------------------------------------------------------- 1 | #' Select string parts or patterns of column names 2 | #' 3 | #' @description 4 | #' 5 | #' These functions are [selection helpers][selection_helpers]. 6 | #' They are intended to be used inside `over()` to extract parts or patterns of 7 | #' the column names of the underlying data. 8 | #' 9 | #' * [cut_names()] selects strings by removing (cutting off) the specified `.pattern`. 10 | #' This functionality resembles `stringr::str_remove_all()`. 11 | #' 12 | #' * [extract_names()] selects strings by extracting the specified `.pattern`. 13 | #' This functionality resembles `stringr::str_extract()`. 14 | #' 15 | #' @param .pattern Pattern to look for. 16 | #' @param .vars A character vector with variable names. When used inside `over` 17 | #' all column names of the underlying data are automatically supplied to `.vars`. 18 | #' This argument is useful when testing the functionality outside the context of 19 | #' `over()`. 20 | #' @param .remove Pattern to remove from the variable names provided in `.vars`. 
21 | #' When this argument is provided, all variable names in `.vars` that match 22 | #' the pattern specified in `.remove` will be removed, before the `.pattern` to 23 | #' look for will be applied. 24 | #' 25 | #' @return 26 | #' A character vector. 27 | #' 28 | #' @section Examples: 29 | #' 30 | #' ```{r, child = "man/rmd/setup.Rmd"} 31 | #' ``` 32 | #' 33 | #' Selection helpers can be used inside `dplyover::over()` which in turn must be 34 | #' used inside `dplyr::mutate` or `dplyr::summarise`. Let's first attach `dplyr` 35 | #' (and `stringr` for comparison): 36 | #' 37 | #' ```{r, comment = "#>", collapse = TRUE} 38 | #' library(dplyr) 39 | #' library(stringr) 40 | #' 41 | #' # For better printing 42 | #' iris <- as_tibble(iris) 43 | #' ``` 44 | #' 45 | #' Let's first compare `cut_names()` and `extract_names()` to their {stringr} 46 | #' equivalents `stringr::str_remove_all()` and `stringr::str_extract()`: 47 | #' 48 | #' We can observe two main differences: 49 | #' 50 | #' (1) `cut_names()` and `extract_names()` only return strings where the function 51 | #' was applied successfully (when characters have actually been removed or 52 | #' extracted). `stringr::str_remove_all()` returns unmatched strings as is, while 53 | #' `stringr::str_extract()` returns `NA`. 
54 | #' 55 | #' ```{r, comment = "#>", collapse = TRUE} 56 | #' cut_names("Width", .vars = names(iris)) 57 | #' str_remove_all(names(iris), "Width") 58 | #' 59 | #' extract_names("Length|Width", .vars = names(iris)) 60 | #' str_extract(rep(names(iris), 2), "Length|Width") 61 | #' ``` 62 | #' 63 | #' (2) `cut_names()` and `extract_names()` return only unique values: 64 | #' 65 | #' ```{r, comment = "#>", collapse = TRUE} 66 | #' cut_names("Width", .vars = rep(names(iris), 2)) 67 | #' str_remove_all(rep(names(iris), 2), "Width") 68 | #' 69 | #' extract_names("Length|Width", .vars = names(iris)) 70 | #' str_extract(rep(names(iris), 2), "Length|Width") 71 | #' ``` 72 | #' 73 | #' The examples above do not show that `cut_names()` removes *all* strings matching 74 | #' the `.pattern` argument, while `extract_names()` does only extract the `.pattern` 75 | #' *one* time: 76 | #' 77 | #' ```{r, comment = "#>", collapse = TRUE} 78 | #' cut_names("Width", .vars = "Width.Petal.Width") 79 | #' str_remove_all("Width.Petal.Width", "Width") 80 | #' 81 | #' extract_names("Width", .vars = "Width.Petal.Width") 82 | #' str_extract("Width.Petal.Width", "Width") 83 | #' ``` 84 | #' 85 | #' Within [`over()`] `cut_names()` and `extract_names()` automatically use the 86 | #' column names of the underlying data: 87 | #' 88 | #' ```{r, comment = "#>", collapse = TRUE} 89 | #' iris %>% 90 | #' mutate(over(cut_names(".Width"), 91 | #' ~ .("{.x}.Width") * .("{.x}.Length"), 92 | #' .names = "Product_{x}")) 93 | #' 94 | #' iris %>% 95 | #' mutate(over(extract_names("Length|Width"), 96 | #' ~.("Petal.{.x}") * .("Sepal.{.x}"), 97 | #' .names = "Product_{x}")) 98 | #' ``` 99 | #' 100 | #' What problem does `cut_names()` solve? 101 | #' In the example above using `cut_names()` might not seem helpful, since we could easily 102 | #' use `c("Sepal", "Petal")` instead. However, there are cases where we have 103 | #' data with a lot of similar pairs of variables sharing a common prefix or 104 | #' suffix. 
#' If we want to loop over them using `over()` then `cut_names()` comes
#' in handy.
#'
#' The usage of `extract_names()` might be less obvious. Let's look at raw data
#' from a customer satisfaction survey which contains the following variables.
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' csatraw %>% glimpse(width = 50)
#' ```
#'
#' The survey has several 'item's consisting of two sub-questions / variables 'a'
#' and 'b'. Let's say we want to calculate the product of those two variables for
#' each item. `extract_names()` helps us to select all variables containing
#' 'item' followed by a digit using the regex `"item\\d"` as `.pattern`.
#' However, there is 'item1' and 'item1_open' which are not followed by `a` and
#' `b`. `extract_names()` lets us exclude these items by setting the `.remove`
#' argument to `"^item1"`:
#'
#' ```{r, comment = "#>", collapse = TRUE}
#' csatraw %>%
#'   transmute(over(extract_names("item\\d", "^item1"),
#'                  ~ .("{.x}a") * .("{.x}b"))
#'   )
#' ```
#' @name select_vars
NULL

# Internal helper shared by `cut_names()` and `extract_names()`.
#
# Resolves the candidate names a selection helper works on:
#   * `.vars` if supplied, otherwise the column names of the data dplyr is
#     currently processing -- but only when the frame `.frame_no` holds a call
#     to `meta_setup()`, i.e. the helper was used as input of a dplyover
#     function;
#   * minus all names matching `.remove` (if supplied).
#
# `.frame_no` is the absolute frame number expected to hold `meta_setup()`,
# `.fn` the name of the calling helper (used in error messages only).
# Aborts when no names can be determined or `.remove` matches nothing.
.candidate_names <- function(.vars, .remove, .frame_no, .fn) {

  .varn <- .vars

  if (is.null(.varn)) {
    # guard the frame lookup: a non-positive frame number means there is no
    # such caller, and `identical()` copes with call heads that are not symbols
    .in_setup <- .frame_no >= 1L &&
      identical(sys.call(.frame_no)[[1]], quote(meta_setup))

    if (!.in_setup) {
      rlang::abort(
        c(paste0("Problem with `", .fn, "()` input `.vars`."),
          i = paste0("`.vars` can only be omitted when `", .fn,
                     "()` is called inside a dplyover function such as `over()`."),
          x = "No column names are available in the current context."))
    }

    # NOTE(review): `cur_data_all()` is deprecated in newer dplyr releases;
    # confirm the minimum supported dplyr version before switching to `pick()`.
    .varn <- names(dplyr::cur_data_all())
  }

  if (is.null(.remove)) {
    return(.varn)
  }

  .notselected <- grep(.remove, .varn, perl = TRUE, value = TRUE)

  if (length(.notselected) == 0) {
    rlang::abort(
      c(paste0("Problem with `", .fn, "()` input `.remove`."),
        i = paste0("The character string provided in `.remove` ('",
                   .remove, "') must at least match one ",
                   if (is.null(.vars)) "column name." else "element in `.vars`."),
        x = "No match was found."))
  }

  setdiff(.varn, .notselected)
}

#' @rdname select_vars
#' @export
cut_names <- function(.pattern, .remove = NULL, .vars = NULL) {

  # when used as input of a dplyover function, `meta_setup()` is expected
  # two frames above this call
  .frame_no <- sys.nframe() - 2L
  .selected <- .candidate_names(.vars, .remove, .frame_no, "cut_names")

  # keep only names in which the pattern was found, with all matches removed
  .match <- grepl(.pattern, .selected, perl = TRUE)
  .extract <- gsub(.pattern, "", .selected, perl = TRUE)[.match]

  if (length(.extract) == 0) {
    rlang::abort(
      c("Problem with `cut_names()` input `.pattern`.",
        i = paste0("The character string provided in `.pattern` ('",
                   .pattern, "') must at least return one match."),
        x = "No match was found."))
  }

  # names that were matched completely leave an empty string and are dropped
  unique(.extract[nchar(.extract) > 0])
}

#' @rdname select_vars
#' @export
extract_names <- function(.pattern, .remove = NULL, .vars = NULL) {

  # when used as input of a dplyover function, `meta_setup()` is expected
  # two frames above this call
  .frame_no <- sys.nframe() - 2L
  .selected <- .candidate_names(.vars, .remove, .frame_no, "extract_names")

  # first match of the pattern in each name; names without a match are dropped
  .extract <- regexpr(.pattern, .selected, perl = TRUE)
  .res <- regmatches(.selected, .extract)

  if (length(.res) == 0) {
    rlang::abort(
      c("Problem with `extract_names()` input `.pattern`.",
        i = paste0("The character string provided in `.pattern` ('",
                   .pattern, "') must at least return one match."),
        x = "No match was found."))
  }

  unique(.res)

}
@description 4 | #' 5 | #' These functions are [selection helpers][selection_helpers]. They are intended 6 | #' to be used inside all functions that accept a vector as argument (that is `over()` 7 | #' and `crossover()` and all their variants) to extract values of a variable. 8 | #' 9 | #' * [dist_values()] returns all distinct values (or in the case of factor variables: 10 | #' levels) of a variable `x` which are not `NA`. 11 | #' 12 | #' * [seq_range()] returns the sequence between the `range()` of a variable `x`. 13 | #' 14 | #' @param x An atomic vector or list. For [seq_range()] x must be numeric or date. 15 | #' @param .sep A character vector containing regular expression(s) which are used 16 | #' for splitting the values (works only if x is a character vector). 17 | #' @param .sort A character string indicating which sorting scheme is to be applied 18 | #' to distinct values: ascending ("asc"), descending ("desc"), "none" or "levels". The 19 | #' default is ascending, only if x is a factor the default is "levels". 20 | #' @param .by A number (or date expression) representing the increment of the sequence. 21 | #' 22 | #' @return 23 | #' [dist_values()] returns a vector of the same type as x, with the exception of 24 | #' factors which are converted to type `"character"`. 25 | #' 26 | #' [seq_range()] returns a vector of type `"integer"` or `"double"` (or a vector of dates if x is a date). 27 | #' 28 | #' @section Examples: 29 | #' 30 | #' ```{r, child = "man/rmd/setup.Rmd"} 31 | #' ``` 32 | #' 33 | #' Selection helpers can be used inside `dplyover::over()` which in turn must be 34 | #' used inside `dplyr::mutate` or `dplyr::summarise`. Let's first attach `dplyr`: 35 | #' 36 | #' ```{r, comment = "#>", collapse = TRUE} 37 | #' library(dplyr) 38 | #' 39 | #' # For better printing 40 | #' iris <- as_tibble(iris) 41 | #' ``` 42 | #' 43 | #' `dist_values()` extracts all distinct values of a column variable. 44 | #' This is helpful when creating dummy variables in a loop using `over()`. 
45 | #' 46 | #' ```{r, comment = "#>", collapse = TRUE} 47 | #' iris %>% 48 | #' mutate(over(dist_values(Species), 49 | #' ~ if_else(Species == .x, 1, 0) 50 | #' ), 51 | #' .keep = "none") 52 | #' ``` 53 | #' 54 | #' `dist_values()` is just a wrapper around `unique()`. However, it has five 55 | #' differences: 56 | #' 57 | #' (1) `NA` values are automatically stripped. Compare: 58 | #' 59 | #' ```{r, comment = "#>", collapse = TRUE} 60 | #' unique(c(1:3, NA)) 61 | #' dist_values(c(1:3, NA)) 62 | #' ``` 63 | #' 64 | #' (2) Applied on factors, `dist_values()` returns all distinct `levels` as 65 | #' character. Compare the following: 66 | #' 67 | #' ```{r, comment = "#>", collapse = TRUE} 68 | #' fctrs <- factor(c(1:3, NA), levels = c(3:1)) 69 | #' 70 | #' fctrs %>% unique() %>% class() 71 | #' 72 | #' fctrs %>% dist_values() %>% class() 73 | #' ``` 74 | #' 75 | #' (3) By default, the output is sorted in ascending order for non-factors, and 76 | #' is sorted as the underlying "levels" for factors. This can be controlled by 77 | #' setting the `.sort` argument. 
Compare: 78 | #' 79 | #' ```{r, comment = "#>", collapse = TRUE} 80 | #' # non-factors 81 | #' unique(c(3,1,2)) 82 | #' 83 | #' dist_values(c(3,1,2)) 84 | #' dist_values(c(3,1,2), .sort = "desc") 85 | #' dist_values(c(3,1,2), .sort = "none") 86 | #' 87 | #' # factors 88 | #' fctrs <- factor(c(2,1,3, NA), levels = c(3:1)) 89 | #' 90 | #' dist_values(fctrs) 91 | #' dist_values(fctrs, .sort = "levels") 92 | #' dist_values(fctrs, .sort = "asc") 93 | #' dist_values(fctrs, .sort = "desc") 94 | #' dist_values(fctrs, .sort = "none") 95 | #' 96 | #' ``` 97 | #' 98 | #' (4) When used on a character vector `dist_values` can take a separator 99 | #' `.sep` to split the elements accordingly: 100 | #' 101 | #' ```{r, comment = "#>", collapse = TRUE} 102 | #' c("1, 2, 3", 103 | #' "2, 4, 5", 104 | #' "4, 1, 7") %>% 105 | #' dist_values(., .sep = ", ") 106 | #' ``` 107 | #' 108 | #' (5) When used on lists `dist_values` automatically simplifies its input 109 | #' into a vector using `unlist`: 110 | #' 111 | #' ```{r, comment = "#>", collapse = TRUE} 112 | #' list(a = c(1:4), b = (4:6), c(5:10)) %>% 113 | #' dist_values() 114 | #' ``` 115 | #' 116 | #' ---------- 117 | #' `seq_range()` generates a numeric sequence between the `min` and `max` 118 | #' values of its input variable. This is helpful when creating many dummy 119 | #' variables with varying thresholds. 120 | #' 121 | #' ```{r, comment = "#>", collapse = TRUE} 122 | #' iris %>% 123 | #' mutate(over(seq_range(Sepal.Length, 1), 124 | #' ~ if_else(Sepal.Length > .x, 1, 0), 125 | #' .names = "Sepal.Length.{x}"), 126 | #' .keep = "none") 127 | #' ``` 128 | #' 129 | #' Note that if the increment `.by` does not have decimal places, `min` and `max` are 130 | #' wrapped in `ceiling` and `floor` accordingly. This will prevent the creation of 131 | #' variables that contain only `0` or `1`. 
Compare the output below with the 132 | #' example above: 133 | #' 134 | #' ```{r, comment = "#>", collapse = TRUE} 135 | #' iris %>% 136 | #' mutate(over(seq(round(min(Sepal.Length), 0), 137 | #' round(max(Sepal.Length), 0), 138 | #' 1), 139 | #' ~ if_else(Sepal.Length > .x, 1, 0), 140 | #' .names = "Sepal.Length.{x}"), 141 | #' .keep = "none") 142 | #' ``` 143 | #' 144 | #' `seq_range()` also works on dates: 145 | #' 146 | #' ```{r, comment = "#>", collapse = TRUE} 147 | #' some_dates <- c(as.Date("2020-01-02"), 148 | #' as.Date("2020-05-02"), 149 | #' as.Date("2020-03-02")) 150 | #' 151 | #' 152 | #' some_dates %>% 153 | #' seq_range(., "1 month") 154 | #' ``` 155 | #' 156 | #' @name select_values 157 | NULL 158 | 159 | #' @rdname select_values 160 | #' @export 161 | dist_values <- function(x, .sep = NULL, .sort = c("asc", "desc", "none", "levels")) { 162 | 163 | is_null <- identical(.sort, c("asc", "desc", "none", "levels")) # TRUE when `.sort` was left at its default 164 | sort <- match.arg(.sort) 165 | 166 | if (is.list(x)) { 167 | x <- unlist(x) 168 | } 169 | if (!is.null(.sep)) { 170 | x <- unlist(strsplit(x, .sep)) 171 | } 172 | 173 | res <- as.vector(na.omit(unique(x))) 174 | if (!is.factor(x)) { 175 | if (sort == "asc") { 176 | return(sort(res)) 177 | } else if (sort == "desc") { 178 | return(sort(res, decreasing = TRUE)) 179 | } else { 180 | return(res) 181 | } 182 | } else { 183 | x <- levels(x) 184 | if (is_null || .sort == "levels") { 185 | return(x) 186 | } else if (sort == "asc") { 187 | return(sort(x)) 188 | } else if (sort == "desc") { 189 | return(sort(x, decreasing = TRUE)) 190 | } else { 191 | res 192 | } 193 | } 194 | } 195 | 196 | #' @rdname select_values 197 | #' @export 198 | seq_range <- function(x, .by) { 199 | 200 | if (!inherits(x, c("numeric", "integer", "Date"))) { # inherits() yields a single TRUE/FALSE even when `x` carries several classes 201 | rlang::abort( 202 | c("Problem with `seq_range()` input `x`.", 203 | i = "`x` must be a numeric or date vector.", 204 | x = paste0("`x` is of class: ", paste(class(x), collapse = "/"), ".")) 205 | ) 206 | } 207 | 208 | .range <- range(x) 
209 | 210 | if (!is.date(x) && identical(.by, round(.by, 0))) { # whole-number `.by` on non-date input: shrink the range inward to whole numbers. NOTE(review): `is.date()` is not defined in this file - presumably a package-internal helper, confirm 211 | .range[1] <- ceiling(.range[1]) 212 | .range[2] <- floor(.range[2]) 213 | } 214 | 215 | seq(.range[1], .range[2], by = .by) 216 | 217 | } 218 | -------------------------------------------------------------------------------- /R/selection_helpers.R: -------------------------------------------------------------------------------- 1 | #' Selection helpers 2 | #' 3 | #' @description 4 | #' 5 | #' `dplyover` provides three kinds of selection helpers which are intended for 6 | #' use in all functions that accept a vector as argument (that is `over()` and 7 | #' `crossover()` as well as their variants, see here for a full list of the 8 | #' [over-across function family][over_across_family]). 9 | #' 10 | #' Helpers which select **string parts** of the **column names** (of the underlying data): 11 | #' - [cut_names()] removes a specified pattern. 12 | #' - [extract_names()] extracts a specified pattern. 13 | #' 14 | #' Helpers which select **values** of a variable: 15 | #' - [dist_values()] returns all distinct values. 16 | #' - [seq_range()] returns the sequence between the `range()` of a variable. 17 | #' 18 | #' A helper function that evaluates a glue specification as variable 19 | #' - [.()] evaluates an interpolated string as symbol 20 | #' 21 | #' @name selection_helpers 22 | #' @aliases over_selection_helpers 23 | NULL 24 | -------------------------------------------------------------------------------- /R/show_affix.R: -------------------------------------------------------------------------------- 1 | #' Show affixes for variable pairs of two sets of columns 2 | #' 3 | #' @description 4 | #' 5 | #' These functions show the prefixes or suffixes for each pair of variables of 6 | #' two sets of columns. 
They are intended to be used either (1) in case `across2` 7 | #' throws an error when `{pre}` or `{suf}` are specified in `across2`'s `.names` 8 | #' argument or (2) before using `{pre}` or `{suf}` in `across2` to understand 9 | #' what the pre- or suffixes will look like. 10 | #' 11 | #' * [show_prefix()] lists each variable pair and the corresponding alphanumeric prefix 12 | #' 13 | #' * [show_suffix()] lists each variable pair and the corresponding alphanumeric suffix 14 | #' 15 | #' @param .data A data frame. 16 | #' @param .xcols,.ycols <[`tidy-select`][dplyr::dplyr_tidy_select]> Sets of 17 | #' columns for which the common pre- or suffix will be shown for each pair. 18 | #' Note that you can not select grouping columns. 19 | #' 20 | #' @return 21 | #' A tibble with three columns: .xcols, .ycols and prefix or suffix. 22 | #' 23 | #' @section Examples: 24 | #' 25 | #' ```{r, child = "man/rmd/setup.Rmd"} 26 | #' ``` 27 | #' Below two use cases of `show_prefix/suffix` are briefly explained. 28 | #' Let's first attach dplyr and get ready: 29 | #' 30 | #' ```{r, comment = "#>", collapse = TRUE} 31 | #' library(dplyr) 32 | #' 33 | #' # For better printing 34 | #' iris <- as_tibble(iris) 35 | #' ``` 36 | #' 37 | #' ## (1) When called after an error is thrown by across2() 38 | #' 39 | #' Let's assume we use `across2` with the `{pre}` glue specification on some 40 | #' data where not all variable pairs share a common prefix. In the example below 41 | #' we use `dplyr::rename` to create such a case. Then `across2` will throw an 42 | #' error. The error message already suggests that we can run `show_prefix()` 43 | #' to see what went wrong. 
In this case we can call `show_prefix()` without 44 | #' any arguments: 45 | #' 46 | #' ```{r, comment = "#>", collapse = TRUE, error = TRUE} 47 | #' iris %>% 48 | #' as_tibble %>% 49 | #' rename("Pesal.Length" = Sepal.Length) %>% 50 | #' mutate(across2(ends_with("Length"), 51 | #' ends_with("Width"), 52 | #' .fns = list(product = ~ .x * .y, 53 | #' sum = ~ .x + .y), 54 | #' .names = "{pre}_{fn}")) 55 | #' show_prefix() 56 | #' ``` 57 | #' 58 | #' ## (2) When called on a data.frame 59 | #' 60 | #' When called on a data.frame we just need to specify two sets of columns: 61 | #' `.xcols` and `.ycols` (just like in `across2`). 62 | #' 63 | #' ```{r, comment = "#>", collapse = TRUE} 64 | #' iris %>% 65 | #' show_suffix(starts_with("Sepal"), 66 | #' starts_with("Petal")) 67 | #' ``` 68 | #' 69 | #' 70 | #' @name show_affix 71 | NULL 72 | #' @rdname show_affix 73 | #' @export 74 | show_prefix <- function(.data = NULL, .xcols = NULL, .ycols = NULL) { 75 | 76 | if (is.null(.data) && !is.null(.last$value)) { # no data supplied: reuse the inputs stored in `.last` (presumably recorded by a failed `across2()` call - confirm) 77 | 78 | .data <- .last$value$data 79 | .xcols <- .last$value$xcols 80 | .ycols <- .last$value$ycols 81 | 82 | rm(value, envir = .last) # the stored inputs are consumed once 83 | 84 | } else { 85 | .xcols <- rlang::enexpr(.xcols) 86 | .ycols <- rlang::enexpr(.ycols) 87 | } 88 | 89 | show_affix(data = .data, 90 | xcols = .xcols, 91 | ycols = .ycols, 92 | type = "prefix") 93 | } 94 | 95 | #' @rdname show_affix 96 | #' @export 97 | show_suffix <- function(.data = NULL, .xcols = NULL, .ycols = NULL) { 98 | 99 | if (is.null(.data) && !is.null(.last$value)) { # no data supplied: reuse the inputs stored in `.last` (presumably recorded by a failed `across2()` call - confirm) 100 | 101 | .data <- .last$value$data 102 | .xcols <- .last$value$xcols 103 | .ycols <- .last$value$ycols 104 | 105 | rm(value, envir = .last) # the stored inputs are consumed once 106 | 107 | } else { 108 | .xcols <- rlang::enexpr(.xcols) 109 | .ycols <- rlang::enexpr(.ycols) 110 | } 111 | show_affix(data = .data, 112 | xcols = .xcols, 113 | ycols = .ycols, 114 | type = "suffix") 115 | } 116 | 117 | 118 | show_affix <- function(data, xcols, ycols, type = c("prefix", "suffix")) { 119 
| 120 | group_vars <- dplyr::group_vars(data) 121 | 122 | if (length(group_vars) > 0) { # grouping columns are dropped before the selections are evaluated 123 | data <- dplyr::ungroup(data) 124 | data <- dplyr::select(data, -dplyr::all_of(group_vars)) 125 | } 126 | 127 | xvars <- tidyselect::eval_select(xcols, data) 128 | yvars <- tidyselect::eval_select(ycols, data) 129 | 130 | xvars <- names(xvars) 131 | yvars <- names(yvars) 132 | 133 | if (length(xvars) != length(yvars)) { 134 | rlang::abort(c(paste0("Problem with `show_", type,"()` input `.xcols` and `.ycols`."), 135 | i = "Input `.xcols` and `.ycols` must have the same number of columns.", 136 | x = paste0(length(xvars), " columns are selected in `.xcols`, ", 137 | "while ", length(yvars), " columns are selected in `.ycols`."))) 138 | } 139 | 140 | var_nms <- purrr::flatten(purrr::map2(xvars, yvars, ~ list(c(.x, .y)))) 141 | if (type == "prefix") { 142 | res <- purrr::map(var_nms, ~ get_affix(.x, "prefix")) 143 | } else { 144 | res <- purrr::map(var_nms, ~ get_affix(.x, "suffix")) 145 | } 146 | 147 | res <- unlist(purrr::modify_if(res, rlang::is_empty, ~ NA_character_)) # pairs without a common affix are shown as NA 148 | 149 | inp_tbl <- tibble::tibble(.xcols = xvars, 150 | .ycols = yvars, 151 | !! 
type := res) 152 | 153 | print_min <- getOption("tibble.print_min") %||% 10 154 | print_max <- getOption("tibble.print_max") %||% 20 155 | 156 | if (nrow(inp_tbl) > print_max) { 157 | cat("Use `.Last.value %>% View()` to see the full list of variables.\n") 158 | } 159 | inp_tbl 160 | } 161 | 162 | # helper function for across2_setup 163 | get_affix <- function(x, type = c("prefix", "suffix")) { 164 | 165 | side <- switch(type, 166 | "prefix" = "right", 167 | "suffix" = "left") 168 | 169 | x <- stringr::str_pad(x, max(nchar(x)), side = side, pad = " ") 170 | x_ls <- purrr::transpose(strsplit(x, "")) 171 | x_ls_length <- purrr::map_dbl(purrr::map(x_ls, unique), length) # number of distinct characters at each position; 1 means all names agree there 172 | x_rle <- rle(x_ls_length) 173 | 174 | if (side == "right" && x_rle$values[1] == 1) { 175 | res <- stringr::str_sub(x[[1]], 176 | start = 1L, 177 | end = x_rle$length[1]) 178 | 179 | } else if (side == "left" && x_rle$values[length(x_rle$values)] == 1) { 180 | res_start <- sum(x_rle$length[-length(x_rle$length)]) + 1 181 | res_length <- x_rle$length[length(x_rle$length)] 182 | res_end <- res_start + res_length - 1 183 | 184 | res <- stringr::str_sub(x[[1]], 185 | start = res_start, 186 | end = res_end) 187 | } else { 188 | res <- NULL 189 | } 190 | 191 | res <- stringr::str_remove_all(res, "[:punct:]*$") 192 | res <- stringr::str_remove_all(res, "^[:punct:]*") 193 | 194 | if (side == "right") { 195 | res <- stringr::str_extract(res, "^[:alnum:]*") 196 | } else { 197 | res <- stringr::str_extract(res, "[:alnum:]*$") 198 | } 199 | 200 | res 201 | 202 | } 203 | 204 | # add to tests 205 | # x <- c("Sepal.Length", "Sepal.Width") 206 | # x <- c("Length.Sepal", "Width.Sepal") 207 | # x <- c("Length.of.Sepal.here", "Length.no.Sepal.here") 208 | # get_affix(x, "suffix") 209 | -------------------------------------------------------------------------------- /R/string_eval.R: -------------------------------------------------------------------------------- 1 | #' Evaluate an interpolated string as symbol 2 | #' 3 | 
#' @description 4 | #' 5 | #' This function takes a glue specification as input, and evaluates the final 6 | #' argument string as name in the caller environment. 7 | #' 8 | #' @param x A glue specification, that is, a string which contains an R expression 9 | #' wrapped in curly braces, e.g. `.("{.x}_some_string")`. 10 | #' 11 | #' @return 12 | #' The values of the variable with the name of the final argument string, given 13 | #' that it exists in the caller environment. 14 | #' 15 | #' @section Examples: 16 | #' 17 | #' ```{r, child = "man/rmd/setup.Rmd"} 18 | #' ``` 19 | #' ```{r, comment = "#>", collapse = TRUE} 20 | #' library(dplyr) 21 | #' 22 | #' # For better printing 23 | #' iris <- as_tibble(iris) 24 | #' ``` 25 | #' 26 | #' Below is a simple example from `over()`. In `over`'s function 27 | #' argument `.x` is first evaluated as 'Sepal' and then as 'Petal' which 28 | #' results in the final argument strings 'Sepal.Width' and 'Sepal.Length' as 29 | #' well as 'Petal.Width' and 'Petal.Length'. 30 | #' 31 | #' ```{r, comment = "#>", collapse = TRUE} 32 | #' iris %>% 33 | #' mutate(over(c("Sepal", "Petal"), 34 | #' ~ .("{.x}.Width") + .("{.x}.Length") 35 | #' )) 36 | #' ``` 37 | #' 38 | #' The above syntax is equal to the more verbose: 39 | #' ```{r, comment = "#>", collapse = TRUE} 40 | #' iris %>% 41 | #' mutate(over(c("Sepal", "Petal"), 42 | #' ~ eval(sym(paste0(.x, ".Width"))) + 43 | #' eval(sym(paste0(.x, ".Length"))) 44 | #' )) 45 | #' ``` 46 | #' 47 | #' Although `.()` was created with the use of `over()` in mind, it can also be 48 | #' used within `dplyr::across()` in combination with `dplyr::cur_column()`. 49 | #' First let's rename 'Sepal.Length' and 'Petal.Length' to 'Sepal' and 'Petal' 50 | #' to have a stem to which we can attach the string '.Width' to access the 51 | #' two 'Width' variables. Now we can call `.(cur_column())` to access the variable 52 | #' `across()` has been called on (Note: we could have used `.x` instead). 
We can 53 | #' further access the values of the 'Width' variables by wrapping `cur_column()` 54 | #' in curly braces `{}`, adding `.Width` and wrapping everything with 55 | #' quotation marks `.("{cur_column()}.Width")`. 56 | #' 57 | #' ```{r, comment = "#>", collapse = TRUE} 58 | #' iris %>% 59 | #' rename("Sepal" = "Sepal.Length", 60 | #' "Petal" = "Petal.Length") %>% 61 | #' mutate(across(c(Sepal, Petal), 62 | #' ~ .(cur_column()) + .("{cur_column()}.Width"), 63 | #' .names = "{col}_sum")) 64 | #' ``` 65 | #' 66 | #' A similar approach can be achieved using `purrr::map` in combination with `.()`: 67 | #' ```{r, comment = "#>", collapse = TRUE} 68 | #' iris %>% 69 | #' rename("Sepal" = "Sepal.Length", 70 | #' "Petal" = "Petal.Length") %>% 71 | #' mutate(purrr::map_dfc(c("Sepal_sum" = "Sepal", "Petal_sum" = "Petal"), 72 | #' ~ .(.x) + .("{.x}.Width"))) 73 | #' ``` 74 | #' @name string_eval 75 | #' @export 76 | `.` <- function(x) { 77 | rlang::eval_tidy(rlang::sym(glue::glue(x, 78 | .open = "{", 79 | .close = "}", 80 | .envir = parent.frame())), # interpolate `{...}` with the variables visible where `.()` was called 81 | env = rlang::caller_env()) # then evaluate the resulting name as a symbol in that calling environment 82 | } 83 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, child = "man/rmd/setup.Rmd"} 8 | ``` 9 | 10 | # dplyover 11 | 12 | 13 | ![Release status](https://img.shields.io/badge/status-first%20release-yellow) 14 | [![Lifecycle](man/figures/lifecycle-experimental.svg)](man/figures/lifecycle-experimental.svg) 15 | [![R-CMD-check](https://github.com/TimTeaFan/dplyover/workflows/R-CMD-check/badge.svg)](https://github.com/TimTeaFan/dplyover/actions) 16 | [![Codecov test coverage](https://codecov.io/gh/TimTeaFan/dplyover/branch/main/graph/badge.svg)](https://codecov.io/gh/TimTeaFan/dplyover?branch=main) 17 | 
[![CodeFactor](https://www.codefactor.io/repository/github/timteafan/dplyover/badge)](https://www.codefactor.io/repository/github/timteafan/dplyover) 18 | [![CRAN status](https://www.r-pkg.org/badges/version/dplyover)](https://cran.r-project.org/package=dplyover) 19 | 20 | 21 | ## Overview 22 | 23 | dplyover logo 24 | 25 | {dplyover} extends {dplyr}'s functionality by building a function family 26 | around `dplyr::across()`. 27 | 28 | The goal of this *over-across function family* is to provide a concise and 29 | uniform syntax which can be used to create columns by applying functions to 30 | vectors and/or sets of columns in {dplyr}. Ideally, this will: 31 | 32 | - **reduce the amount of code** to create variables derived from existing columns, 33 | which is especially helpful when doing exploratory data analysis (e.g. lagging, 34 | collapsing, recoding etc. many variables in a similar way). 35 | - **provide a clean {dplyr} approach** to create many variables which are 36 | calculated based on two or more variables. 37 | - **improve our mental model** so that it is easier to tackle problems where the 38 | solution is based on creating new columns. 39 | 40 | The functions in the *over-across function family* create columns by applying 41 | one or several functions to: 42 | 43 | - `dplyr::across()` a set of columns (not part of dplyover) 44 | - `over()` a vector (list or atomic vector) 45 | - `over2()` two vectors of the same length (sequentially^#^) 46 | - `over2x()` two vectors (nested^+^) 47 | - `across2()` two sets of columns (sequentially^#^) 48 | - `across2x()` two sets of columns (nested^+^) 49 | - `crossover()` a set of columns and a vector (nested^+^) 50 | 51 | # "sequentially" means that the function is sequentially applied to the 52 | first pair of elements `x[[1]]` and `y[[1]]`, then to the second pair of elements 53 | and so on.
54 | + "nested" means that the function is applied to all combinations 55 | between elements in `x` and `y` similar to a nested loop. 56 | 57 | 58 | ## Installation 59 | 60 | {dplyover} is not on CRAN. You can install the latest version from 61 | [GitHub](https://github.com/) with: 62 | 63 | ```{r, eval = FALSE} 64 | # install.packages("remotes") 65 | remotes::install_github("TimTeaFan/dplyover") 66 | ``` 67 | 68 | ## Getting started 69 | 70 | Below are a few examples of the {dplyover}'s *over-across function family*. More 71 | functions and workarounds of how to tackle the problems below without {dplyover} 72 | can be found in the vignette "Why dplyover?". 73 | 74 | ```{r, setup, warning = FALSE, message = FALSE} 75 | # dplyover is an extension of dplyr and won't work without it 76 | library(dplyr) 77 | library(dplyover) 78 | 79 | # For better printing: 80 | iris <- as_tibble(iris) 81 | ``` 82 | 83 | #### Apply functions to a vector 84 | 85 | `over()` applies one or several functions to a vector. We can use it inside 86 | `dplyr::mutate()` to create several similar variables that we derive from an 87 | existing column. This is helpful in cases where we want to create a batch of 88 | similar variables with only slight changes in the argument values of the 89 | calling function. A good example are `lag` and `lead` variables. Below we use 90 | column 'a' to create lag and lead variables by `1`, `2` and `3` positions. 91 | `over()`'s `.names` argument lets us put nice names on the output columns. 92 | 93 | ```{r} 94 | tibble(a = 1:25) %>% 95 | mutate(over(c(1:3), 96 | list(lag = ~ lag(a, .x), 97 | lead = ~ lead(a, .x)), 98 | .names = "a_{fn}{x}")) 99 | ``` 100 | 101 | #### Apply functions to a set of columns and a vector simultaneously 102 | 103 | `crossover()` applies the functions in `.fns` to every combination of columns in 104 | `.xcols` with elements in `.y`. This is similar to the example above, but this time, 105 | we use a set of columns. 
Below we create five lagged variables for each of 106 | 'Sepal.Length' and 'Sepal.Width'. Again, we use a named list as argument in `.fns` 107 | to create nice names by specifying the glue syntax in `.names`. 108 | 109 | ```{r} 110 | iris %>% 111 | transmute( 112 | crossover(starts_with("sepal"), 113 | 1:5, 114 | list(lag = ~ lag(.x, .y)), 115 | .names = "{xcol}_{fn}{y}")) %>% 116 | glimpse 117 | ``` 118 | 119 | 120 | #### Apply functions to a set of variable pairs 121 | 122 | `across2()` can be used to transform pairs of variables in one or more functions. 123 | In the example below we want to calculate the product and the sum of all pairs 124 | of 'Length' and 'Width' variables in the `iris` data set. We can use `{pre}` in 125 | the glue specification in `.names` to extract the common prefix of each pair of 126 | variables. We can further transform the names, in the example setting them 127 | `tolower`, by specifying the `.names_fn` argument: 128 | 129 | ```{r} 130 | iris %>% 131 | transmute(across2(ends_with("Length"), 132 | ends_with("Width"), 133 | .fns = list(product = ~ .x * .y, 134 | sum = ~ .x + .y), 135 | .names = "{pre}_{fn}", 136 | .names_fn = tolower)) 137 | ``` 138 | 139 | 140 | ## Performance and Compatibility 141 | 142 | This is an experimental package which I started developing with my own use cases 143 | in mind. I tried to keep the effort low, which is why this package *does not* 144 | internalize (read: copy) internal {dplyr} functions (especially the 'context 145 | internals'). This made it relatively easy to develop the package without: 146 | 147 | 1. copying tons of {dplyr} code, 148 | 1. having to figure out which dplyr-functions use the copied internals and 149 | 1. finally overwriting these functions (like `mutate` and other one-table verbs), 150 | which would eventually lead to conflicts with other add-on packages, like for 151 | example {tidylog}. 
152 | 153 | However, the downside is that not relying on {dplyr} internals has some negative 154 | effects in terms of performance and compatibility. 155 | 156 | In a nutshell this means: 157 | 158 | - The *over-across function family* in {dplyover} is slower than the 159 | original `dplyr::across`. Up until {dplyr} 1.0.3 the overhead was not too big, 160 | but `dplyr::across` got much faster with {dplyr} 1.0.4 which is why the gap has 161 | widened a lot. 162 | - Although {dplyover} is designed to work in {dplyr}, some features and 163 | edge cases will not work correctly. 164 | 165 | The good news is that even without relying on {dplyr} internals most of the 166 | original functionality can be replicated and although being less performant, 167 | the current setup is optimized and falls not too far behind in terms of speed - 168 | at least when compared to the pre v1.0.4 `dplyr::across`. 169 | 170 | Regarding compatibility, I have spent quite some time testing the package and 171 | I was able to replicate most of the tests for `dplyr::across` successfully. 172 | 173 | For more information on the performance and compatibility of {dplyover} see the 174 | vignette "Performance and Compatibility". 175 | 176 | 177 | ## History 178 | 179 | I originally opened a 180 | [feature request on GitHub](https://github.com/tidyverse/dplyr/issues/4834) to 181 | include a very special case version of `over` (or at that time `mutate_over`) 182 | into {dplyr}. The advice then was to make this kind of functionality available 183 | in a separate package. While I was working on this very special case version of 184 | `over`, I realized that the more general use case resembles a `purrr::map` 185 | function for inside {dplyr} verbs with different variants, which led me to the 186 | *over-across function family*. 187 | 188 | 189 | ## Acknowledgements and Disclaimer 190 | 191 | This package is not only an extension of {dplyr}. 
The main functions in 192 | {dplyover} are directly derived and based on `dplyr::across()` (dplyr's license 193 | and copyrights apply!). So if this package is working correctly, all the credit 194 | should go to the dplyr team. 195 | 196 | My own "contribution" (if you want to call it like that) merely consists of: 197 | 198 | 1. removing the dependencies on {dplyr}'s internal functions, and 199 | 2. slightly changing `across`' logic to make it work for vectors and a 200 | combination of two vectors and/or sets of columns. 201 | 202 | By this I most definitely introduced some bugs and edge cases which won't work, 203 | and in which case I am the only one to blame. 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # dplyover 5 | 6 | 7 | 8 | ![Release 9 | status](https://img.shields.io/badge/status-first%20release-yellow) 10 | [![Lifecycle](man/figures/lifecycle-experimental.svg)](man/figures/lifecycle-experimental.svg) 11 | [![R-CMD-check](https://github.com/TimTeaFan/dplyover/workflows/R-CMD-check/badge.svg)](https://github.com/TimTeaFan/dplyover/actions) 12 | [![Codecov test 13 | coverage](https://codecov.io/gh/TimTeaFan/dplyover/branch/main/graph/badge.svg)](https://codecov.io/gh/TimTeaFan/dplyover?branch=main) 14 | [![CodeFactor](https://www.codefactor.io/repository/github/timteafan/dplyover/badge)](https://www.codefactor.io/repository/github/timteafan/dplyover) 15 | [![CRAN 16 | status](https://www.r-pkg.org/badges/version/dplyover)](https://cran.r-project.org/package=dplyover) 17 | 18 | 19 | ## Overview 20 | 21 | dplyover logo 22 | 23 | {dplyover} extends {dplyr}’s functionality by building a function family 24 | around `dplyr::across()`. 
25 | 26 | The goal of this *over-across function family* is to provide a concise 27 | and uniform syntax which can be used to create columns by applying 28 | functions to vectors and/or sets of columns in {dplyr}. Ideally, this 29 | will: 30 | 31 | - **reduce the amount of code** to create variables derived from 32 | existing columns, which is especially helpful when doing exploratory 33 | data analysis (e.g. lagging, collapsing, recoding etc. many 34 | variables in a similar way). 35 | - **provide a clean {dplyr} approach** to create many variables which 36 | are calculated based on two or more variables. 37 | - **improve our mental model** so that it is easier to tackle problems 38 | where the solution is based on creating new columns. 39 | 40 | The functions in the *over-across function family* create columns by 41 | applying one or several functions to: 42 | 43 | - `dplyr::across()` a set of columns (not part of dplyover) 44 | - `over()` a vector (list or atomic vector) 45 | - `over2()` two vectors of the same length (sequentially\#) 46 | - `over2x()` two vectors (nested+) 47 | - `across2()` two sets of columns (sequentially\#) 48 | - `across2x()` two sets of columns (nested+) 49 | - `crossover()` a set of columns and a vector (nested+) 50 | 51 | \# “sequentially” means that the function is sequentially applied 52 | to the first pair of elements `x[[1]]` and `y[[1]]`, then to the second 53 | pair of elements and so on.
+ “nested” means that the 54 | function is applied to all combinations between elements in `x` and `y` 55 | similar to a nested loop. 56 | 57 | ## Installation 58 | 59 | {dplyover} is not on CRAN. You can install the latest version from 60 | [GitHub](https://github.com/) with: 61 | 62 | ``` r 63 | # install.packages("remotes") 64 | remotes::install_github("TimTeaFan/dplyover") 65 | ``` 66 | 67 | ## Getting started 68 | 69 | Below are a few examples of the {dplyover}’s *over-across function 70 | family*. More functions and workarounds of how to tackle the problems 71 | below without {dplyover} can be found in the vignette 72 | “Why 73 | dplyover?”. 74 | 75 | ``` r 76 | # dplyover is an extension of dplyr and won't work without it 77 | library(dplyr) 78 | library(dplyover) 79 | 80 | # For better printing: 81 | iris <- as_tibble(iris) 82 | ``` 83 | 84 | #### Apply functions to a vector 85 | 86 | `over()` applies one or several functions to a vector. We can use it 87 | inside `dplyr::mutate()` to create several similar variables that we 88 | derive from an existing column. This is helpful in cases where we want 89 | to create a batch of similar variables with only slight changes in the 90 | argument values of the calling function. A good example are `lag` and 91 | `lead` variables. Below we use column ‘a’ to create lag and lead 92 | variables by `1`, `2` and `3` positions. `over()`’s `.names` argument 93 | lets us put nice names on the output columns. 94 | 95 | ``` r 96 | tibble(a = 1:25) %>% 97 | mutate(over(c(1:3), 98 | list(lag = ~ lag(a, .x), 99 | lead = ~ lead(a, .x)), 100 | .names = "a_{fn}{x}")) 101 | #> # A tibble: 25 x 7 102 | #> a a_lag1 a_lead1 a_lag2 a_lead2 a_lag3 a_lead3 103 | #> 104 | #> 1 1 NA 2 NA 3 NA 4 105 | #> 2 2 1 3 NA 4 NA 5 106 | #> 3 3 2 4 1 5 NA 6 107 | #> 4 4 3 5 2 6 1 7 108 | #> # ... 
with 21 more rows 109 | ``` 110 | 111 | #### Apply functions to a set of columns and a vector simultaneously 112 | 113 | `crossover()` applies the functions in `.fns` to every combination of 114 | columns in `.xcols` with elements in `.y`. This is similar to the example 115 | above, but this time, we use a set of columns. Below we create five 116 | lagged variables for each of ‘Sepal.Length’ and ‘Sepal.Width’. Again, we 117 | use a named list as argument in `.fns` to create nice names by 118 | specifying the glue syntax in `.names`. 119 | 120 | ``` r 121 | iris %>% 122 | transmute( 123 | crossover(starts_with("sepal"), 124 | 1:5, 125 | list(lag = ~ lag(.x, .y)), 126 | .names = "{xcol}_{fn}{y}")) %>% 127 | glimpse 128 | #> Rows: 150 129 | #> Columns: 10 130 | #> $ Sepal.Length_lag1 NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9~ 131 | #> $ Sepal.Length_lag2 NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4,~ 132 | #> $ Sepal.Length_lag3 NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, ~ 133 | #> $ Sepal.Length_lag4 NA, NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5~ 134 | #> $ Sepal.Length_lag5 NA, NA, NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.~ 135 | #> $ Sepal.Width_lag1 NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1~ 136 | #> $ Sepal.Width_lag2 NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9,~ 137 | #> $ Sepal.Width_lag3 NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, ~ 138 | #> $ Sepal.Width_lag4 NA, NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3~ 139 | #> $ Sepal.Width_lag5 NA, NA, NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.~ 140 | ``` 141 | 142 | #### Apply functions to a set of variable pairs 143 | 144 | `across2()` can be used to transform pairs of variables in one or more 145 | functions. In the example below we want to calculate the product and the 146 | sum of all pairs of ‘Length’ and ‘Width’ variables in the `iris` data 147 | set.
We can use `{pre}` in the glue specification in `.names` to extract 148 | the common prefix of each pair of variables. We can further transform 149 | the names, in the example setting them `tolower`, by specifying the 150 | `.names_fn` argument: 151 | 152 | ``` r 153 | iris %>% 154 | transmute(across2(ends_with("Length"), 155 | ends_with("Width"), 156 | .fns = list(product = ~ .x * .y, 157 | sum = ~ .x + .y), 158 | .names = "{pre}_{fn}", 159 | .names_fn = tolower)) 160 | #> # A tibble: 150 x 4 161 | #> sepal_product sepal_sum petal_product petal_sum 162 | #> 163 | #> 1 17.8 8.6 0.280 1.60 164 | #> 2 14.7 7.9 0.280 1.60 165 | #> 3 15.0 7.9 0.26 1.5 166 | #> 4 14.3 7.7 0.3 1.7 167 | #> # ... with 146 more rows 168 | ``` 169 | 170 | ## Performance and Compatibility 171 | 172 | This is an experimental package which I started developing with my own 173 | use cases in mind. I tried to keep the effort low, which is why this 174 | package *does not* internalize (read: copy) internal {dplyr} functions 175 | (especially the ‘context internals’). This made it relatively easy to 176 | develop the package without: 177 | 178 | 1. copying tons of {dplyr} code, 179 | 2. having to figure out which dplyr-functions use the copied internals 180 | and 181 | 3. finally overwriting these functions (like `mutate` and other 182 | one-table verbs), which would eventually lead to conflicts with 183 | other add-on packages, like for example {tidylog}. 184 | 185 | However, the downside is that not relying on {dplyr} internals has some 186 | negative effects in terms of performance and compatibility. 187 | 188 | In a nutshell this means: 189 | 190 | - The *over-across function family* in {dplyover} is slower than the 191 | original `dplyr::across`. Up until {dplyr} 1.0.3 the overhead was 192 | not too big, but `dplyr::across` got much faster with {dplyr} 1.0.4 193 | which is why the gap has widened a lot.
194 | - Although {dplyover} is designed to work in {dplyr}, some features 195 | and edge cases will not work correctly. 196 | 197 | The good news is that even without relying on {dplyr} internals most of 198 | the original functionality can be replicated and although being less 199 | performant, the current setup is optimized and falls not too far behind 200 | in terms of speed - at least when compared to the pre v1.0.4 201 | `dplyr::across`. 202 | 203 | Regarding compatibility, I have spent quite some time testing the package 204 | and I was able to replicate most of the tests for `dplyr::across` 205 | successfully. 206 | 207 | For more information on the performance and compatibility of {dplyover} 208 | see the vignette 209 | “Performance 210 | and Compatibility”. 211 | 212 | ## History 213 | 214 | I originally opened a [feature request on 215 | GitHub](https://github.com/tidyverse/dplyr/issues/4834) to include a 216 | very special case version of `over` (or, at that time, `mutate_over`) into 217 | {dplyr}. The advice then was to make this kind of functionality 218 | available in a separate package. While I was working on this very 219 | special case version of `over`, I realized that the more general use 220 | case resembles a `purrr::map` function for use inside {dplyr} verbs with 221 | different variants, which led me to the *over-across function family*. 222 | 223 | ## Acknowledgements and Disclaimer 224 | 225 | This package is not only an extension of {dplyr}. The main functions in 226 | {dplyover} are directly derived from and based on `dplyr::across()` (dplyr’s 227 | license and copyrights apply\!). So if this package is working 228 | correctly, all the credit should go to the dplyr team. 229 | 230 | My own “contribution” (if you want to call it like that) merely consists 231 | of: 232 | 233 | 1. removing the dependencies on {dplyr}’s internal functions, and 234 | 2.
slightly changing `across`’ logic to make it work for vectors and a 235 | combination of two vectors and/or sets of columns. 236 | 237 | By this I most definitely introduced some bugs and edge cases which 238 | won’t work, and in which case I am the only one to blame. 239 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | title: dplyover 2 | url: https://timteafan.github.io/dplyover/ 3 | 4 | template: 5 | params: 6 | bootswatch: cerulean 7 | 8 | reference: 9 | - title: over-across family 10 | desc: | 11 | The functions in the *over-across* function family create columns by applying 12 | one or several functions to: 13 | `over()` a vector (list or atomic vector); 14 | `over2()` two vectors of the same length (pairwise) 15 | `over2x()` two vectors of the same length (nested) 16 | `across2()` two sets of columns (pairwise) 17 | `across2x()` two sets of columns (nested) 18 | `crossover()` a set of columns and a vector (nested) 19 | contents: 20 | - over 21 | - over2 22 | - across2 23 | - crossover 24 | 25 | - title: helper functions 26 | desc: > 27 | {dplyover} provides three selection helpers which are intended for use 28 | in all functions that accept a vector as argument (that is `over()` and 29 | `crossover()` as well as their variants). 1. Helpers which select string 30 | parts of the column names (of the underlying data): `cut_names()` removes a 31 | specified pattern; `extract_names()` extracts a specified pattern. 32 | 2. Helpers which select values of a variable: `dist_values()` returns all 33 | distinct values; `seq_range()` returns the sequence between the range of a 34 | variable. 3. A helper function `.()` that takes a glue specification as input, 35 | and evaluates the final argument string as name in the caller environment.
36 | Apart from those selection helpers, `show_prefix()` and `show_suffix()` 37 | show the common pre- or suffix for each pair of variables of two sets of 38 | columns. 39 | contents: 40 | - dist_values 41 | - cut_names 42 | - "." 43 | - show_affix 44 | 45 | - title: data 46 | desc: > 47 | {dplyover} contains a randomly generated data set from a customer 48 | satisfaction survey using CSAT (Customer Satisfaction Score) for a 49 | contract-based product. The data set comes in two versions: recoded and raw. 50 | contents: 51 | - csat 52 | - csatraw 53 | 54 | navbar: 55 | structure: 56 | right: [home, reference, articles, news, twitter, github] 57 | components: 58 | reference: 59 | icon: fas fa-tools 60 | text: Reference 61 | href: reference/index.html 62 | articles: 63 | icon: fas fa-book-open 64 | text: Articles 65 | menu: 66 | - text: Why dplyover? 67 | href: articles/why_dplyover.html 68 | - text: Performance and Compatibility 69 | href: articles/performance.html 70 | news: 71 | icon: fas fa-clipboard-list 72 | text: News 73 | href: news/index.html 74 | twitter: 75 | icon: "fab fa-twitter" 76 | href: https://twitter.com/timteafan 77 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /data-raw/csatraw.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `csatraw` dataset goes here 2 | 3 | type_vc <- c(rep(c("new", "existing"), 2), "reactivate") 4 | product_vc <- c("basic", "advanced", "premium") 5 | 6 | comments_good <- c("great product", "good service", "friendly staff") 7 |
comments_bad <- c("too expensive", "unfriendly", "no response") 8 | comments_all <- c(comments_good, comments_bad) 9 | 10 | lookup_ls <- list( 11 | `11` = "great product", 12 | `12` = "good service", 13 | `13` = "friendly staff", 14 | `21` = "too expensive", 15 | `22` = "unfriendly", 16 | `23` = "no response" 17 | ) 18 | 19 | create_coded_rsp <- function(inp, c_good, c_bad, c_all) { 20 | 21 | size <- round(runif(length(inp), 1, 3)) 22 | 23 | res <- vapply(seq_along(inp), 24 | FUN.VALUE = character(1), 25 | FUN = function(x) { 26 | if (inp[x] > 3) { 27 | paste(sample(c_good, size[x]), collapse = ", ") 28 | } else if (inp[x] < 3) { 29 | paste(sample(c_bad, size[x]), collapse = ", ") 30 | } else { 31 | paste(sample(c_all, size[x]), collapse = ", ") 32 | } 33 | }) 34 | 35 | } 36 | 37 | # Fix the RNG seed for reproducibility, then build csatraw; item1_open comes from create_coded_rsp(), which draws 1-3 codes per rating (from c_good if > 3, c_bad if < 3, c_all if == 3) and pastes them into one ", "-separated string 38 | set.seed(982342) 39 | 40 | csatraw <- tibble::tibble( 41 | cust_id = stringr::str_pad(sample(150:99999, 150), 5, pad = "0"), 42 | type = sample(type_vc, 150, replace = TRUE), 43 | product = sample(product_vc, 150, replace = TRUE), 44 | item1 = round(runif(150, min = 1, max = 5), 0), 45 | item1_open = create_coded_rsp(item1, 11:13, 21:23, c(11:13,21:23)), 46 | item2a = sample(c(0,1,2,2,3,3), 150, replace = TRUE), 47 | item2b = ifelse(item2a == 0, NA, round(runif(150, min = 1, max = 5), 0)), 48 | item3a = sample(c(0,1,2,2,3,3), 150, replace = TRUE), 49 | item3b = ifelse(item3a == 0, NA, round(runif(150, min = 1, max = 5), 0)), 50 | item4a = sample(c(0,1,2,2,3,3), 150, replace = TRUE), 51 | item4b = ifelse(item4a == 0, NA, round(runif(150, min = 1, max = 5), 0)), 52 | item5a = sample(c(0,1,2,2,3,3), 150, replace = TRUE), 53 | item5b = ifelse(item5a == 0, NA, round(runif(150, min = 1, max = 5), 0)), 54 | item6a = sample(c(0,1,2,2,3,3), 150, replace = TRUE), 55 | item6b = ifelse(item6a == 0, NA, round(runif(150, min = 1, max = 5), 0)) 56 | ) 57 | 58 | usethis::use_data(csatraw, overwrite = TRUE) 59 | 60 | csat <- csatraw 61 | 62 | csat <- csat %>% 63 |
rename(csat = item1, 64 | csat_open = item1_open) %>% 65 | rename_with(~ gsub("a", "_contact", .x), 66 | .cols = matches("\\da$")) %>% 67 | rename_with(~ gsub("b", "_rating", .x), 68 | .cols = matches("\\db$")) %>% 69 | rename_with(~ gsub("item2", "postal", .x), 70 | .cols = starts_with("item2")) %>% 71 | rename_with(~ gsub("item3", "phone", .x), 72 | .cols = starts_with("item3")) %>% 73 | rename_with(~ gsub("item4", "email", .x), 74 | .cols = starts_with("item4")) %>% 75 | rename_with(~ gsub("item5", "website", .x), 76 | .cols = starts_with("item5")) %>% 77 | rename_with(~ gsub("item6", "shop", .x), 78 | .cols = starts_with("item6")) %>% 79 | mutate(type = factor(type, levels = c("new", "existing", "reactivate")), 80 | product = factor(product, levels = c("basic", "advanced", "premium")), 81 | across(ends_with("csat") | ends_with("_rating"), 82 | ~ recode(.x, 83 | `1` = "Very unsatisfied", 84 | `2` = "Unsatisfied", 85 | `3` = "Neutral", 86 | `4` = "Satisfied", 87 | `5` = "Very satisfied") %>% 88 | factor(., levels = c("Very unsatisfied", 89 | "Unsatisfied", 90 | "Neutral", 91 | "Satisfied", 92 | "Very satisfied"))), 93 | across(ends_with("_contact"), 94 | ~ recode(.x, 95 | `0` = "no contact", 96 | `1` = "more than 3 years ago", 97 | `2` = "within 1 to 3 years", 98 | `3` = "within last year") %>% 99 | factor(., levels = c("no contact", 100 | "more than 3 years ago", 101 | "within 1 to 3 years", 102 | "within last year"))), 103 | csat_open = purrr::map(csat_open, ~ recode(.x, !!! lookup_ls)), 104 | csat_open = gsub("11", "great product", csat_open) %>% 105 | gsub("12", "good service", .) %>% 106 | gsub("13", "friendly staff", .) %>% 107 | gsub("21", "too expensive", .) %>% 108 | gsub("22", "unfriendly", .) 
%>% 109 | gsub("23", "no response", .)) 110 | 111 | usethis::use_data(csat, overwrite = TRUE) 112 | 113 | -------------------------------------------------------------------------------- /data/csat.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/data/csat.rda -------------------------------------------------------------------------------- /data/csatraw.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/data/csatraw.rda -------------------------------------------------------------------------------- /dplyover.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/across2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/across2.R 3 | \name{across2} 4 | \alias{across2} 5 | \alias{across2x} 6 | \title{Apply functions to two sets of columns simultaniously in 'dplyr'} 7 | \usage{ 8 | across2(.xcols, .ycols, .fns, ..., .names = NULL, .names_fn = NULL) 9 | 10 | across2x( 11 | .xcols, 12 | .ycols, 13 | .fns, 14 | ..., 15 | .names = NULL, 16 | .names_fn = NULL, 17 | .comb = 
"all" 18 | ) 19 | } 20 | \arguments{ 21 | \item{.xcols, .ycols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to transform. 22 | Note that you can not select or compute upon grouping variables.} 23 | 24 | \item{.fns}{Functions to apply to each column in \code{.xcols} and \code{.ycols}. 25 | 26 | Possible values are: 27 | \itemize{ 28 | \item A function 29 | \item A purrr-style lambda 30 | \item A list of functions/lambdas 31 | } 32 | 33 | Note that \code{NULL} is not accepted as argument to \code{.fns}.} 34 | 35 | \item{...}{Additional arguments for the function calls in \code{.fns}.} 36 | 37 | \item{.names}{A glue specification that describes how to name the output 38 | columns. This can use: 39 | \itemize{ 40 | \item \code{{xcol}} to stand for the selected column name in \code{.xcols}, 41 | \item \code{{ycol}} to stand for the selected column name in \code{.ycols}, and 42 | \item \code{{fn}} to stand for the name of the function being applied. 43 | } 44 | 45 | The default (\code{NULL}) is equivalent to \code{"{xcol}_{ycol}"} for the single function 46 | case and \code{"{xcol}_{ycol}_{fn}"} for the case where a list is used for \code{.fns}. 47 | 48 | \code{across2()} supports two additional glue specifications: \code{{pre}} and \code{{suf}}. 49 | They extract the common alphanumeric prefix or suffix of each pair of 50 | variables. 51 | 52 | Alternatively to a glue specification, a character vector of length equal 53 | to the number of columns to be created can be supplied to \code{.names}. 54 | Note that in this case, the glue specification described above is not supported.} 55 | 56 | \item{.names_fn}{Optionally, a function that is applied after the glue 57 | specification in \code{.names} has been evaluated. This is, for example, helpful, 58 | in case the resulting names need to be further cleaned or trimmed.} 59 | 60 | \item{.comb}{In \code{across2x()} this argument allows to control which 61 | combinations of columns are to be created. 
This argument only matters, if 62 | the columns specified in \code{.xcols} and \code{.ycols} overlap to some extent. 63 | \itemize{ 64 | \item \code{"all"}, the default, will create all pairwise combinations between columns 65 | in \code{.xcols} and \code{.ycols} \emph{including all permutations} (e.g. 66 | \code{foo(column_x, column_y)} as well as \code{foo(column_y, column_x)}. 67 | \item \code{"unique"} will only create all unordered combinations (e.g. creates 68 | \code{foo(column_x, column_y)}, while \code{foo(column_y, column_x)} \emph{will not} be created) 69 | \item \verb{"minimal} same as \code{"unique"} and further skips all self-matches (e.g. 70 | \code{foo(column_x, column_x)} \emph{will not} be created) 71 | }} 72 | } 73 | \value{ 74 | \code{across2()} returns a tibble with one column for each pair of elements in 75 | \code{.xcols} and \code{.ycols} combined with each function in \code{.fns}. 76 | 77 | \code{across2x()} returns a tibble with one column for each combination between 78 | elements in \code{.x} and\code{.y} combined with each function in \code{.fns}. 79 | } 80 | \description{ 81 | \code{across2()} and \code{across2x()} are variants of \code{\link[dplyr:across]{dplyr::across()}} that iterate 82 | over two columns simultaneously. \code{across2()} loops each \emph{pair of columns} in \code{.xcols} 83 | and \code{.ycols} over one or more functions, while \code{across2x()} loops 84 | \emph{every combination between columns} in \code{.xcols} and \code{.ycols} over one or more functions. 85 | } 86 | \section{Examples}{ 87 | 88 | 89 | For the basic functionality of \code{across()} please refer to the examples in 90 | \code{\link[dplyr:across]{dplyr::across()}}.\if{html}{\out{
}}\preformatted{library(dplyr) 91 | 92 | # For better printing 93 | iris <- as_tibble(iris) 94 | }\if{html}{\out{
}} 95 | 96 | \code{across2()} can be used to transform pairs of variables in one or more functions. 97 | In the example below we want to calculate the product and the sum of all pairs of 98 | 'Length' and 'Width' variables. We can use \code{{pre}} in the glue specification in 99 | \code{.names} to extract the common prefix of each pair of variables. We can further 100 | transform the names, in the example setting them \code{tolower} by specifying the 101 | \code{.names_fn} argument:\if{html}{\out{
}}\preformatted{iris \%>\% 102 | transmute(across2(ends_with("Length"), 103 | ends_with("Width"), 104 | .fns = list(product = ~ .x * .y, 105 | sum = ~ .x + .y), 106 | .names = "\{pre\}_\{fn\}", 107 | .names_fn = tolower)) 108 | #> # A tibble: 150 x 4 109 | #> sepal_product sepal_sum petal_product petal_sum 110 | #> 111 | #> 1 17.8 8.6 0.28 1.6 112 | #> 2 14.7 7.9 0.28 1.6 113 | #> 3 15.0 7.9 0.26 1.5 114 | #> 4 14.3 7.7 0.3 1.7 115 | #> # ... with 146 more rows 116 | }\if{html}{\out{
}} 117 | 118 | \code{across2x()} can be used to perform calculations on each combination of variables. 119 | In the example below we calculate the correlation between all variables in the 120 | \code{iris} data set for each group. To do this, we \code{group_by} 'Species' and specify 121 | the {tidyselect} helper \code{everything()} to \code{.xcols} and \code{.ycols}. 122 | \code{~ round(cor(.x, .y), 2)} gives us the correlation rounded to two digits for each 123 | pair of variables. We trim the rather long variable names by replacing "Sepal" 124 | with "S", and "Petal" with "P" in the \code{.names_fn} argument. Finally, we are not 125 | interested in correlations of the same column and want to avoid excessive results 126 | by setting the \code{.comb} argument to \code{"minimal"}.\if{html}{\out{
}}\preformatted{iris \%>\% 127 | group_by(Species) \%>\% 128 | summarise(across2x(everything(), 129 | everything(), 130 | ~ round(cor(.x, .y), 2), 131 | .names_fn = ~ gsub("Sepal", "S", .x) \%>\% 132 | gsub("Petal", "P", .), 133 | .comb = "minimal")) 134 | #> # A tibble: 3 x 7 135 | #> Species S.Length_S.Width S.Length_P.Length S.Length_P.Width S.Width_P.Length 136 | #> 137 | #> 1 setosa 0.74 0.27 0.28 0.18 138 | #> 2 versicolor 0.53 0.75 0.55 0.56 139 | #> 3 virginica 0.46 0.86 0.28 0.4 140 | #> # ... with 2 more variables: S.Width_P.Width , P.Length_P.Width 141 | }\if{html}{\out{
}} 142 | } 143 | 144 | -------------------------------------------------------------------------------- /man/crossover.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/crossover.R 3 | \name{crossover} 4 | \alias{crossover} 5 | \title{Apply functions to a set of columns and a vector simultaniously in 'dplyr'} 6 | \usage{ 7 | crossover( 8 | .xcols = dplyr::everything(), 9 | .y, 10 | .fns, 11 | ..., 12 | .names = NULL, 13 | .names_fn = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{.xcols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Columns to transform. 18 | Because \code{crossover()} is used within functions like \code{summarise()} and 19 | \code{mutate()}, you can't select or compute upon grouping variables.} 20 | 21 | \item{.y}{An atomic vector or list to apply functions to. \code{crossover()} also 22 | accepts a function as \code{.y} argument. In this case each column in \code{.xcols} 23 | is looped over all the outputs that it generated with the function supplied 24 | to \code{.y}. Note: the underyling data must not be grouped, if a function 25 | is supplied to \code{.y}. 26 | 27 | If a function is supplied, the following values are possible: 28 | \itemize{ 29 | \item A bare function name, e.g. \code{unique} 30 | \item An anonymous function, e.g. \code{function(x) unique(x)} 31 | \item A purrr-style lambda, e.g. \code{~ unique(.x, fromLast = TRUE)} 32 | } 33 | 34 | Note that additional arguments can only be specified with an anonymous 35 | function, a purrr-style lamba or with a pre-filled custom function.} 36 | 37 | \item{.fns}{Functions to apply to each column in \code{.xcols} and element in \code{.y}. 
38 | 39 | Possible values are: 40 | \itemize{ 41 | \item A function 42 | \item A purrr-style lambda 43 | \item A list of functions/lambdas 44 | } 45 | 46 | Note that \code{NULL} is not accepted as argument to \code{.fns}.} 47 | 48 | \item{...}{Additional arguments for the function calls in \code{.fns}.} 49 | 50 | \item{.names}{A glue specification that describes how to name the output 51 | columns. This can use: 52 | \itemize{ 53 | \item \code{{xcol}} to stand for the selected column name, 54 | \item \code{{y}} to stand for the selected vector element, and 55 | \item \code{{fn}} to stand for the name of the function being applied. 56 | } 57 | 58 | The default (\code{NULL}) is equivalent to \code{"{xcol}_{y}"} for the single function 59 | case and \code{"{xcol}_{y}_{fn}"} for the case where a list is used for \code{.fns}. 60 | 61 | Note that, depending on the nature of the underlying object in \code{.y}, 62 | specifying \code{{y}} will yield different results: 63 | \itemize{ 64 | \item If \code{.y} is an unnamed atomic vector, \code{{y}} will represent each value. 65 | \item If \code{.y} is a named list or atomic vector, \code{{y}} will represent each name. 66 | \item If \code{.y} is an unnamed list, \code{{y}} will be the index number running from 1 to \code{length(y)}. 67 | } 68 | 69 | This standard behavior (interpretation of \code{{y}}) can be overwritten by 70 | directly specifying: 71 | \itemize{ 72 | \item \code{{y_val}} for \code{.y}'s values 73 | \item \code{{y_nm}} for its names 74 | \item \code{{y_idx}} for its index numbers 75 | } 76 | 77 | Alternatively, a character vector of length equal to the number of columns to 78 | be created can be supplied to \code{.names}. Note that in this case, the glue 79 | specification described above is not supported.} 80 | 81 | \item{.names_fn}{Optionally, a function that is applied after the glue 82 | specification in \code{.names} has been evaluated. 
This is, for example, helpful, 83 | in case the resulting names need to be further cleaned or trimmed.} 84 | } 85 | \value{ 86 | \code{crossover()} returns a tibble with one column for each combination of 87 | columns in \code{.xcols}, elements in \code{.y} and functions in \code{.fns}. 88 | 89 | If a function is supplied as \code{.y} argument, \code{crossover()} returns a tibble with 90 | one column for each pair of output elements of \code{.y} and the column in \code{.xcols} 91 | that generated the output combined with each function in \code{.fns}. 92 | } 93 | \description{ 94 | \code{crossover()} combines the functionality of \code{\link[dplyr:across]{dplyr::across()}} with \code{\link[=over]{over()}} 95 | by iterating simultaneously over (i) a set of columns (\code{.xcols}) and (ii) 96 | a vector or list (\code{.y}). \code{crossover()} \emph{always} applies the functions in 97 | \code{.fns} in a \emph{nested} way to a combination of both inputs. There are, however, 98 | two different ways in which the functions in \code{.fns} are applied. 99 | 100 | When \code{.y} is a vector or list, each function in \code{.fns} is applied to 101 | \emph{all pairwise combinations} between columns in \code{.xcols} and elements in 102 | \code{.y} (this resembles the behavior of \code{over2x()} and \code{across2x()}). 103 | 104 | \code{crossover()} has one trick up its sleeve, which sets it apart from the other 105 | functions in the <\code{\link[=over_across_family]{over-across family}}>: Its second input 106 | (\code{.y}) can be a function. This changes the original behavior slightly: First 107 | the function in \code{.y} is applied to all columns in \code{.xcols} to \emph{generate} an 108 | input object which will be used as \code{.y} in the function calls in \code{.fns}.
109 | In this case each function is applied to all pairs between (i) columns in 110 | \code{.xcols} with (ii) the output elements that they generated through the 111 | function that was originally supplied to \code{.y}. Note that the underyling 112 | data must not be grouped, if a function is supplied to \code{.y}. For examples see 113 | the example section below. 114 | } 115 | \section{Examples}{ 116 | 117 | 118 | For the basic functionality please refer to the examples in \code{\link[=over]{over()}} and 119 | \code{\link[dplyr:across]{dplyr::across()}}.\if{html}{\out{
}}\preformatted{library(dplyr) 120 | 121 | # For better printing 122 | iris <- as_tibble(iris) 123 | }\if{html}{\out{
}} 124 | \subsection{Creating many similar variables for multiple columns}{ 125 | 126 | If \code{.y} is a vector or list, \code{crossover()} loops every combination between 127 | columns in \code{.xcols} and elements in \code{.y} over the functions in \code{.fns}. This 128 | is helpful in cases where we want to create a batch of similar variables with 129 | only slight changes in the arguments of the calling function. Good examples 130 | are lagged variables. Below we create five lagged variables for each of 131 | 'Sepal.Length' and 'Sepal.Width'. To create nice names we use a named list 132 | as argument in \code{.fns} and specify the glue syntax in \code{.names}.\if{html}{\out{
}}\preformatted{ iris \%>\% 133 | transmute( 134 | crossover(starts_with("sepal"), 135 | 1:5, 136 | list(lag = ~ lag(.x, .y)), 137 | .names = "\{xcol\}_\{fn\}\{y\}")) \%>\% 138 | glimpse 139 | #> Rows: 150 140 | #> Columns: 10 141 | #> $ Sepal.Length_lag1 NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4~ 142 | #> $ Sepal.Length_lag2 NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9,~ 143 | #> $ Sepal.Length_lag3 NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, ~ 144 | #> $ Sepal.Length_lag4 NA, NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4~ 145 | #> $ Sepal.Length_lag5 NA, NA, NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.~ 146 | #> $ Sepal.Width_lag1 NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7~ 147 | #> $ Sepal.Width_lag2 NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1,~ 148 | #> $ Sepal.Width_lag3 NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, ~ 149 | #> $ Sepal.Width_lag4 NA, NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2~ 150 | #> $ Sepal.Width_lag5 NA, NA, NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.~ 151 | }\if{html}{\out{
}} 152 | } 153 | 154 | \subsection{Creating dummy variables for multiple variables (columns)}{ 155 | 156 | The \code{.y} argument of \code{crossover()} can take a function instead of a list or vector. 157 | In the example below we select the columns 'type', 'product', 'csat' in \code{.xcols}. 158 | We supply the function \code{\link[=dist_values]{dist_values()}} to \code{.y}, which is a cleaner variant of 159 | base R's \code{unique()}. This generates all distinct values for all three selected 160 | variables. Now, the function in \code{.fns}, \code{~ if_else(.y == .x, 1, 0)}, is applied 161 | to each pair of a distinct value in \code{.y} and the column in \code{.xcols} that generated 162 | this value. This basically creates a dummy variable for each value of each 163 | variable. Since some of the values contain whitespace characters, we can use the 164 | \code{.names_fn} argument to supply a \emph{third} function that cleans the output names 165 | by replacing spaces with an underscore and setting all characters \code{tolower()}.\if{html}{\out{
}}\preformatted{ csat \%>\% 166 | transmute( 167 | crossover(.xcols = c(type, product, csat), 168 | .y = dist_values, 169 | .fns = ~ if_else(.y == .x, 1, 0), 170 | .names_fn = ~ gsub("\\\\s", "_", .x) \%>\% tolower(.) 171 | )) \%>\% 172 | glimpse 173 | #> Rows: 150 174 | #> Columns: 11 175 | #> $ type_new 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,~ 176 | #> $ type_existing 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,~ 177 | #> $ type_reactivate 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,~ 178 | #> $ product_basic 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,~ 179 | #> $ product_advanced 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,~ 180 | #> $ product_premium 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,~ 181 | #> $ csat_very_unsatisfied 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,~ 182 | #> $ csat_unsatisfied 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,~ 183 | #> $ csat_neutral 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,~ 184 | #> $ csat_satisfied 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,~ 185 | #> $ csat_very_satisfied 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,~ 186 | }\if{html}{\out{
}} 187 | } 188 | } 189 | 190 | \seealso{ 191 | Other members of the <\code{\link[=over_across_family]{over-across function family}}>. 192 | } 193 | -------------------------------------------------------------------------------- /man/csat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-csat.R 3 | \docType{data} 4 | \name{csat} 5 | \alias{csat} 6 | \title{Customer Satisfaction Survey (recoded data)} 7 | \format{ 8 | A tibble with 150 rows and 15 variables: 9 | \describe{ 10 | \item{cust_id}{Customer identification number} 11 | \item{type}{Type of customer: "new", "existing" or "reactivate"} 12 | \item{product}{The type of product: "basic", "advanced" or "premium"} 13 | \item{csat}{The overall Customer Satisfaction Score} 14 | \item{csat_open}{Follow-up question why the respondent gave this specific 15 | Customer Satisfaction rating. The open-ended answers have been coded into six 16 | categories (multiple answers possible).} 17 | \item{postal_contact, phone_contact, email_contact, website_contact, 18 | shop_contact}{When did the customer have last contact via given channel?} 19 | \item{postal_rating, phone_rating, email_rating, website_rating, 20 | shop_rating}{If customer had contact over the given channel: 21 | How satisfied was he?} 22 | } 23 | } 24 | \usage{ 25 | csat 26 | } 27 | \description{ 28 | This data is randomly generated. It resembles data from a customer 29 | satisfaction survey using CSAT (Customer Satisfaction Score) for a 30 | contract-based product. The data has been recoded. The raw version of this data 31 | set can be found here <\code{\link{csatraw}}>.
32 | } 33 | \examples{ 34 | csat 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/csatraw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-csatraw.R 3 | \docType{data} 4 | \name{csatraw} 5 | \alias{csatraw} 6 | \title{Customer Satisfaction Survey (raw data)} 7 | \format{ 8 | A tibble with 150 rows and 15 variables: 9 | \describe{ 10 | \item{cust_id}{Customer identification number} 11 | \item{type}{Type of customer: "new", "existing" or "reactivate"} 12 | \item{product}{The type of product: "basic", "advanced" or "premium"} 13 | \item{item1}{The overall Customer Satisfaction Score\cr\cr Scale: Ranging from 1 = 14 | "Very unsatisfied" to 5 = "Very satisfied"} 15 | \item{item1_open}{Follow-up question why the respondent gave this specific 16 | Customer Satisfaction rating. The open-ended answers have been coded into six 17 | categories: 11 = "great product", 12 = "good service", 13 = "friendly staff", 18 | 21 = "too expensive", 22 = "unfriendly", 23 = "no response" (multiple answers 19 | possible).} 20 | \item{item2a, item3a, item4a, item5a, item6a}{When did the customer have last 21 | contact via postal mail (item2a), phone (item3a), email (item4a), website 22 | (item5a), a retail shop (item6a) ?\cr\cr Scale: 0 = "no contact", 1 = "more 23 | than 3 years ago", 2 = "within 1 to 3 years", 3 = "within the last year"} 24 | \item{item2b, item3b, item4b, item5b, item6b}{If customer had contact 25 | via postal mail (item2b), phone (item3b), email (item4b), website (item5b), 26 | a retail shop (item6b): How satisfied was he?\cr\cr 27 | Scale: Ranging from 1 = "Very unsatisfied", to 5 = "Very satisfied"} 28 | } 29 | } 30 | \usage{ 31 | csatraw 32 | } 33 | \description{ 34 | This data is randomly generated.
It resembles raw data from a customer 35 | satisfaction survey using CSAT (Customer Satisfaction Score) for a 36 | contract-based product. The first three variables are given, all other 37 | variables come from a survey tool and are only named "item1" etc. 38 | A recoded version of this data set can be found here <\code{\link{csat}}>. 39 | } 40 | \examples{ 41 | csatraw 42 | } 43 | \keyword{datasets} 44 | -------------------------------------------------------------------------------- /man/dplyover-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dplyover.R 3 | \docType{package} 4 | \name{dplyover-package} 5 | \alias{dplyover} 6 | \alias{dplyover-package} 7 | \title{dplyover: Create columns by applying functions to vectors and/or columns in 'dplyr'} 8 | \description{ 9 | To learn more about dplyover, start with the vignette: 10 | \code{browseVignettes(package = "dplyover")} 11 | } 12 | \seealso{ 13 | Useful links: 14 | \itemize{ 15 | \item \url{https://github.com/TimTeaFan/dplyover} 16 | \item Report bugs at \url{https://github.com/TimTeaFan/dplyover/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Tim Tiefenbach \email{mailme@tim-tiefenbach.de} (\href{https://orcid.org/0000-0001-9443-2434}{ORCID}) 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/figures/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /man/figures/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /man/figures/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /man/figures/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /man/figures/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /man/figures/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon.png -------------------------------------------------------------------------------- /man/figures/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/favicon-16x16.png -------------------------------------------------------------------------------- /man/figures/favicon-32x32.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/favicon-32x32.png -------------------------------------------------------------------------------- /man/figures/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/favicon.ico -------------------------------------------------------------------------------- /man/figures/lifecycle-archived.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclearchivedarchived -------------------------------------------------------------------------------- /man/figures/lifecycle-defunct.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycledefunctdefunct -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycledeprecateddeprecated -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecycleexperimentalexperimental -------------------------------------------------------------------------------- /man/figures/lifecycle-maturing.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclematuringmaturing -------------------------------------------------------------------------------- /man/figures/lifecycle-questioning.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclequestioningquestioning -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: 
-------------------------------------------------------------------------------- 1 | lifecyclelifecyclestablestable -------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | lifecyclelifecyclesupersededsuperseded -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/logo_big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/logo_big.png -------------------------------------------------------------------------------- /man/over.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/over.R 3 | \name{over} 4 | \alias{over} 5 | \title{Apply functions to a list or vector in 'dplyr'} 6 | \usage{ 7 | over(.x, .fns, ..., .names = NULL, .names_fn = NULL) 8 | } 9 | \arguments{ 10 | \item{.x}{An atomic vector or list to apply functions to. Alternatively a 11 | <\code{\link[=selection_helpers]{selection helper}}> can be used to create 12 | a vector.} 13 | 14 | \item{.fns}{Functions to apply to each of the elements in \code{.x}. For 15 | functions that expect variable names as input, the selected strings need to 16 | be turned into symbols and evaluated. \code{dplyover} comes with a genuine helper 17 | function that evaluates strings as names \code{\link[=.]{.()}}.
18 | 19 | Possible values are: 20 | \itemize{ 21 | \item A function 22 | \item A purrr-style lambda 23 | \item A list of functions/lambdas 24 | } 25 | 26 | For examples see the example section below. 27 | 28 | Note that, unlike \code{across()}, \code{over()} does not accept \code{NULL} as a 29 | value to \code{.fns}.} 30 | 31 | \item{...}{Additional arguments for the function calls in \code{.fns}.} 32 | 33 | \item{.names}{A glue specification that describes how to name the output 34 | columns. This can use \code{{x}} to stand for the selected vector element, and 35 | \code{{fn}} to stand for the name of the function being applied. The default 36 | (\code{NULL}) is equivalent to \code{"{x}"} for the single function case and 37 | \code{"{x}_{fn}"} for the case where a list is used for \code{.fns}. 38 | 39 | Note that, depending on the nature of the underlying object in \code{.x}, 40 | specifying \code{{x}} will yield different results: 41 | \itemize{ 42 | \item If \code{.x} is an unnamed atomic vector, \code{{x}} will represent each value. 43 | \item If \code{.x} is a named list or atomic vector, \code{{x}} will represent each name. 44 | \item If \code{.x} is an unnamed list, \code{{x}} will be the index number running from 1 to \code{length(x)}. 45 | } 46 | 47 | This standard behavior (interpretation of \code{{x}}) can be overwritten by 48 | directly specifying: 49 | \itemize{ 50 | \item \code{{x_val}} for \code{.x}'s values 51 | \item \code{{x_nm}} for its names 52 | \item \code{{x_idx}} for its index numbers 53 | } 54 | 55 | Alternatively, a character vector of length equal to the number of columns to 56 | be created can be supplied to \code{.names}. Note that in this case, the glue 57 | specification described above is not supported.} 58 | 59 | \item{.names_fn}{Optionally, a function that is applied after the glue 60 | specification in \code{.names} has been evaluated. 
This is, for example, helpful 61 | in case the resulting names need to be further cleaned or trimmed.} 62 | } 63 | \value{ 64 | A tibble with one column for each element in \code{.x} and each function in \code{.fns}. 65 | } 66 | \description{ 67 | \code{over()} makes it easy to create new columns inside a \code{\link[dplyr:mutate]{dplyr::mutate()}} or 68 | \code{\link[dplyr:summarise]{dplyr::summarise()}} call by applying a function (or a set of functions) to 69 | an atomic vector or list using a syntax similar to \code{\link[dplyr:across]{dplyr::across()}}. 70 | The main difference is that \code{\link[dplyr:across]{dplyr::across()}} transforms or creates new columns 71 | based on existing ones, while \code{over()} can create new columns based on a 72 | vector or list to which it will apply one or several functions. 73 | Whereas \code{\link[dplyr:across]{dplyr::across()}} allows \code{tidy-selection} helpers to select columns, 74 | \code{over()} provides its own helper functions to select strings or values based 75 | on either (1) values of specified columns or (2) column names. See the 76 | examples below and the \code{vignette("why_dplyover")} for more details. 77 | } 78 | \section{Note}{ 79 | 80 | Similar to \code{dplyr::across()}, \code{over()} works only inside dplyr verbs. 81 | } 82 | 83 | \section{Examples}{ 84 | 85 | 86 | It has two main use cases. They differ in how the elements in \code{.x} 87 | are used. Let's first attach \code{dplyr}:\if{html}{\out{
}}\preformatted{library(dplyr) 88 | 89 | # For better printing 90 | iris <- as_tibble(iris) 91 | }\if{html}{\out{
}} 92 | \subsection{(1) The General Use Case}{ 93 | 94 | Here the values in \code{.x} are used as inputs to one or more functions in \code{.fns}. 95 | This is useful, when we want to create several new variables based on the same 96 | function with varying arguments. A good example is creating a bunch of lagged 97 | variables.\if{html}{\out{
}}\preformatted{tibble(x = 1:25) \%>\% 98 | mutate(over(c(1:3), 99 | ~ lag(x, .x))) 100 | #> # A tibble: 25 x 4 101 | #> x `1` `2` `3` 102 | #> 103 | #> 1 1 NA NA NA 104 | #> 2 2 1 NA NA 105 | #> 3 3 2 1 NA 106 | #> 4 4 3 2 1 107 | #> # ... with 21 more rows 108 | }\if{html}{\out{
}} 109 | 110 | Let's create a dummy variable for each unique value in 'Species':\if{html}{\out{
}}\preformatted{iris \%>\% 111 | mutate(over(unique(Species), 112 | ~ if_else(Species == .x, 1, 0)), 113 | .keep = "none") 114 | #> # A tibble: 150 x 3 115 | #> setosa versicolor virginica 116 | #> 117 | #> 1 1 0 0 118 | #> 2 1 0 0 119 | #> 3 1 0 0 120 | #> 4 1 0 0 121 | #> # ... with 146 more rows 122 | }\if{html}{\out{
}} 123 | 124 | With \code{over()} it is also possible to create several dummy variables with 125 | different thresholds. We can use the \code{.names} argument to control the output 126 | names:\if{html}{\out{
}}\preformatted{iris \%>\% 127 | mutate(over(seq(4, 7, by = 1), 128 | ~ if_else(Sepal.Length < .x, 1, 0), 129 | .names = "Sepal.Length_\{x\}"), 130 | .keep = "none") 131 | #> # A tibble: 150 x 4 132 | #> Sepal.Length_4 Sepal.Length_5 Sepal.Length_6 Sepal.Length_7 133 | #> 134 | #> 1 0 0 1 1 135 | #> 2 0 1 1 1 136 | #> 3 0 1 1 1 137 | #> 4 0 1 1 1 138 | #> # ... with 146 more rows 139 | }\if{html}{\out{
}} 140 | 141 | A similar approach can be used with dates. Below we loop over a date 142 | sequence to check whether the date falls within a given start and end 143 | date. We can use the \code{.names_fn} argument to clean the resulting output 144 | names:\if{html}{\out{
}}\preformatted{# some dates 145 | dat_tbl <- tibble(start = seq.Date(as.Date("2020-01-01"), 146 | as.Date("2020-01-15"), 147 | by = "days"), 148 | end = start + 10) 149 | 150 | dat_tbl \%>\% 151 | mutate(over(seq(as.Date("2020-01-01"), 152 | as.Date("2020-01-21"), 153 | by = "weeks"), 154 | ~ .x >= start & .x <= end, 155 | .names = "day_\{x\}", 156 | .names_fn = ~ gsub("-", "", .x))) 157 | #> # A tibble: 15 x 5 158 | #> start end day_20200101 day_20200108 day_20200115 159 | #> 160 | #> 1 2020-01-01 2020-01-11 TRUE TRUE FALSE 161 | #> 2 2020-01-02 2020-01-12 FALSE TRUE FALSE 162 | #> 3 2020-01-03 2020-01-13 FALSE TRUE FALSE 163 | #> 4 2020-01-04 2020-01-14 FALSE TRUE FALSE 164 | #> 5 2020-01-05 2020-01-15 FALSE TRUE TRUE 165 | #> 6 2020-01-06 2020-01-16 FALSE TRUE TRUE 166 | #> 7 2020-01-07 2020-01-17 FALSE TRUE TRUE 167 | #> 8 2020-01-08 2020-01-18 FALSE TRUE TRUE 168 | #> 9 2020-01-09 2020-01-19 FALSE FALSE TRUE 169 | #> 10 2020-01-10 2020-01-20 FALSE FALSE TRUE 170 | #> 11 2020-01-11 2020-01-21 FALSE FALSE TRUE 171 | #> 12 2020-01-12 2020-01-22 FALSE FALSE TRUE 172 | #> 13 2020-01-13 2020-01-23 FALSE FALSE TRUE 173 | #> 14 2020-01-14 2020-01-24 FALSE FALSE TRUE 174 | #> 15 2020-01-15 2020-01-25 FALSE FALSE TRUE 175 | }\if{html}{\out{
}} 176 | 177 | \code{over()} can summarise data in wide format. In the example below, we want to 178 | know for each group of customers (\code{new}, \code{existing}, \code{reactivate}), what 179 | percentage of the respondents gave which rating on a five-point Likert scale 180 | (\code{item1}). A usual approach in the tidyverse would be to use 181 | \code{count \%>\% group_by \%>\% mutate}, which yields the same result in the usually 182 | preferred long format. Sometimes, however, we might want this kind of summary 183 | in the wide format, and in this case \code{over()} comes in handy:\if{html}{\out{
}}\preformatted{csatraw \%>\% 184 | group_by(type) \%>\% 185 | summarise(over(c(1:5), 186 | ~ mean(item1 == .x))) 187 | #> # A tibble: 3 x 6 188 | #> type `1` `2` `3` `4` `5` 189 | #> 190 | #> 1 existing 0.156 0.234 0.234 0.266 0.109 191 | #> 2 new 0.0714 0.268 0.357 0.214 0.0893 192 | #> 3 reactivate 0.0667 0.267 0.133 0.4 0.133 193 | }\if{html}{\out{
}} 194 | 195 | Instead of a vector we can provide a named list of vectors to calculate the 196 | top two and bottom two categories on the fly:\if{html}{\out{
}}\preformatted{csatraw \%>\% 197 | group_by(type) \%>\% 198 | summarise(over(list(bot2 = c(1:2), 199 | mid = 3, 200 | top2 = c(4:5)), 201 | ~ mean(item1 \%in\% .x))) 202 | #> # A tibble: 3 x 4 203 | #> type bot2 mid top2 204 | #> 205 | #> 1 existing 0.391 0.234 0.375 206 | #> 2 new 0.339 0.357 0.304 207 | #> 3 reactivate 0.333 0.133 0.533 208 | }\if{html}{\out{
}} 209 | 210 | \code{over()} can also loop over columns of a data.frame. In the example below we 211 | want to create four different dummy variables of \code{item1}: (i) the top and (ii) 212 | bottom category as well as (iii) the top two and (iv) the bottom two categories. 213 | We can create a lookup \code{data.frame} and use all columns but the first as input to 214 | \code{over()}. In the function call we make use of base R's \code{match()}, where \code{.x} 215 | represents the new values and \code{recode_df[, 1]} refers to the old values.\if{html}{\out{
}}\preformatted{ 216 | recode_df <- data.frame(old = c(1, 2, 3, 4, 5), 217 | top1 = c(0, 0, 0, 0, 1), 218 | top2 = c(0, 0, 0, 1, 1), 219 | bot1 = c(1, 0, 0, 0, 0), 220 | bot2 = c(1, 1, 0, 0, 0)) 221 | 222 | csatraw \%>\% 223 | mutate(over(recode_df[,-1], 224 | ~ .x[match(item1, recode_df[, 1])], 225 | .names = "item1_\{x\}")) \%>\% 226 | select(starts_with("item1")) 227 | #> # A tibble: 150 x 6 228 | #> item1 item1_open item1_top1 item1_top2 item1_bot1 item1_bot2 229 | #> 230 | #> 1 3 12 0 0 0 0 231 | #> 2 2 22 0 0 0 1 232 | #> 3 2 21, 22, 23 0 0 0 1 233 | #> 4 4 12, 13, 11 0 1 0 0 234 | #> # ... with 146 more rows 235 | }\if{html}{\out{
}} 236 | 237 | \code{over()} works nicely with comma-separated values stored in character vectors. 238 | In the example below, the column \code{csat_open} contains one or more comma-separated 239 | reasons why a specific customer satisfaction rating was given. 240 | We can easily create a column for each response category with the help of 241 | \code{dist_values} - a wrapper around \code{unique} which can split vector elements 242 | using a separator:\if{html}{\out{
}}\preformatted{csat \%>\% 243 | mutate(over(dist_values(csat_open, .sep = ", "), 244 | ~ as.integer(grepl(.x, csat_open)), 245 | .names = "rsp_\{x\}", 246 | .names_fn = ~ gsub("\\\\s", "_", .x)), 247 | .keep = "none") \%>\% glimpse 248 | #> Rows: 150 249 | #> Columns: 6 250 | #> $ rsp_friendly_staff 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,~ 251 | #> $ rsp_good_service 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0,~ 252 | #> $ rsp_great_product 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,~ 253 | #> $ rsp_no_response 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,~ 254 | #> $ rsp_too_expensive 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,~ 255 | #> $ rsp_unfriendly 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,~ 256 | }\if{html}{\out{
}} 257 | } 258 | 259 | \subsection{(2) A Very Specific Use Case}{ 260 | 261 | Here strings are supplied to \code{.x} to construct column names (sharing the 262 | same stem). This allows us to dynamically use more than one column in the 263 | function calls in \code{.fns}. To work properly, the strings need to be 264 | turned into symbols and evaluated. For this {dplyover} provides a genuine 265 | helper function \code{.()} that evaluates strings and helps to declutter the 266 | otherwise rather verbose code. \code{.()} supports glue syntax and takes a string 267 | as argument. 268 | 269 | Below are a few examples using two columns in the function calls in \code{.fns}. 270 | For the two-column case \code{\link[=across2]{across2()}} provides a more intuitive API that is 271 | closer to the original \code{dplyr::across}. Using \code{.()} inside \code{over} is really 272 | useful for cases with more than two columns. 273 | 274 | Consider the following example of a purrr-style formula in \code{.fns} using \code{.()}:\if{html}{\out{
}}\preformatted{iris \%>\% 275 | mutate(over(c("Sepal", "Petal"), 276 | ~ .("\{.x\}.Width") + .("\{.x\}.Length") 277 | )) 278 | #> # A tibble: 150 x 7 279 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal Petal 280 | #> 281 | #> 1 5.1 3.5 1.4 0.2 setosa 8.6 1.6 282 | #> 2 4.9 3 1.4 0.2 setosa 7.9 1.6 283 | #> 3 4.7 3.2 1.3 0.2 setosa 7.9 1.5 284 | #> 4 4.6 3.1 1.5 0.2 setosa 7.7 1.7 285 | #> # ... with 146 more rows 286 | }\if{html}{\out{
}} 287 | 288 | The above syntax is equal to the more verbose:\if{html}{\out{
}}\preformatted{iris \%>\% 289 | mutate(over(c("Sepal", "Petal"), 290 | ~ eval(sym(paste0(.x, ".Width"))) + 291 | eval(sym(paste0(.x, ".Length"))) 292 | )) 293 | #> # A tibble: 150 x 7 294 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal Petal 295 | #> 296 | #> 1 5.1 3.5 1.4 0.2 setosa 8.6 1.6 297 | #> 2 4.9 3 1.4 0.2 setosa 7.9 1.6 298 | #> 3 4.7 3.2 1.3 0.2 setosa 7.9 1.5 299 | #> 4 4.6 3.1 1.5 0.2 setosa 7.7 1.7 300 | #> # ... with 146 more rows 301 | }\if{html}{\out{
}} 302 | 303 | \code{.()} also works with anonymous functions:\if{html}{\out{
}}\preformatted{iris \%>\% 304 | summarise(over(c("Sepal", "Petal"), 305 | function(x) mean(.("\{x\}.Width")) 306 | )) 307 | #> # A tibble: 1 x 2 308 | #> Sepal Petal 309 | #> 310 | #> 1 3.06 1.20 311 | }\if{html}{\out{
}} 312 | 313 | A named list of functions:\if{html}{\out{
}}\preformatted{iris \%>\% 314 | mutate(over(c("Sepal", "Petal"), 315 | list(product = ~ .("\{.x\}.Width") * .("\{.x\}.Length"), 316 | sum = ~ .("\{.x\}.Width") + .("\{.x\}.Length")) 317 | ), 318 | .keep = "none") 319 | #> # A tibble: 150 x 4 320 | #> Sepal_product Sepal_sum Petal_product Petal_sum 321 | #> 322 | #> 1 17.8 8.6 0.28 1.6 323 | #> 2 14.7 7.9 0.28 1.6 324 | #> 3 15.0 7.9 0.26 1.5 325 | #> 4 14.3 7.7 0.3 1.7 326 | #> # ... with 146 more rows 327 | }\if{html}{\out{
}} 328 | 329 | Again, use the \code{.names} argument to control the output names:\if{html}{\out{
}}\preformatted{iris \%>\% 330 | mutate(over(c("Sepal", "Petal"), 331 | list(product = ~ .("\{.x\}.Width") * .("\{.x\}.Length"), 332 | sum = ~ .("\{.x\}.Width") + .("\{.x\}.Length")), 333 | .names = "\{fn\}_\{x\}"), 334 | .keep = "none") 335 | #> # A tibble: 150 x 4 336 | #> product_Sepal sum_Sepal product_Petal sum_Petal 337 | #> 338 | #> 1 17.8 8.6 0.28 1.6 339 | #> 2 14.7 7.9 0.28 1.6 340 | #> 3 15.0 7.9 0.26 1.5 341 | #> 4 14.3 7.7 0.3 1.7 342 | #> # ... with 146 more rows 343 | }\if{html}{\out{
}} 344 | } 345 | } 346 | 347 | \seealso{ 348 | \code{\link[=over2]{over2()}} to apply a function to two objects. 349 | 350 | All members of the <\code{\link[=over_across_family]{over-across function family}}>. 351 | } 352 | -------------------------------------------------------------------------------- /man/over2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/over2.R 3 | \name{over2} 4 | \alias{over2} 5 | \alias{over2x} 6 | \title{Apply functions to two vectors simultaneously in 'dplyr'} 7 | \usage{ 8 | over2(.x, .y, .fns, ..., .names = NULL, .names_fn = NULL) 9 | 10 | over2x(.x, .y, .fns, ..., .names = NULL, .names_fn = NULL) 11 | } 12 | \arguments{ 13 | \item{.x, .y}{An atomic vector or list to apply functions to. Alternatively a 14 | <\code{\link[=selection_helpers]{selection helper}}> can be used to create a vector. 15 | \code{over2()} requires \code{.x} and \code{.y} to be of the same length.} 16 | 17 | \item{.fns}{Functions to apply to each of the elements in \code{.x} and \code{.y}. 18 | 19 | Possible values are: 20 | \itemize{ 21 | \item A function 22 | \item A purrr-style lambda 23 | \item A list of functions/lambdas 24 | } 25 | 26 | For examples see the example section below. 27 | 28 | Note that \code{NULL} is not accepted as argument to \code{.fns}.} 29 | 30 | \item{...}{Additional arguments for the function calls in \code{.fns}.} 31 | 32 | \item{.names}{A glue specification that describes how to name the output 33 | columns. This can use \code{{x}} and \code{{y}} to stand for the selected vector element, 34 | and \code{{fn}} to stand for the name of the function being applied. The default 35 | (\code{NULL}) is equivalent to \code{"{x}_{y}"} for the single function case and 36 | \code{"{x}_{y}_{fn}"} for the case where a list is used for \code{.fns}.
37 | 38 | Note that, depending on the nature of the underlying object in \code{.x} and \code{.y}, 39 | specifying \code{{x}/{y}} will yield different results: 40 | \itemize{ 41 | \item If \code{.x/.y} is an unnamed atomic vector, \code{{x}/{y}} will represent each value. 42 | \item If \code{.x/.y} is a named list or atomic vector, \code{{x}/{y}} will represent each name. 43 | \item If \code{.x/.y} is an unnamed list, \code{{x}/{y}} will be the index number running 44 | from 1 to \code{length(x)} or \code{length(y)} respectively. 45 | } 46 | 47 | This standard behavior (interpretation of \code{{x}/{y}}) can be overwritten by 48 | directly specifying: 49 | \itemize{ 50 | \item \code{{x_val}} or \code{{y_val}} for \code{.x}'s or \code{.y}'s values 51 | \item \code{{x_nm}} or \code{{y_nm}} for their names 52 | \item \code{{x_idx}} or \code{{y_idx}} for their index numbers 53 | } 54 | 55 | Alternatively, a character vector of length equal to the number of columns to 56 | be created can be supplied to \code{.names}. Note that in this case, the glue 57 | specification described above is not supported.} 58 | 59 | \item{.names_fn}{Optionally, a function that is applied after the glue 60 | specification in \code{.names} has been evaluated. This is, for example, helpful 61 | in case the resulting names need to be further cleaned or trimmed.} 62 | } 63 | \value{ 64 | \code{over2()} returns a tibble with one column for each pair of elements in \code{.x} 65 | and \code{.y} combined with each function in \code{.fns}. 66 | 67 | \code{over2x()} returns a tibble with one column for each combination between elements 68 | in \code{.x} and \code{.y} combined with each function in \code{.fns}. 69 | } 70 | \description{ 71 | \code{over2()} and \code{over2x()} are variants of \code{\link[=over]{over()}} that iterate over two 72 | objects simultaneously. 
\code{over2()} loops each \emph{pair of elements} in \code{.x} and 73 | \code{.y} over one or more functions, while \code{over2x()} loops 74 | \emph{all pairwise combinations between elements} in \code{.x} and \code{.y} over one or more 75 | functions. 76 | } 77 | \section{Examples}{ 78 | 79 | 80 | For the basic functionality please refer to the examples in \code{\link[=over]{over()}}.\if{html}{\out{
}}\preformatted{library(dplyr) 81 | 82 | # For better printing 83 | iris <- as_tibble(iris) 84 | }\if{html}{\out{
}} 85 | 86 | When doing exploratory analysis, it is often helpful to transform continuous variables 87 | into several categorical variables. Below we use \code{over2()} to loop over two lists 88 | containing "breaks" and "labels" arguments, which we then use in a call to \code{cut()}:\if{html}{\out{
}}\preformatted{brks <- list(b1 = 3:8, 89 | b2 = seq(3, 9, by = 2)) 90 | 91 | labs <- list(l1 = c("3 to 4", "4 to 5", "5 to 6", 92 | "6 to 7", "7 to 8"), 93 | l2 = c("3 to 5", "5 to 7", "7 to 9")) 94 | 95 | iris \%>\% 96 | transmute(over2(brks, labs, 97 | ~ cut(Sepal.Length, 98 | breaks = .x, 99 | labels = .y), 100 | .names = "Sepal.Length.cut\{x_idx\}")) 101 | #> # A tibble: 150 x 2 102 | #> Sepal.Length.cut1 Sepal.Length.cut2 103 | #> 104 | #> 1 5 to 6 5 to 7 105 | #> 2 4 to 5 3 to 5 106 | #> 3 4 to 5 3 to 5 107 | #> 4 4 to 5 3 to 5 108 | #> # ... with 146 more rows 109 | }\if{html}{\out{
}} 110 | 111 | \code{over2x()} makes it possible to create dummy variables for interaction effects 112 | of two variables. In the example below, each customer 'type' is combined with 113 | each 'product' type:\if{html}{\out{
}}\preformatted{csat \%>\% 114 | transmute(over2x(unique(type), 115 | unique(product), 116 | ~ type == .x & product == .y)) \%>\% 117 | glimpse 118 | #> Rows: 150 119 | #> Columns: 9 120 | #> $ existing_advanced TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FA~ 121 | #> $ existing_premium FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, ~ 122 | #> $ existing_basic FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,~ 123 | #> $ reactivate_advanced FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, ~ 124 | #> $ reactivate_premium FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,~ 125 | #> $ reactivate_basic FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, ~ 126 | #> $ new_advanced FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,~ 127 | #> $ new_premium FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,~ 128 | #> $ new_basic FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, F~ 129 | }\if{html}{\out{
}} 130 | } 131 | 132 | -------------------------------------------------------------------------------- /man/over_across_family.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/over_across_family.R 3 | \name{over_across_family} 4 | \alias{over_across_family} 5 | \title{The over-across function family} 6 | \description{ 7 | \code{dplyover} extends \code{dplyr}'s functionality by building a function family 8 | around \code{dplyr::across()}. 9 | 10 | The goal of this \strong{over-across function family} is to provide a concise and 11 | uniform syntax which can be used to create columns by applying functions to 12 | vectors and / or sets of columns in dplyr. Ideally, this will improve our 13 | mental model so that it is easier to tackle problems where the solution is 14 | based on creating new columns. 15 | 16 | The functions in the over-across function family create columns by applying 17 | one or several functions to: 18 | \subsection{basic functions}{ 19 | \itemize{ 20 | \item \code{\link[dplyr:across]{dplyr::across()}}: a set of columns 21 | \item \code{\link[=over]{over()}}: a vector (list or atomic vector) 22 | } 23 | } 24 | 25 | \subsection{variants}{ 26 | \itemize{ 27 | \item \code{\link[=over2]{over2()}} two vectors of the same length (pairwise) 28 | \item \code{\link[=over2x]{over2x()}} two vectors (nested) 29 | \item \code{\link[=across2]{across2()}} two sets of columns (pairwise) 30 | \item \code{\link[=across2x]{across2x()}} two sets of columns (nested) 31 | \item \code{\link[=crossover]{crossover()}} a set of columns and a vector (nested) 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/rmd/setup.Rmd: -------------------------------------------------------------------------------- 1 | ```{r, include = FALSE} 2 | options( 3 | tibble.print_min = 4, 4 | tibble.max_extra_cols =
8, 5 | digits = 2, 6 | crayon.enabled = FALSE, 7 | cli.unicode = FALSE 8 | ) 9 | knitr::opts_chunk$set( 10 | collapse = TRUE, 11 | comment = "#>" 12 | ) 13 | library(dplyr) 14 | ``` 15 | -------------------------------------------------------------------------------- /man/select_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/select_values.R 3 | \name{select_values} 4 | \alias{select_values} 5 | \alias{dist_values} 6 | \alias{seq_range} 7 | \title{Select values from variables} 8 | \usage{ 9 | dist_values(x, .sep = NULL, .sort = c("asc", "desc", "none", "levels")) 10 | 11 | seq_range(x, .by) 12 | } 13 | \arguments{ 14 | \item{x}{An atomic vector or list. For \code{\link[=seq_range]{seq_range()}} x must be numeric or date.} 15 | 16 | \item{.sep}{A character vector containing regular expression(s) which are used 17 | for splitting the values (works only if x is a character vector).} 18 | 19 | \item{.sort}{A character string indicating which sorting scheme is to be applied 20 | to distinct values: ascending ("asc"), descending ("desc"), "none" or "levels". The 21 | default is ascending, only if x is a factor the default is "levels".} 22 | 23 | \item{.by}{A number (or date expression) representing the increment of the sequence.} 24 | } 25 | \value{ 26 | \code{\link[=dist_values]{dist_values()}} returns a vector of the same type as x, with the exception of 27 | factors which are converted to type \code{"character"}. 28 | 29 | \code{\link[=seq_range]{seq_range()}} returns a vector of type \code{"integer"} or \code{"double"}. 30 | } 31 | \description{ 32 | These functions are \link[=selection_helpers]{selection helpers}. They are intended 33 | to be used inside all functions that accept a vector as argument (that is \code{over()} 34 | and \code{crossover()} and all their variants) to extract values of a variable.
35 | \itemize{ 36 | \item \code{\link[=dist_values]{dist_values()}} returns all distinct values (or in the case of factor variables: 37 | levels) of a variable \code{x} which are not \code{NA}. 38 | \item \code{\link[=seq_range]{seq_range()}} returns the sequence between the \code{range()} of a variable \code{x}. 39 | } 40 | } 41 | \section{Examples}{ 42 | 43 | 44 | Selection helpers can be used inside \code{dplyover::over()} which in turn must be 45 | used inside \code{dplyr::mutate} or \code{dplyr::summarise}. Let's first attach \code{dplyr}:\if{html}{\out{
}}\preformatted{library(dplyr) 46 | 47 | # For better printing 48 | iris <- as_tibble(iris) 49 | }\if{html}{\out{
}} 50 | 51 | \code{dist_values()} extracts all distinct values of a column variable. 52 | This is helpful when creating dummy variables in a loop using \code{over()}.\if{html}{\out{
}}\preformatted{iris \%>\% 53 | mutate(over(dist_values(Species), 54 | ~ if_else(Species == .x, 1, 0) 55 | ), 56 | .keep = "none") 57 | #> # A tibble: 150 x 3 58 | #> setosa versicolor virginica 59 | #> 60 | #> 1 1 0 0 61 | #> 2 1 0 0 62 | #> 3 1 0 0 63 | #> 4 1 0 0 64 | #> # ... with 146 more rows 65 | }\if{html}{\out{
}} 66 | 67 | \code{dist_values()} is just a wrapper around unique. However, it has five 68 | differences: 69 | 70 | (1) \code{NA} values are automatically stripped. Compare:\if{html}{\out{
}}\preformatted{unique(c(1:3, NA)) 71 | #> [1] 1 2 3 NA 72 | dist_values(c(1:3, NA)) 73 | #> [1] 1 2 3 74 | }\if{html}{\out{
}} 75 | 76 | (2) Applied on factors, \code{dist_values()} returns all distinct \code{levels} as 77 | character. Compare the following:\if{html}{\out{
}}\preformatted{fctrs <- factor(c(1:3, NA), levels = c(3:1)) 78 | 79 | fctrs \%>\% unique() \%>\% class() 80 | #> [1] "factor" 81 | 82 | fctrs \%>\% dist_values() \%>\% class() 83 | #> [1] "character" 84 | }\if{html}{\out{
}} 85 | 86 | (3) By default, the output is sorted in ascending order for non-factors, and 87 | follows the order of the underlying "levels" for factors. This can be controlled by 88 | setting the \code{.sort} argument. Compare:\if{html}{\out{
}}\preformatted{# non-factors 89 | unique(c(3,1,2)) 90 | #> [1] 3 1 2 91 | 92 | dist_values(c(3,1,2)) 93 | #> [1] 1 2 3 94 | dist_values(c(3,1,2), .sort = "desc") 95 | #> [1] 3 2 1 96 | dist_values(c(3,1,2), .sort = "none") 97 | #> [1] 3 1 2 98 | 99 | # factors 100 | fctrs <- factor(c(2,1,3, NA), levels = c(3:1)) 101 | 102 | dist_values(fctrs) 103 | #> [1] "3" "2" "1" 104 | dist_values(fctrs, .sort = "levels") 105 | #> [1] "3" "2" "1" 106 | dist_values(fctrs, .sort = "asc") 107 | #> [1] "1" "2" "3" 108 | dist_values(fctrs, .sort = "desc") 109 | #> [1] "3" "2" "1" 110 | dist_values(fctrs, .sort = "none") 111 | #> [1] "2" "1" "3" 112 | }\if{html}{\out{
}} 113 | 114 | (4) When used on a character vector \code{dist_values} can take a separator 115 | \code{.sep} to split the elements accordingly:\if{html}{\out{
}}\preformatted{c("1, 2, 3", 116 | "2, 4, 5", 117 | "4, 1, 7") \%>\% 118 | dist_values(., .sep = ", ") 119 | #> [1] "1" "2" "3" "4" "5" "7" 120 | }\if{html}{\out{
}} 121 | 122 | (5) When used on lists \code{dist_values} automatically simplifies its input 123 | into a vector using \code{unlist}:\if{html}{\out{
}}\preformatted{list(a = c(1:4), b = (4:6), c(5:10)) \%>\% 124 | dist_values() 125 | #> [1] 1 2 3 4 5 6 7 8 9 10 126 | }\if{html}{\out{
}} 127 | 128 | \code{seq_range()} generates a numeric sequence between the \code{min} and \code{max} 129 | values of its input variable. This is helpful when creating many dummy 130 | variables with varying thresholds.\if{html}{\out{
}}\preformatted{iris \%>\% 131 | mutate(over(seq_range(Sepal.Length, 1), 132 | ~ if_else(Sepal.Length > .x, 1, 0), 133 | .names = "Sepal.Length.\{x\}"), 134 | .keep = "none") 135 | #> # A tibble: 150 x 3 136 | #> Sepal.Length.5 Sepal.Length.6 Sepal.Length.7 137 | #> 138 | #> 1 1 0 0 139 | #> 2 0 0 0 140 | #> 3 0 0 0 141 | #> 4 0 0 0 142 | #> # ... with 146 more rows 143 | }\if{html}{\out{
}} 144 | 145 | Note that if the input variable does not have decimal places, \code{min} and \code{max} are 146 | wrapped in \code{ceiling} and \code{floor} accordingly. This will prevent the creation of 147 | variables that contain only \code{0} or \code{1}. Compare the output below with the 148 | example above:\if{html}{\out{
}}\preformatted{iris \%>\% 149 | mutate(over(seq(round(min(Sepal.Length), 0), 150 | round(max(Sepal.Length), 0), 151 | 1), 152 | ~ if_else(Sepal.Length > .x, 1, 0), 153 | .names = "Sepal.Length.\{x\}"), 154 | .keep = "none") 155 | #> # A tibble: 150 x 5 156 | #> Sepal.Length.4 Sepal.Length.5 Sepal.Length.6 Sepal.Length.7 Sepal.Length.8 157 | #> 158 | #> 1 1 1 0 0 0 159 | #> 2 1 0 0 0 0 160 | #> 3 1 0 0 0 0 161 | #> 4 1 0 0 0 0 162 | #> # ... with 146 more rows 163 | }\if{html}{\out{
}} 164 | 165 | \code{seq_range()} also works on dates:\if{html}{\out{
}}\preformatted{some_dates <- c(as.Date("2020-01-02"), 166 | as.Date("2020-05-02"), 167 | as.Date("2020-03-02")) 168 | 169 | 170 | some_dates \%>\% 171 | seq_range(., "1 month") 172 | #> [1] "2020-01-02" "2020-02-02" "2020-03-02" "2020-04-02" "2020-05-02" 173 | }\if{html}{\out{
}} 174 | } 175 | 176 | -------------------------------------------------------------------------------- /man/select_vars.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/select_strings.R 3 | \name{select_vars} 4 | \alias{select_vars} 5 | \alias{cut_names} 6 | \alias{extract_names} 7 | \title{Select string parts or patterns of column names} 8 | \usage{ 9 | cut_names(.pattern, .remove = NULL, .vars = NULL) 10 | 11 | extract_names(.pattern, .remove = NULL, .vars = NULL) 12 | } 13 | \arguments{ 14 | \item{.pattern}{Pattern to look for.} 15 | 16 | \item{.remove}{Pattern to remove from the variable names provided in \code{.vars}. 17 | When this argument is provided, all variables names in \code{.vars} that match 18 | the pattern specified in \code{.remove} will be removed, before the \code{.pattern} to 19 | look for will be applied.} 20 | 21 | \item{.vars}{A character vector with variables names. When used inside \code{over} 22 | all column names of the underlying data are automatically supplied to \code{.vars}. 23 | This argument is useful when testing the functionality outside the context of 24 | \code{over()}.} 25 | } 26 | \value{ 27 | A character vector. 28 | } 29 | \description{ 30 | These functions are \link[=selection_helpers]{selection helpers}. 31 | They are intended to be used inside \code{over()} to extract parts or patterns of 32 | the column names of the underlying data. 33 | \itemize{ 34 | \item \code{\link[=cut_names]{cut_names()}} selects strings by removing (cutting off) the specified \code{.pattern}. 35 | This functionality resembles \code{stringr::str_remove_all()}. 36 | \item \code{\link[=extract_names]{extract_names()}} selects strings by extracting the specified \code{.pattern}. 37 | This functionality resembles \code{stringr::str_extract()}. 
38 | } 39 | } 40 | \section{Examples}{ 41 | 42 | 43 | Selection helpers can be used inside \code{dplyover::over()} which in turn must be 44 | used inside \code{dplyr::mutate} or \code{dplyr::summarise}. Let's first attach \code{dplyr} 45 | (and \code{stringr} for comparison):\if{html}{\out{
}}\preformatted{library(dplyr) 46 | library(stringr) 47 | 48 | # For better printing 49 | iris <- as_tibble(iris) 50 | }\if{html}{\out{
}} 51 | 52 | Let's first compare \code{cut_names()} and \code{extract_names()} to their {stringr} 53 | equivalents \code{stringr::str_remove_all()} and \code{stringr::str_extract()}: 54 | 55 | We can observe two main differences: 56 | 57 | (1) \code{cut_names()} and \code{extract_names()} only return strings where the function 58 | was applied successfully (when characters have actually been removed or 59 | extracted). \code{stringr::str_remove_all()} returns unmatched strings as is, while 60 | \code{stringr::str_extract()} returns \code{NA}.\if{html}{\out{
}}\preformatted{cut_names("Width", .vars = names(iris)) 61 | #> [1] "Sepal." "Petal." 62 | str_remove_all(names(iris), "Width") 63 | #> [1] "Sepal.Length" "Sepal." "Petal.Length" "Petal." "Species" 64 | 65 | extract_names("Length|Width", .vars = names(iris)) 66 | #> [1] "Length" "Width" 67 | str_extract(rep(names(iris), 2), "Length|Width") 68 | #> [1] "Length" "Width" "Length" "Width" NA "Length" "Width" "Length" "Width" 69 | #> [10] NA 70 | }\if{html}{\out{
}} 71 | 72 | (2) \code{cut_names()} and \code{extract_names()} return only unique values:\if{html}{\out{
}}\preformatted{cut_names("Width", .vars = rep(names(iris), 2)) 73 | #> [1] "Sepal." "Petal." 74 | str_remove_all(rep(names(iris), 2), "Width") 75 | #> [1] "Sepal.Length" "Sepal." "Petal.Length" "Petal." "Species" 76 | #> [6] "Sepal.Length" "Sepal." "Petal.Length" "Petal." "Species" 77 | 78 | extract_names("Length|Width", .vars = names(iris)) 79 | #> [1] "Length" "Width" 80 | str_extract(rep(names(iris), 2), "Length|Width") 81 | #> [1] "Length" "Width" "Length" "Width" NA "Length" "Width" "Length" "Width" 82 | #> [10] NA 83 | }\if{html}{\out{
}} 84 | 85 | The examples above do not show that \code{cut_names()} removes \emph{all} strings matching 86 | the \code{.pattern} argument, while \code{extract_names()} does only extract the \code{.pattern} 87 | \emph{one} time:\if{html}{\out{
}}\preformatted{cut_names("Width", .vars = "Width.Petal.Width") 88 | #> [1] ".Petal." 89 | str_remove_all("Width.Petal.Width", "Width") 90 | #> [1] ".Petal." 91 | 92 | extract_names("Width", .vars = "Width.Petal.Width") 93 | #> [1] "Width" 94 | str_extract("Width.Petal.Width", "Width") 95 | #> [1] "Width" 96 | }\if{html}{\out{
}} 97 | 98 | Within \code{\link[=over]{over()}} \code{cut_names()} and \code{extract_names()} automatically use the 99 | column names of the underlying data:\if{html}{\out{
}}\preformatted{iris \%>\% 100 | mutate(over(cut_names(".Width"), 101 | ~ .("\{.x\}.Width") * .("\{.x\}.Length"), 102 | .names = "Product_\{x\}")) 103 | #> # A tibble: 150 x 7 104 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species Product_Sepal 105 | #> 106 | #> 1 5.1 3.5 1.4 0.2 setosa 17.8 107 | #> 2 4.9 3 1.4 0.2 setosa 14.7 108 | #> 3 4.7 3.2 1.3 0.2 setosa 15.0 109 | #> 4 4.6 3.1 1.5 0.2 setosa 14.3 110 | #> # ... with 146 more rows, and 1 more variable: Product_Petal 111 | 112 | iris \%>\% 113 | mutate(over(extract_names("Length|Width"), 114 | ~.("Petal.\{.x\}") * .("Sepal.\{.x\}"), 115 | .names = "Product_\{x\}")) 116 | #> # A tibble: 150 x 7 117 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species Product_Length 118 | #> 119 | #> 1 5.1 3.5 1.4 0.2 setosa 7.14 120 | #> 2 4.9 3 1.4 0.2 setosa 6.86 121 | #> 3 4.7 3.2 1.3 0.2 setosa 6.11 122 | #> 4 4.6 3.1 1.5 0.2 setosa 6.9 123 | #> # ... with 146 more rows, and 1 more variable: Product_Width 124 | }\if{html}{\out{
}} 125 | 126 | What problem does \code{cut_names()} solve? 127 | In the example above using \code{cut_names()} might not seem helpful, since we could easily 128 | use \code{c("Sepal", "Petal")} instead. However, there are cases where we have 129 | data with a lot of similar pairs of variables sharing a common prefix or 130 | suffix. If we want to loop over them using \code{over()} then \code{cut_names()} comes 131 | in handy. 132 | 133 | The usage of \code{extract_names()} might be less obvious. Let's look at raw data 134 | from a customer satisfaction survey which contains the following variables.\if{html}{\out{
}}\preformatted{csatraw \%>\% glimpse(width = 50) 135 | #> Rows: 150 136 | #> Columns: 15 137 | #> $ cust_id "61297", "07545", "03822", "8~ 138 | #> $ type "existing", "existing", "exis~ 139 | #> $ product "advanced", "advanced", "prem~ 140 | #> $ item1 3, 2, 2, 4, 4, 3, 1, 3, 3, 2,~ 141 | #> $ item1_open "12", "22", "21, 22, 23", "12~ 142 | #> $ item2a 2, 2, 2, 3, 3, 0, 3, 2, 2, 0,~ 143 | #> $ item2b 3, 2, 5, 5, 2, NA, 3, 3, 4, N~ 144 | #> $ item3a 2, 3, 3, 2, 3, 2, 3, 3, 0, 1,~ 145 | #> $ item3b 2, 4, 5, 3, 5, 3, 4, 2, NA, 2~ 146 | #> $ item4a 0, 2, 0, 0, 3, 3, 3, 2, 2, 2,~ 147 | #> $ item4b NA, 3, NA, NA, 5, 2, 3, 5, 3,~ 148 | #> $ item5a 2, 3, 2, 2, 3, 1, 3, 2, 3, 1,~ 149 | #> $ item5b 5, 2, 3, 4, 1, 3, 3, 1, 3, 2,~ 150 | #> $ item6a 2, 2, 3, 1, 3, 3, 3, 2, 3, 2,~ 151 | #> $ item6b 3, 1, 2, 2, 5, 4, 4, 2, 2, 2,~ 152 | }\if{html}{\out{
}} 153 | 154 | The survey has several 'item's consisting of two sub-questions / variables 'a' 155 | and 'b'. Let's say we want to calculate the product of those two variables for 156 | each item. \code{extract_names()} helps us to select all variables containing 157 | 'item' followed by a digit using the regex \code{"item\\\\d"} as \code{.pattern}. 158 | However, there are 'item1' and 'item1_open' which are not followed by \code{a} and 159 | \code{b}. \code{extract_names()} lets us exclude these items by setting the \code{.remove} 160 | argument to \code{"^item1"}:\if{html}{\out{
}}\preformatted{csatraw \%>\% 161 | transmute(over(extract_names("item\\\\d", "^item1"), 162 | ~ .("\{.x\}a") * .("\{.x\}b")) 163 | ) 164 | #> # A tibble: 150 x 5 165 | #> item2 item3 item4 item5 item6 166 | #> 167 | #> 1 6 4 NA 10 6 168 | #> 2 4 12 6 6 2 169 | #> 3 10 15 NA 6 6 170 | #> 4 15 6 NA 8 2 171 | #> # ... with 146 more rows 172 | }\if{html}{\out{
}} 173 | } 174 | 175 | -------------------------------------------------------------------------------- /man/selection_helpers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/selection_helpers.R 3 | \name{selection_helpers} 4 | \alias{selection_helpers} 5 | \alias{over_selection_helpers} 6 | \title{Selection helpers} 7 | \description{ 8 | \code{dplyover} provides three kinds of selection helpers which are intended for 9 | use in all functions that accept a vector as argument (that is \code{over()} and 10 | \code{crossover()} as well as their variants, see here for a full list of the 11 | \link[=over_across_family]{over-across function family}). 12 | 13 | Helpers which select \strong{string parts} of the \strong{column names} (of the underlying data): 14 | \itemize{ 15 | \item \code{\link[=cut_names]{cut_names()}} removes a specified pattern. 16 | \item \code{\link[=extract_names]{extract_names()}} extracts a specified pattern. 17 | } 18 | 19 | Helpers which select \strong{values} of a variable: 20 | \itemize{ 21 | \item \code{\link[=dist_values]{dist_values()}} returns all distinct values. 22 | \item \code{\link[=seq_range]{seq_range()}} returns the sequence between the \code{range()} of a variable.
23 | } 24 | 25 | A helper function that evaluates a glue specification as variable 26 | \itemize{ 27 | \item \code{\link[=.]{.()}} evaluates an interpolated string as symbol 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /man/show_affix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/show_affix.R 3 | \name{show_affix} 4 | \alias{show_affix} 5 | \alias{show_prefix} 6 | \alias{show_suffix} 7 | \title{Show affixes for variable pairs of two sets of columns} 8 | \usage{ 9 | show_prefix(.data = NULL, .xcols = NULL, .ycols = NULL) 10 | 11 | show_suffix(.data = NULL, .xcols = NULL, .ycols = NULL) 12 | } 13 | \arguments{ 14 | \item{.data}{A data frame.} 15 | 16 | \item{.xcols, .ycols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sets of 17 | columns for which the common pre- or suffix will be shown for each pair. 18 | Note that you can not select.} 19 | } 20 | \value{ 21 | A tibble with three columns: .xcols, .ycols and prefix or suffix. 22 | } 23 | \description{ 24 | These functions show the prefixes or suffixes for each pair of variables of 25 | two sets of columns. They are intended to be used either (1) in case \code{across2} 26 | throws an error when \code{{pre}} or \code{{suf}} are specified in \code{across2}'s \code{.names} 27 | argument or (2) before using \code{{pre}} or \code{{suf}} in \code{across2} to understand 28 | how the pre- or suffixes will look like. 29 | \itemize{ 30 | \item \code{\link[=show_prefix]{show_prefix()}} lists each variable pair and the corresponding alphanumeric prefix 31 | \item \code{\link[=show_suffix]{show_suffix()}} lists each variable pair and the corresponding alphanumeric suffix 32 | } 33 | } 34 | \section{Examples}{ 35 | 36 | 37 | Below two use cases of \code{show_prefix/suffix} are briefly explained. 
38 | Let's first attach dplyr and get ready:\if{html}{\out{
}}\preformatted{library(dplyr) 39 | 40 | # For better printing 41 | iris <- as_tibble(iris) 42 | }\if{html}{\out{
}} 43 | \subsection{(1) When called after an error is thrown by across2()}{ 44 | 45 | Let's assume we use \code{across2} with the \code{{pre}} glue specification on some 46 | data where not all variable pairs share a common prefix. In the example below 47 | we use \code{dplyr::rename} to create such a case. Then \code{across2} will throw an 48 | error. The error message already suggests that we can run \code{show_prefix()} 49 | to see what went wrong. In this case we can call \code{show_prefix()} without 50 | any arguments:\if{html}{\out{
}}\preformatted{ iris \%>\% 51 | as_tibble \%>\% 52 | rename("Pesal.Length" = Sepal.Length) \%>\% 53 | mutate(across2(ends_with("Length"), 54 | ends_with("Width"), 55 | .fns = list(product = ~ .x * .y, 56 | sum = ~ .x + .y), 57 | .names = "\{pre\}_\{fn\}")) 58 | #> Error: Problem with `mutate()` input `..1`. 59 | #> i `..1 = across2(...)`. 60 | #> x Problem with `across2()` input `.names`. 61 | #> i When `\{pre\}` is used inside `.names` each pair of input variables in `.xcols` and `.ycols` must share a common prefix of length > 0. 62 | #> x For at least one pair of variables a shared prefix could not be extracted. 63 | #> i Run `show_prefix()` to see the prefixes for each variable pair. 64 | show_prefix() 65 | #> # A tibble: 2 x 3 66 | #> .xcols .ycols prefix 67 | #> 68 | #> 1 Pesal.Length Sepal.Width 69 | #> 2 Petal.Length Petal.Width Petal 70 | }\if{html}{\out{
}} 71 | } 72 | 73 | \subsection{(2) When called on a data.frame}{ 74 | 75 | When called on a data.frame we just need to specify two sets of columns: 76 | \code{.xcols} and \code{.ycols} (just like in \code{across2}).\if{html}{\out{
}}\preformatted{ iris \%>\% 77 | show_suffix(starts_with("Sepal"), 78 | starts_with("Petal")) 79 | #> # A tibble: 2 x 3 80 | #> .xcols .ycols suffix 81 | #> 82 | #> 1 Sepal.Length Petal.Length Length 83 | #> 2 Sepal.Width Petal.Width Width 84 | }\if{html}{\out{
}} 85 | } 86 | } 87 | 88 | -------------------------------------------------------------------------------- /man/string_eval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/string_eval.R 3 | \name{string_eval} 4 | \alias{string_eval} 5 | \alias{.} 6 | \title{Evaluate an interpolated string as symbol} 7 | \usage{ 8 | .(x) 9 | } 10 | \arguments{ 11 | \item{x}{A glue specification, that is, a string which contains an R expression 12 | wrapped in curly braces, e.g. \verb{.("\{.x\}_some_string")}.} 13 | } 14 | \value{ 15 | The values of the variable with the name of the final argument string, given 16 | that it exists in the caller environment. 17 | } 18 | \description{ 19 | This function takes a glue specification as input, and evaluates the final 20 | argument string as name in the caller environment. 21 | } 22 | \section{Examples}{ 23 | \if{html}{\out{
}}\preformatted{library(dplyr) 24 | 25 | # For better printing 26 | iris <- as_tibble(iris) 27 | }\if{html}{\out{
}} 28 | 29 | Below is a simple example from \code{over()}. In \code{over}'s function 30 | argument \code{.x} is first evaluated as 'Sepal' and then as 'Petal' which 31 | results in the final argument strings 'Sepal.Width' and 'Sepal.Length' as 32 | well as 'Petal.Width' and 'Petal.Length'.\if{html}{\out{
}}\preformatted{iris \%>\% 33 | mutate(over(c("Sepal", "Petal"), 34 | ~ .("\{.x\}.Width") + .("\{.x\}.Length") 35 | )) 36 | #> # A tibble: 150 x 7 37 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal Petal 38 | #> 39 | #> 1 5.1 3.5 1.4 0.2 setosa 8.6 1.6 40 | #> 2 4.9 3 1.4 0.2 setosa 7.9 1.6 41 | #> 3 4.7 3.2 1.3 0.2 setosa 7.9 1.5 42 | #> 4 4.6 3.1 1.5 0.2 setosa 7.7 1.7 43 | #> # ... with 146 more rows 44 | }\if{html}{\out{
}} 45 | 46 | The above syntax is equal to the more verbose:\if{html}{\out{
}}\preformatted{iris \%>\% 47 | mutate(over(c("Sepal", "Petal"), 48 | ~ eval(sym(paste0(.x, ".Width"))) + 49 | eval(sym(paste0(.x, ".Length"))) 50 | )) 51 | #> # A tibble: 150 x 7 52 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal Petal 53 | #> 54 | #> 1 5.1 3.5 1.4 0.2 setosa 8.6 1.6 55 | #> 2 4.9 3 1.4 0.2 setosa 7.9 1.6 56 | #> 3 4.7 3.2 1.3 0.2 setosa 7.9 1.5 57 | #> 4 4.6 3.1 1.5 0.2 setosa 7.7 1.7 58 | #> # ... with 146 more rows 59 | }\if{html}{\out{
}} 60 | 61 | Although \code{.()} was created with the use of \code{over()} in mind, it can also be 62 | used within \code{dplyr::across()} in combination with \code{dplyr::cur_column()}. 63 | First let's rename 'Sepal.Length' and 'Petal.Length' to 'Sepal' and 'Petal' 64 | to have a stem to which we can attach the string '.Width' to access the 65 | two 'Width' variables. Now we can call \code{.(cur_column())} to access the variable 66 | \code{across()} has been called on (Note: we could have used \code{.x} instead). We can 67 | further access the values of the 'Width' variables by wrapping \code{cur_column()} 68 | in curly braces \code{{}}, adding \code{.Width} and wrapping everything with 69 | quotation marks \code{.("{cur_column()}.Width")}.\if{html}{\out{
}}\preformatted{iris \%>\% 70 | rename("Sepal" = "Sepal.Length", 71 | "Petal" = "Petal.Length") \%>\% 72 | mutate(across(c(Sepal, Petal), 73 | ~ .(cur_column()) + .("\{cur_column()\}.Width"), 74 | .names = "\{col\}_sum")) 75 | #> # A tibble: 150 x 7 76 | #> Sepal Sepal.Width Petal Petal.Width Species Sepal_sum Petal_sum 77 | #> 78 | #> 1 5.1 3.5 1.4 0.2 setosa 8.6 1.6 79 | #> 2 4.9 3 1.4 0.2 setosa 7.9 1.6 80 | #> 3 4.7 3.2 1.3 0.2 setosa 7.9 1.5 81 | #> 4 4.6 3.1 1.5 0.2 setosa 7.7 1.7 82 | #> # ... with 146 more rows 83 | }\if{html}{\out{
}} 84 | 85 | A similar approach can be achieved using \code{purrr::map} in combination with \code{.()}:\if{html}{\out{
}}\preformatted{iris \%>\% 86 | rename("Sepal" = "Sepal.Length", 87 | "Petal" = "Petal.Length") \%>\% 88 | mutate(purrr::map_dfc(c("Sepal_sum" = "Sepal", "Petal_sum" = "Petal"), 89 | ~ .(.x) + .("\{.x\}.Width"))) 90 | #> # A tibble: 150 x 7 91 | #> Sepal Sepal.Width Petal Petal.Width Species Sepal_sum Petal_sum 92 | #> 93 | #> 1 5.1 3.5 1.4 0.2 setosa 8.6 1.6 94 | #> 2 4.9 3 1.4 0.2 setosa 7.9 1.6 95 | #> 3 4.7 3.2 1.3 0.2 setosa 7.9 1.5 96 | #> 4 4.6 3.1 1.5 0.2 setosa 7.7 1.7 97 | #> # ... with 146 more rows 98 | }\if{html}{\out{
}} 99 | } 100 | 101 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css2?family=Source+Sans+Pro:wght@300;400&display=swap'); 2 | 3 | /* url('https://fonts.googleapis.com/css2?family=Lato:wght@100;300;400&display=swap'); */ 4 | 5 | 6 | .label-default { 7 | background-color: #6fcf18; /* #05cf2f */ 8 | } 9 | 10 | body { 11 | font-family: "Source Sans Pro", Helvetica, Arial, sans-serif !important; 12 | /* "Lato", sans-serif */ 13 | font-size: 16px !important; 14 | line-height: 1.846 !important; 15 | font-weight: 400 !important; 16 | color: #444444 !important; 17 | -webkit-font-smoothing: antialiased !important; 18 | letter-spacing: .1px !important; 19 | } 20 | 21 | .navbar li>a, .navbar li>a { 22 | color: #ffffff !important; 23 | } 24 | 25 | .navbar li>a:hover, .navbar li>a:focus { 26 | color: #ffffff !important; 27 | text-decoration: none !important; 28 | } 29 | 30 | p>a, li>a, li>small>a, ul.dropdown-menu>li>a { 31 | color: #83bb13 !important; 32 | } 33 | 34 | @media (max-width: 768px) { 35 | ul.dropdown-menu>li>a { 36 | color: #ffffff !important; 37 | } 38 | } 39 | 40 | p>a:hover, p>a:focus, li>a:hover, li>a:focus, li>small>a:hover, li>small>a:focus { 41 | color: #5d8709 !important; 42 | text-decoration: none !important; 43 | } 44 | 45 | /* ul.dropdown-menu>li>a:hover, ul.dropdown-menu>li>a:focus */ 46 | .ref-index th { 47 | font-weight: 400 !important; 48 | } 49 | 50 | h1, h2, h3, .h1, .h2, .h3 { 51 | font-family: "Source Sans Pro", Helvetica, Arial, sans-serif !important; 52 | color: #178acc !important; 53 | font-weight: 300 !important; 54 | line-height: 1.1 !important; 55 | } 56 | 57 | h4, h5, h6, .h4, .h5, .h6 { 58 | font-family: "Source Sans Pro", Helvetica, Arial, sans-serif !important; 59 | color: #178acc !important; 60 | font-weight: 400 !important; 61 | line-height: 1.1 
!important; 62 | } 63 | 64 | pre { 65 | font-size: 14px !important; 66 | line-height: 1.846 !important; 67 | } 68 | 69 | code, kbd, pre, samp { 70 | font-family: "Source Code Pro", Menlo, Monaco, Consolas, "Courier New", monospace !important; 71 | } 72 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/apple-touch-icon-76x76.png 
-------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(dplyover) 3 | 4 | test_check("dplyover") 5 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/over.md: -------------------------------------------------------------------------------- 1 | # over() gives meaningful messages 2 | 3 | `over()` must only be used inside dplyr verbs. 4 | 5 | --- 6 | 7 | Problem with `summarise()` input `..1`. 8 | i `..1 = over(1, 42)`. 9 | x Problem with `over()` input `.fns`. 
10 | i Input `.fns` must be a function, a formula, or a list of functions/formulas. 11 | 12 | --- 13 | 14 | Problem with `summarise()` input `..1`. 15 | i `..1 = over(...)`. 16 | x Problem with `over()` input `.names`. 17 | i The number of elements in `.names` must equal the number of new columns. 18 | x 3 elements provided to `.names`, but the number of new columns is 6. 19 | i The error occurred in group 1: x = 1. 20 | 21 | --- 22 | 23 | Problem with `summarise()` input `..1`. 24 | i `..1 = over(...)`. 25 | x Problem with `over()` input `.names`. 26 | i The number of elements in `.names` must equal the number of new columns. 27 | x 7 elements provided to `.names`, but the number of new columns is 6. 28 | i The error occurred in group 1: x = 1. 29 | 30 | --- 31 | 32 | Problem with `summarise()` input `..1`. 33 | i `..1 = over(...)`. 34 | x Names must be unique. 35 | x These names are duplicated: 36 | * "one" at locations 1 and 4. 37 | * "two" at locations 2 and 5. 38 | * "three" at locations 3 and 6. 39 | i The error occurred in group 1: x = 1. 40 | 41 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/over2.md: -------------------------------------------------------------------------------- 1 | # over2() gives meaningful messages 2 | 3 | Problem with `mutate()` input `..1`. 4 | i `..1 = over2(1, c(2:3), mean)`. 5 | x Problem with `over2()` input `.x` and `.y`. 6 | i Input `.x` and `.y` must have the same length. 7 | x `.x` is of length 1, while `.y` is of length 2. 
8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/test-crossover.R: -------------------------------------------------------------------------------- 1 | # crossover ------------------------------------------------------------------ 2 | library(dplyr) 3 | 4 | # crossover examples of basic functionality from the example section 5 | test_that("crossover() exmample with `.y` as function", { 6 | 7 | df_crossover <- csat %>% 8 | transmute( 9 | crossover(.xcols = c(type, product, csat), 10 | .y = dist_values, 11 | .fns = ~ if_else(.y == .x, 1, 0), 12 | .names_fn = ~ gsub("\\s", "_", .x) %>% tolower(.) 13 | )) 14 | 15 | df_expect <- csat %>% 16 | transmute( 17 | type_new = if_else(type == "new", 1, 0), 18 | type_existing = if_else(type == "existing", 1, 0), 19 | type_reactivate = if_else(type == "reactivate", 1, 0), 20 | product_basic = if_else(product == "basic", 1, 0), 21 | product_advanced = if_else(product == "advanced", 1, 0), 22 | product_premium = if_else(product == "premium", 1, 0), 23 | csat_very_unsatisfied = if_else(csat == "Very unsatisfied", 1, 0), 24 | csat_unsatisfied = if_else(csat == "Unsatisfied", 1, 0), 25 | csat_neutral = if_else(csat == "Neutral", 1, 0), 26 | csat_satisfied = if_else(csat == "Satisfied", 1, 0), 27 | csat_very_satisfied = if_else(csat == "Very satisfied", 1, 0)) 28 | 29 | expect_equal(df_crossover, df_expect) 30 | 31 | }) 32 | 33 | test_that("crossover() exmample lagged variables", { 34 | 35 | df_crossover <- iris %>% 36 | transmute(crossover(starts_with("sepal"), 37 | 1:5, 38 | list(lag = ~ lag(.x, .y)), 39 | .names = "{xcol}_{fn}{y}")) 40 | 41 | df_expect <- iris %>% 42 | transmute(Sepal.Length_lag1 = lag(Sepal.Length, 1), 43 | Sepal.Length_lag2 = lag(Sepal.Length, 2), 44 | Sepal.Length_lag3 = lag(Sepal.Length, 3), 45 | Sepal.Length_lag4 = lag(Sepal.Length, 4), 46 | Sepal.Length_lag5 = lag(Sepal.Length, 5), 47 | Sepal.Width_lag1 = lag(Sepal.Width, 1), 48 | Sepal.Width_lag2 = 
lag(Sepal.Width, 2), 49 | Sepal.Width_lag3 = lag(Sepal.Width, 3), 50 | Sepal.Width_lag4 = lag(Sepal.Width, 4), 51 | Sepal.Width_lag5 = lag(Sepal.Width, 5)) 52 | 53 | expect_equal(df_crossover, df_expect) 54 | 55 | }) 56 | 57 | # tests adopted from across 58 | test_that("crossover() works on one column data.frame", { 59 | 60 | df0 <- data.frame(x = 1) 61 | 62 | df_crossover <- df0 %>% 63 | mutate(crossover(everything(), 1, ~ .x * .y)) 64 | 65 | df_expect <- df0 %>% 66 | mutate(`x_1` = x * 1) 67 | 68 | expect_equal(df_crossover, df_expect) 69 | 70 | }) 71 | 72 | test_that("crossover() does not select grouping variables", { 73 | 74 | df0 <- data.frame(g = 1, x = 1) 75 | 76 | df_crossover <- df0 %>% 77 | group_by(g) %>% 78 | summarise(x = crossover(everything(), 1, ~ .x * .y)) %>% 79 | pull() 80 | 81 | expect_equal(df_crossover, tibble(`x_1` = 1)) 82 | 83 | }) 84 | 85 | test_that("crossover() correctly names output columns", { 86 | df <- tibble(x = 1, y = 2, z = 3, s = "") 87 | gf <- group_by(df, x) 88 | 89 | expect_named( 90 | mutate(gf, crossover(c(y,z), 3:4, ~ .x * .y)), 91 | c("x", "y", "z", "s", "y_3", "y_4", "z_3", "z_4") 92 | ) 93 | expect_named( 94 | mutate(gf, crossover(c(y,z), 3:4, ~ .x * .y, .names = "id_{xcol}_{y}")), 95 | c("x", "y", "z", "s", "id_y_3", "id_y_4", "id_z_3", "id_z_4") 96 | ) 97 | expect_named( 98 | summarise(gf, crossover(c(y,z), 3:4, ~ mean(.x + .y), .names = "mean_{xcol}_{y}")), 99 | c("x", "mean_y_3", "mean_y_4", "mean_z_3", "mean_z_4") 100 | ) 101 | expect_named( 102 | summarise(gf, crossover(c(y,z), 3:4, list(paste = paste, sum = sum))), 103 | c("x", "y_3_paste", "y_3_sum", "y_4_paste", "y_4_sum", "z_3_paste", "z_3_sum", "z_4_paste", "z_4_sum") 104 | ) 105 | expect_named( 106 | summarise(gf, crossover(c(y,z), 3:4, list(paste = paste, mean))), 107 | c("x", "y_3_paste", "y_3_2", "y_4_paste", "y_4_2", "z_3_paste", "z_3_2", "z_4_paste", "z_4_2") 108 | ) 109 | expect_named( 110 | summarise(gf, crossover(c(y,z), 3:4, list(paste, mean = 
mean))), 111 | c("x", "y_3_1", "y_3_mean", "y_4_1", "y_4_mean", "z_3_1", "z_3_mean", "z_4_1", "z_4_mean") 112 | ) 113 | expect_named( 114 | summarise(gf, crossover(c(y,z), 3:4, list(mean, sum))), 115 | c("x", "y_3_1", "y_3_2", "y_4_1", "y_4_2", "z_3_1", "z_3_2", "z_4_1", "z_4_2") 116 | ) 117 | expect_named( 118 | summarise(gf, crossover(c(y,z), 119 | 3:4, 120 | list(mean = mean, paste = paste), 121 | .names = "{fn}_{xcol}_{y}")), 122 | c("x", "mean_y_3", "paste_y_3", "mean_y_4", "paste_y_4", "mean_z_3", "paste_z_3", "mean_z_4", "paste_z_4") 123 | ) 124 | # further added crossover()'s y_val, y_idx, y_nm 125 | expect_named( 126 | summarise(gf, crossover(c(y,z), 127 | list(a = 3, b = 4), 128 | list(mean = mean, paste = paste), 129 | .names = "{fn}_{xcol}_{y_val}")), 130 | c("x", "mean_y_3", "paste_y_3", "mean_y_4", "paste_y_4", "mean_z_3", "paste_z_3", "mean_z_4", "paste_z_4") 131 | ) 132 | expect_named( 133 | summarise(gf, crossover(c(y,z), 134 | list(a = 3, b = 4), 135 | list(mean = mean, paste = paste), 136 | .names = "{fn}_{xcol}_{y_nm}")), 137 | c("x", "mean_y_a", "paste_y_a", "mean_y_b", "paste_y_b", "mean_z_a", "paste_z_a", "mean_z_b", "paste_z_b") 138 | ) 139 | expect_warning( 140 | summarise(gf, crossover(c(y,z), 141 | list(a = 3:4, b = 5), 142 | list(mean = ~ mean(.x + .y), sum = ~ sum(.x + .y)), 143 | .names = "{fn}_{xcol}_{y_val}")) 144 | ) 145 | 146 | expect_warning( 147 | summarise(gf, crossover(c(y,z), 148 | list(3, 4), 149 | list(mean = ~ mean(.x + .y), sum = ~ sum(.x + .y)), 150 | .names = "{fn}_{xcol}_{y_nm}")) 151 | ) 152 | expect_named( 153 | summarise(gf, crossover(c(y,z), 154 | list(3, 4), 155 | list(sum = sum))), 156 | c("x", "y_1_sum", "y_2_sum", "z_1_sum", "z_2_sum") 157 | ) 158 | expect_named( 159 | summarise(gf, crossover(c(y,z), 160 | list(a = 3, b = 4), 161 | list(mean = mean, paste = paste), 162 | .names = "{fn}_{xcol}_{y_idx}")), 163 | c("x", "mean_y_1", "paste_y_1", "mean_y_2", "paste_y_2", "mean_z_1", "paste_z_1", "mean_z_2", 
"paste_z_2") 164 | ) 165 | expect_error( 166 | summarise(gf, crossover(c(y,z), 167 | list(z = 3, y = 4), 168 | list(mean = mean, paste = paste), 169 | .names = "{xcol}_{y_idx}")) 170 | ) 171 | 172 | # further added external vector 173 | col_nm_vec <- c("one", "two", "three", "four", "five", "six", "seven", "eight") 174 | expect_named( 175 | summarise(gf, crossover(c(y,z), 176 | list(z = 3, y = 4), 177 | list(paste = paste, sum = sum), 178 | .names = col_nm_vec)), 179 | c("x", col_nm_vec) 180 | ) 181 | # test that external vector throws error when too short 182 | col_nm_vec2 <- c("one", "two", "three", "four") 183 | expect_error( 184 | summarise(gf, crossover(c(y,z), 185 | list(z = 3, y = 4), 186 | list(paste = paste, sum = sum), 187 | .names = col_nm_vec2)) 188 | ) 189 | # test that external vector throws error when too long 190 | col_nm_vec3 <- c("one", "two", "three", "four", "five", "six", "seven", "eight", "nine") 191 | expect_error( 192 | summarise(gf, crossover(c(y,z), 193 | list(z = 3, y = 4), 194 | list(paste = paste, sum = sum), 195 | .names = col_nm_vec3)) 196 | ) 197 | expect_error( 198 | summarise(gf, crossover(c(y,z), 199 | list(z = 3, y = 4), 200 | list(paste = paste, sum = sum), 201 | .names = "new")) 202 | ) 203 | # test that external vectors throws error when it contains non-unique names 204 | col_nm_vec4 <- rep(c("one", "two", "three", "four"), 2) 205 | expect_error( 206 | summarise(gf, crossover(c(y,z), 207 | list(z = 3, y = 4), 208 | list(paste = paste, sum = sum), 209 | .names = col_nm_vec4)) 210 | ) 211 | # test external names vector with function in .y 212 | col_nm_vec <- c("one", "two") 213 | expect_named( 214 | summarise(df, crossover(c(y,z), 215 | dist_values, 216 | ~ if_else(.x == .y, 1L, 0L), 217 | .names = col_nm_vec)), 218 | col_nm_vec) 219 | # error case for fns in .y 220 | col_nm_vec <- c("one", "two", "three") 221 | expect_error( 222 | summarise(df, crossover(c(y,z), 223 | dist_values, 224 | ~ if_else(.x == .y, 1L, 0L), 225 | .names 
= col_nm_vec))) 226 | 227 | }) 228 | 229 | 230 | test_that("crossover() result locations are aligned with .fn list names", { 231 | 232 | df <- tibble(x = 1:2, y = 3:4) 233 | 234 | df_crossover <- summarise(df, 235 | crossover(c(x, y), 3:4, 236 | list(cls = ~ class(.x + .y), 237 | type = ~ is.numeric(.x + .y)))) 238 | 239 | expect <- tibble(`x_3_cls` = "integer", `x_3_type` = TRUE, 240 | `x_4_cls` = "integer", `x_4_type` = TRUE, 241 | `y_3_cls` = "integer", `y_3_type` = TRUE, 242 | `y_4_cls` = "integer", `y_4_type` = TRUE) 243 | 244 | expect_identical(df_crossover, expect) 245 | 246 | }) 247 | 248 | 249 | test_that("crossover() passes ... to functions", { 250 | 251 | df <- tibble(x = 1, y = 2) 252 | 253 | expect_equal( 254 | summarise(df, crossover(c(x, y), 255 | list(a = 10, b = 20), 256 | sum, 257 | na.rm = TRUE)), 258 | tibble(x_a = 11, x_b = 21, y_a = 12, y_b = 22) 259 | ) 260 | 261 | mean2 <- function(x, y, ...) { 262 | mean(c(x,y), ...) 263 | } 264 | 265 | expect_equal( 266 | summarise(df, crossover(c(x, y), 267 | list(a = 10, b = NA), 268 | list(sum = sum, mean = mean2), na.rm = TRUE)), 269 | tibble(x_a_sum = 11, x_a_mean = 5.5, 270 | x_b_sum = 1, x_b_mean = 1, 271 | y_a_sum = 12, y_a_mean = 6, 272 | y_b_sum = 2, y_b_mean = 2) 273 | ) 274 | 275 | }) 276 | 277 | test_that("crossover() passes unnamed arguments following .fns as ...", { 278 | 279 | df <- tibble(x = 1, y = "b") 280 | 281 | expect_equal(mutate(df, crossover(c(x, y), c(3, NA), paste, "a")), 282 | tibble(x = 1, y = "b", 283 | x_3 = "1 3 a", x_NA = "1 NA a", 284 | y_3 = "b 3 a", y_NA = "b NA a")) 285 | }) 286 | 287 | # test_that("over() works sequentially", { 288 | # 289 | # df <- tibble(a = 1) 290 | # 291 | # expect_equal( 292 | # mutate(df, 293 | # x = ncol(over(1, mean)), 294 | # y = ncol(over(1:2, mean))), 295 | # tibble(a = 1, x = 1L, y = 2L) 296 | # ) 297 | # 298 | # expect_equal( 299 | # mutate(df, 300 | # a = "x", 301 | # y = ncol(over(1, mean))), 302 | # tibble(a = "x", y = 1L) 303 | # ) 304 
| # 305 | # expect_equal( 306 | # mutate(df, 307 | # x = 1, 308 | # y = ncol(over(1:2, mean))), 309 | # tibble(a = 1, x = 1, y = 2L) 310 | # ) 311 | # }) 312 | 313 | test_that("crossover() retains original ordering", { 314 | df <- tibble(a = c(1:2), b = c(3:4)) 315 | 316 | expect_equal(mutate(df, a = c(5:6), x = crossover(c(a, b), .data$b, sum))$x, 317 | tibble(a_3 = c(14, 14), a_4 = c(15, 15), b_3 = c(10, 10), b_4 = c(11, 11))) 318 | 319 | }) 320 | 321 | # test_that("crossover() gives meaningful messages", { 322 | # 323 | # # only over2, over2x specific error messages go here 324 | # gf <- tibble(g = 1, x = 1) 325 | # 326 | # expect_snapshot_error( 327 | # summarise(gf, 328 | # crossover(c(y,z), 329 | # dist_values, 330 | # ~ if_else(.x == .y, 1L, 0L))) 331 | # ) 332 | 333 | # }) 334 | 335 | test_that("crossover() uses environment from the current quosure (#5460)", { 336 | # If the data frame `y` is selected, causes a subscript conversion 337 | # error since it is fractional 338 | 339 | df <- data.frame(x = c(1, 2), y = c(1.1, 2.4)) 340 | y <- "x" 341 | 342 | expect_equal(df %>% 343 | summarise(crossover(all_of(y), 344 | 1, 345 | ~ mean(.x, na.rm = .y))), 346 | data.frame(x_1 = 1.5)) 347 | 348 | expect_equal(df %>% filter(crossover(all_of(y), 1, ~ .x + .y <= 2)), 349 | slice(df, 1)) 350 | 351 | # Recursive case fails because the `y` column has precedence (across issue: #5498) 352 | # expect_error(df %>% summarise(summarise(across(), across(all_of(y), mean)))) 353 | 354 | # Inherited case 355 | out <- df %>% summarise(local(crossover(all_of(y), 356 | 1, 357 | ~ mean(.x, na.rm = .y)))) 358 | expect_equal(out, data.frame(x_1 = 1.5)) 359 | }) 360 | 361 | 362 | 363 | # expected errors 364 | 365 | test_that("crossover() custom errors", { 366 | 367 | # inside dplyr 368 | expect_error(crossover()) 369 | 370 | # .fns must be function 371 | expect_error( 372 | summarise(tibble(x = 1), crossover(x, 2, 42)) 373 | ) 374 | 375 | # check keep used 376 | expect_warning( 377 | 
mutate(tibble(x = 1), crossover(x, 2, mean), .keep = "used"), 378 | "does not support the `.keep`" 379 | ) 380 | 381 | # check keep unused 382 | expect_warning( 383 | mutate(tibble(x = 1), crossover(x, 2, mean), .keep = "unused"), 384 | "does not support the `.keep`" 385 | ) 386 | 387 | # .y is function does not work on grouped df 388 | gf <- tibble(g = 1, x = 1) 389 | expect_error(summarise(gf, 390 | crossover_old(c(y,z), 391 | dist_values, 392 | ~ if_else(.x == .y, 1L, 0L)))) 393 | 394 | }) 395 | 396 | # other edge cases 397 | -------------------------------------------------------------------------------- /tests/testthat/test-select_strings.R: -------------------------------------------------------------------------------- 1 | # select_vars ------------------------------------------------------------------ 2 | 3 | # select_vars examples of basic functionality from the example section 4 | 5 | ## examples 6 | test_that("cut_names() simple exmamples", { 7 | 8 | cut_n1 <- cut_names("Width", .vars = names(iris)) 9 | expect_equal(cut_n1, c("Sepal.", "Petal.")) 10 | 11 | cut_n2 <- cut_names("Width", .vars = "Width.Petal.Width") 12 | expect_equal(cut_n2, c(".Petal.")) 13 | 14 | }) 15 | 16 | test_that("extract_names() simple exmamples", { 17 | 18 | extr_n1 <- extract_names("Length|Width", .vars = names(iris)) 19 | expect_equal(extr_n1, c("Length", "Width")) 20 | 21 | extr_n2 <- extract_names("Width", .vars = "Width.Petal.Width") 22 | expect_equal(extr_n2, c("Width")) 23 | 24 | }) 25 | 26 | test_that("cut_names() in over() (gets var names automatically)", { 27 | 28 | over_cut <- iris %>% 29 | dplyr::mutate(over(cut_names(".Width"), 30 | ~ .("{.x}.Width") * .("{.x}.Length"), 31 | .names = "Product_{x}")) 32 | 33 | expect_over_cut <- iris %>% 34 | dplyr::mutate(Product_Sepal = Sepal.Length * Sepal.Width, 35 | Product_Petal = Petal.Length * Petal.Width) 36 | 37 | expect_equal(over_cut, expect_over_cut) 38 | 39 | }) 40 | 41 | test_that("extract_names() in over() (gets var 
names automatically)", { 42 | 43 | over_extr <- iris %>% 44 | dplyr::mutate(over(extract_names("Length|Width"), 45 | ~.("Petal.{.x}") * .("Sepal.{.x}"), 46 | .names = "Product_{x}")) 47 | 48 | expect_over_extr <- iris %>% 49 | dplyr::mutate(Product_Length = Sepal.Length * Petal.Length, 50 | Product_Width = Sepal.Width * Petal.Width) 51 | 52 | expect_equal(over_extr, expect_over_extr) 53 | 54 | }) 55 | 56 | test_that("extract_names() in over() with `.remove`", { 57 | 58 | over_extr <- csatraw %>% 59 | dplyr::transmute(over(extract_names("item\\d", "^item1"), 60 | ~ .("{.x}a") * .("{.x}b")) 61 | ) 62 | 63 | expect_named(over_extr, paste0("item", 2:6)) 64 | 65 | }) 66 | 67 | ## more tests 68 | 69 | test_that("cut_names() in over() with `.remove`", { 70 | 71 | over_cut <- csat %>% 72 | dplyr::transmute(over(cut_names("rating$", "^email"), 73 | ~ paste0(.("{.x}rating"), .("{.x}contact")), 74 | .names = "{x}new") 75 | ) 76 | 77 | expect_named(over_cut, paste0(c("postal", "phone", "website", "shop"), "_new")) 78 | 79 | }) 80 | 81 | -------------------------------------------------------------------------------- /tests/testthat/test-select_values.R: -------------------------------------------------------------------------------- 1 | # select_values ------------------------------------------------------------------ 2 | # select_values examples of basic functionality from the example section 3 | library(dplyr) 4 | 5 | ## examples 6 | test_that("use case: dist_values() in over()", { 7 | 8 | over_dist_val <- iris %>% 9 | mutate(over(dist_values(Species), 10 | ~ if_else(Species == .x, 1, 0) 11 | ), 12 | .keep = "none") 13 | 14 | df_expect <- iris %>% 15 | mutate(setosa = if_else(Species == "setosa", 1, 0), 16 | versicolor = if_else(Species == "versicolor", 1, 0), 17 | virginica = if_else(Species == "virginica", 1, 0), 18 | .keep = "none") 19 | 20 | expect_equal(over_dist_val, df_expect) 21 | 22 | }) 23 | 24 | test_that("dist_values() short examples", { 25 | 26 | 
expect_equal(dist_values(c(1:3, NA)), c(1:3)) 27 | 28 | expect_equal(dist_values(c(1:3, NA), .sort = "desc"), c(3:1)) 29 | 30 | expect_equal(dist_values(c(3, 1, 2, NA), .sort = "none"), c(3, 1, 2)) 31 | 32 | expect_equal( 33 | factor(c(1:3, NA)) %>% 34 | as.factor() %>% 35 | dist_values() %>% 36 | class(), "character") 37 | 38 | }) 39 | 40 | 41 | test_that("use case: seq_rang() in over()", { 42 | 43 | over_seq_rang <- iris %>% 44 | mutate(over(seq_range(Sepal.Length, 1), 45 | ~ if_else(Sepal.Length > .x, 1, 0), 46 | .names = "Sepal.Length.{x}"), 47 | .keep = "none") 48 | 49 | df_expect <- iris %>% 50 | mutate(Sepal.Length.5 = if_else(Sepal.Length > 5, 1, 0), 51 | Sepal.Length.6 = if_else(Sepal.Length > 6, 1, 0), 52 | Sepal.Length.7 = if_else(Sepal.Length > 7, 1, 0), 53 | .keep = "none") 54 | 55 | expect_equal(over_seq_rang, df_expect) 56 | 57 | }) 58 | 59 | test_that("seq_rang() on dates", { 60 | 61 | some_dates <- c(as.Date("2020-01-02"), 62 | as.Date("2020-05-02"), 63 | as.Date("2020-03-02")) 64 | 65 | 66 | 67 | expect_equal(seq_range(some_dates, "1 month"), 68 | as.Date(c("2020-01-02", 69 | "2020-02-02", 70 | "2020-03-02", 71 | "2020-04-02", 72 | "2020-05-02"))) 73 | 74 | }) 75 | 76 | 77 | test_that("dist_values() works with comma separated vectors and lists", { 78 | 79 | expect_equal( 80 | c("1, 2, 3", 81 | "2, 4, 5, 6", 82 | "4, 1, 7") %>% 83 | dist_values(., .sep = ", "), 84 | as.character(1:7) 85 | ) 86 | 87 | expect_equal( 88 | list(a = c(1:4), b = (4:6), c(5:10)) %>% 89 | dist_values(), 90 | c(1:10)) 91 | 92 | }) 93 | 94 | 95 | test_that("dist_values() example with factors", { 96 | 97 | fctrs <- factor(letters[1:3], levels = c("b", "a", "c")) 98 | 99 | expect_equal(dist_values(fctrs), c("b", "a", "c")) 100 | 101 | expect_equal(dist_values(fctrs, .sort = "asc"), letters[1:3]) 102 | 103 | expect_equal(dist_values(fctrs, .sort = "desc"), letters[3:1]) 104 | 105 | expect_equal(dist_values(fctrs, .sort = "none"), letters[1:3]) 106 | 107 | }) 108 | 109 | # more 
tests 110 | 111 | test_that("seq_range() error check", { 112 | 113 | expect_error(seq_range(letters[1:3], 1)) 114 | 115 | }) 116 | 117 | 118 | -------------------------------------------------------------------------------- /tests/testthat/test-show_affix.R: -------------------------------------------------------------------------------- 1 | # show_affix ------------------------------------------------------------------ 2 | 3 | library(dplyr) 4 | 5 | # select_vars examples of basic functionality from the example section 6 | 7 | ## examples 8 | test_that("show_affix can be called after across2 error", { 9 | 10 | expect_error({ 11 | 12 | iris %>% 13 | as_tibble %>% 14 | rename("Pesal.Length" = Sepal.Length) %>% 15 | mutate(across2(ends_with("Length"), 16 | ends_with("Width"), 17 | .fns = list(product = ~ .x * .y, 18 | sum = ~ .x + .y), 19 | .names = "{pre}_{fn}")) 20 | }) 21 | 22 | out <- show_prefix() 23 | 24 | expected <- tibble( 25 | .xcols = c("Pesal.Length", "Petal.Length"), 26 | .ycols = c("Sepal.Width", "Petal.Width"), 27 | prefix = c(NA_character_, "Petal") 28 | ) 29 | 30 | expect_equal(out, expected) 31 | 32 | expect_error({ 33 | 34 | iris %>% 35 | as_tibble %>% 36 | rename("Sepal.thengl" = Sepal.Length) %>% 37 | mutate(across2(starts_with("Petal"), 38 | starts_with("Sepal"), 39 | .fns = list(product = ~ .x * .y, 40 | sum = ~ .x + .y), 41 | .names = "{suf}_{fn}")) 42 | }) 43 | 44 | out2 <- show_suffix() 45 | 46 | expected2 <- tibble( 47 | .xcols = c("Petal.Length", "Petal.Width"), 48 | .ycols = c("Sepal.thengl", "Sepal.Width"), 49 | suffix = c(NA_character_, "Width") 50 | ) 51 | 52 | expect_equal(out2, expected2) 53 | 54 | }) 55 | 56 | test_that("show_suffix can called on a data.frame", { 57 | 58 | out <- iris %>% 59 | show_suffix(starts_with("Sepal"), 60 | starts_with("Petal")) 61 | 62 | expected <- tibble( 63 | .xcols = c("Sepal.Length", "Sepal.Width"), 64 | .ycols = c("Petal.Length", "Petal.Width"), 65 | suffix = c("Length", "Width") 66 | ) 67 | 68 | 
expect_equal(out, expected) 69 | 70 | out2 <- iris %>% 71 | show_prefix(ends_with("Length"), 72 | ends_with("Width")) 73 | 74 | expected2 <- tibble( 75 | .xcols = c("Sepal.Length", "Petal.Length"), 76 | .ycols = c("Sepal.Width", "Petal.Width"), 77 | prefix = c("Sepal", "Petal") 78 | ) 79 | 80 | expect_equal(out2, expected2) 81 | 82 | }) 83 | 84 | 85 | -------------------------------------------------------------------------------- /tests/testthat/test-string_eval.R: -------------------------------------------------------------------------------- 1 | # string_eval ------------------------------------------------------------------ 2 | # string_eval examples of basic functionality from the example section 3 | library(dplyr) 4 | 5 | ## examples 6 | test_that(".() works in over()", { 7 | 8 | df_over <- iris %>% 9 | mutate(over(c("Sepal", "Petal"), 10 | ~ .("{.x}.Width") + .("{.x}.Length") 11 | )) 12 | 13 | df_expect <- iris %>% 14 | mutate( 15 | Sepal = Sepal.Width + Sepal.Length, 16 | Petal = Petal.Width + Petal.Length 17 | ) 18 | 19 | expect_equal(df_over, df_expect) 20 | 21 | }) 22 | 23 | # add non-dplyover examples? (across, map) 24 | 25 | ## compatibility 26 | 27 | # works locally but not in test_check 28 | # test_that("data.table's .() still works", { 29 | # 30 | # expect_error({ 31 | # library(data.table) 32 | # mtcarsDT <- as.data.table(mtcars) 33 | # mtcarsDT[, .(mpg, hp)] 34 | # detach("package:data.table", unload = TRUE) 35 | # }, NA) 36 | # 37 | # }) 38 | 39 | 40 | test_that("magrittrs . 
still works", { 41 | 42 | expect_error(mtcars %>% nrow(.), NA) 43 | 44 | }) 45 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/benchmark1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/vignettes/benchmark1.png -------------------------------------------------------------------------------- /vignettes/benchmark2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/vignettes/benchmark2.png -------------------------------------------------------------------------------- /vignettes/performance.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/vignettes/performance.rds -------------------------------------------------------------------------------- /vignettes/why_bench.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/vignettes/why_bench.rds --------------------------------------------------------------------------------