├── .Rbuildignore
├── .github
├── .gitignore
└── workflows
│ ├── check-standard.yaml
│ ├── pkgdown.yaml
│ └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── across2.R
├── checks.R
├── crossover.R
├── data-csat.R
├── data-csatraw.R
├── dplyover.R
├── meta_setup.R
├── over.R
├── over2.R
├── over_across_family.R
├── select_strings.R
├── select_values.R
├── selection_helpers.R
├── show_affix.R
└── string_eval.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── data-raw
└── csatraw.R
├── data
├── csat.rda
└── csatraw.rda
├── dplyover.Rproj
├── man
├── across2.Rd
├── crossover.Rd
├── csat.Rd
├── csatraw.Rd
├── dplyover-package.Rd
├── figures
│ ├── apple-touch-icon-120x120.png
│ ├── apple-touch-icon-152x152.png
│ ├── apple-touch-icon-180x180.png
│ ├── apple-touch-icon-60x60.png
│ ├── apple-touch-icon-76x76.png
│ ├── apple-touch-icon.png
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ ├── favicon.ico
│ ├── lifecycle-archived.svg
│ ├── lifecycle-defunct.svg
│ ├── lifecycle-deprecated.svg
│ ├── lifecycle-experimental.svg
│ ├── lifecycle-maturing.svg
│ ├── lifecycle-questioning.svg
│ ├── lifecycle-stable.svg
│ ├── lifecycle-superseded.svg
│ ├── logo.png
│ └── logo_big.png
├── over.Rd
├── over2.Rd
├── over_across_family.Rd
├── rmd
│ └── setup.Rmd
├── select_values.Rd
├── select_vars.Rd
├── selection_helpers.Rd
├── show_affix.Rd
└── string_eval.Rd
├── pkgdown
├── extra.css
└── favicon
│ ├── apple-touch-icon-120x120.png
│ ├── apple-touch-icon-152x152.png
│ ├── apple-touch-icon-180x180.png
│ ├── apple-touch-icon-60x60.png
│ ├── apple-touch-icon-76x76.png
│ ├── apple-touch-icon.png
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ └── favicon.ico
├── tests
├── testthat.R
└── testthat
│ ├── _snaps
│ ├── over.md
│ └── over2.md
│ ├── test-across2.R
│ ├── test-crossover.R
│ ├── test-over.R
│ ├── test-over2.R
│ ├── test-select_strings.R
│ ├── test-select_values.R
│ ├── test-show_affix.R
│ └── test-string_eval.R
└── vignettes
├── .gitignore
├── benchmark1.png
├── benchmark2.png
├── performance.Rmd
├── performance.rds
├── why_bench.rds
└── why_dplyover.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^dplyover\.Rproj$
2 | ^\.Rproj\.user$
3 | ^LICENSE\.md$
4 | ^README\.Rmd$
5 | ^data-raw$
6 | ^codecov\.yml$
7 | ^\.github$
8 | ^doc$
9 | ^Meta$
10 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/check-standard.yaml:
--------------------------------------------------------------------------------
1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
3 | on:
4 | push:
5 | branches:
6 | - main
7 | - master
8 | pull_request:
9 | branches:
10 | - main
11 | - master
12 |
13 | name: R-CMD-check
14 |
15 | jobs:
16 | R-CMD-check:
17 | runs-on: ${{ matrix.config.os }}
18 |
19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20 |
21 | strategy:
22 | fail-fast: false
23 | matrix:
24 | config:
25 | - {os: windows-latest, r: 'release'}
26 | - {os: macOS-latest, r: 'release'}
27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
28 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
29 |
30 | env:
31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
32 | RSPM: ${{ matrix.config.rspm }}
33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
34 |
35 | steps:
36 | - uses: actions/checkout@v2
37 |
38 | - uses: r-lib/actions/setup-r@v1
39 | with:
40 | r-version: ${{ matrix.config.r }}
41 |
42 | - uses: r-lib/actions/setup-pandoc@v1
43 |
44 | - name: Query dependencies
45 | run: |
46 | install.packages('remotes')
47 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
48 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
49 | shell: Rscript {0}
50 |
51 | - name: Cache R packages
52 | if: runner.os != 'Windows'
53 | uses: actions/cache@v2
54 | with:
55 | path: ${{ env.R_LIBS_USER }}
56 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
57 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
58 |
59 | - name: Install system dependencies
60 | if: runner.os == 'Linux'
61 | run: |
62 | while read -r cmd
63 | do
64 | eval sudo $cmd
65 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
66 |
67 | - name: Install dependencies
68 | run: |
69 | remotes::install_deps(dependencies = TRUE)
70 | remotes::install_cran("rcmdcheck")
71 | shell: Rscript {0}
72 |
73 | - name: Check
74 | env:
75 | _R_CHECK_CRAN_INCOMING_REMOTE_: false
76 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
77 | shell: Rscript {0}
78 |
79 | - name: Upload check results
80 | if: failure()
81 | uses: actions/upload-artifact@main
82 | with:
83 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results
84 | path: check
85 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 | - master
6 |
7 | name: pkgdown
8 |
9 | jobs:
10 | pkgdown:
11 | runs-on: macOS-latest
12 | env:
13 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
14 | steps:
15 | - uses: actions/checkout@v2
16 |
17 | - uses: r-lib/actions/setup-r@v1
18 |
19 | - uses: r-lib/actions/setup-pandoc@v1
20 |
21 | - name: Query dependencies
22 | run: |
23 | install.packages('remotes')
24 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
25 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
26 | shell: Rscript {0}
27 |
28 | - name: Cache R packages
29 | uses: actions/cache@v2
30 | with:
31 | path: ${{ env.R_LIBS_USER }}
32 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
33 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
34 |
35 | - name: Install dependencies
36 | run: |
37 | remotes::install_deps(dependencies = TRUE)
38 | install.packages("pkgdown", type = "binary")
39 | shell: Rscript {0}
40 |
41 | - name: Install package
42 | run: R CMD INSTALL .
43 |
44 | - name: Deploy package
45 | run: |
46 | git config --local user.email "actions@github.com"
47 | git config --local user.name "GitHub Actions"
48 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)'
49 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 | - master
6 | pull_request:
7 | branches:
8 | - main
9 | - master
10 |
11 | name: test-coverage
12 |
13 | jobs:
14 | test-coverage:
15 | runs-on: macOS-latest
16 | env:
17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 | steps:
19 | - uses: actions/checkout@v2
20 |
21 | - uses: r-lib/actions/setup-r@v1
22 |
23 | - uses: r-lib/actions/setup-pandoc@v1
24 |
25 | - name: Query dependencies
26 | run: |
27 | install.packages('remotes')
28 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
29 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
30 | shell: Rscript {0}
31 |
32 | - name: Cache R packages
33 | uses: actions/cache@v2
34 | with:
35 | path: ${{ env.R_LIBS_USER }}
36 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
37 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
38 |
39 | - name: Install dependencies
40 | run: |
41 | install.packages(c("remotes"))
42 | remotes::install_deps(dependencies = TRUE)
43 | remotes::install_cran("covr")
44 | shell: Rscript {0}
45 |
46 | - name: Test coverage
47 | run: covr::codecov()
48 | shell: Rscript {0}
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 |
5 | to_do.R
6 |
7 | R/crossover_old.R
8 | R/cross_matrix.R
9 | R/dplyr_context_internals.R
10 | R/dplyr_data_mask.R
11 | R/across_new.R
12 |
13 | tests/testthat/test-across_new.R
14 | tests/testthat/test-crossover_old.R
15 |
16 | inst/doc
17 | doc
18 | docs
19 | Meta
20 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: dplyover
2 | Title: Create columns by applying functions to vectors and/or columns in 'dplyr'
3 | Version: 0.0.8.9002
4 | Authors@R:
5 | person(given = "Tim",
6 | family = "Tiefenbach",
7 | role = c("aut", "cre"),
8 | email = "mailme@tim-tiefenbach.de",
9 | comment = c(ORCID = "0000-0001-9443-2434"))
10 | Description: Extension of the functionality of 'dplyr' that builds a family of functions
11 | around dplyr::across().
12 | License: MIT + file LICENSE
13 | Encoding: UTF-8
14 | LazyData: true
15 | Roxygen: list(markdown = TRUE)
16 | RoxygenNote: 7.1.1
17 | URL: https://github.com/TimTeaFan/dplyover
18 | BugReports: https://github.com/TimTeaFan/dplyover/issues
19 | Suggests:
20 | testthat (>= 3.0.0),
21 | knitr,
22 | rmarkdown,
23 | lifecycle,
24 | covr,
25 | stringr,
26 | tidyr,
27 | bench,
28 | ggplot2
29 | Imports:
30 | dplyr (>= 1.0.0),
31 | rlang (>= 0.4.7),
32 | vctrs (>= 0.3.3),
33 | purrr,
34 | glue (>= 1.3.2),
35 | tibble (>= 2.1.3),
36 | tidyselect (>= 1.1.0)
37 | Depends:
38 | R (>= 3.2.0)
39 | Config/testthat/edition: 3
40 | VignetteBuilder: knitr
41 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2020
2 | COPYRIGHT HOLDER: Tim Tiefenbach
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2020 Tim Tiefenbach
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(.)
4 | export(across2)
5 | export(across2x)
6 | export(crossover)
7 | export(cut_names)
8 | export(dist_values)
9 | export(extract_names)
10 | export(over)
11 | export(over2)
12 | export(over2x)
13 | export(seq_range)
14 | export(show_prefix)
15 | export(show_suffix)
16 | importFrom(rlang,"%||%")
17 | importFrom(rlang,":=")
18 | importFrom(stats,na.omit)
19 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # dplyover 0.0.8.9002
2 |
3 | * fix `inspect_call` so that it works with the dev version of rlang::trace_back()
4 |
5 | # dplyover 0.0.8.9001
6 |
7 | * hotfix which fixed a bug preventing {dplyover} from running on R versions <= 4.0
8 |
9 | # dplyover 0.0.8.9000
10 |
11 | * first official release
12 | * over-across function family: `over()`, `across2()`, `crossover()` and variants
13 | * helper functions: selection helpers for values, strings and variables (see `?selection_helpers`)
14 | * helper functions for `across2` pre- and suffixes: `show_suffix()`, `show_prefix()`
15 | * datasets: `csat` and `csatraw`
16 |
17 | # dplyover 0.0.7.9000
18 |
19 | * pre-release with basic functionality
20 |
--------------------------------------------------------------------------------
/R/checks.R:
--------------------------------------------------------------------------------
# Adapted from https://stackoverflow.com/a/60447909/9349302
#
# Test whether `x` is a date or date-time object, i.e. whether its class
# vector contains "Date" or "POSIXt". Returns a single TRUE/FALSE.
is.date <- function(x) {
  inherits(x, "Date") || inherits(x, "POSIXt")
}
5 |
# Inspect the call stack (via `rlang::trace_back()`) that led to the current
# call.
#
# Arguments:
#   warn       If TRUE, look for a `.keep` argument set to "used" or "unused"
#              in the call located two trace entries before the last
#              `dplyr:::mutate*` entry that precedes the first `dplyover::`
#              entry (presumably the user-facing `mutate()` call -- the offset
#              is taken over unchanged from the original implementation).
#   last_verb  If TRUE, also report the trace position of the last entry that
#              belongs to one of dplyr's verbs (mutate, summarise/summarize,
#              filter, select, arrange, transmute).
#
# Returns a list with two elements:
#   warn       TRUE if `.keep = "used"` / `.keep = "unused"` was detected,
#              FALSE otherwise.
#   last_verb  Trace index of the last dplyr verb entry; NULL if not requested
#              or if no such entry exists in the trace.
inspect_call <- function(warn = TRUE, last_verb = FALSE) {

  out <- list(warn = FALSE,
              last_verb = NULL)
  trace_bck <- rlang::trace_back()

  # Derive one identifier per trace entry. When the trace has no `calls`
  # element its rows are transposed and "namespace + scope + call head" is
  # pasted together; otherwise the head of every stored call is used.
  if (is.null(trace_bck$calls)) {
    call_fns <- purrr::map(purrr::transpose(trace_bck), function(trace) {
      paste0(trace$namespace,
             trace$scope,
             as.character(trace$call[1]))})
  } else {
    call_fns <- purrr::map(trace_bck$calls, function(call) `[[`(call, 1) )
  }

  # Only entries *before* the first dplyover call are searched for mutate.
  # `seq_len()` avoids the `1:limit - 1` precedence trap, and the length check
  # avoids `min(integer(0))` (Inf + warning, followed by an error).
  dplyover_pos <- which(grepl("^dplyover::", call_fns))
  mut_id <- if (length(dplyover_pos) > 0) {
    which(grepl("^dplyr:::mutate", call_fns[seq_len(min(dplyover_pos) - 1L)]))
  } else {
    integer(0)
  }

  # last dplyr verb
  if (last_verb) {
    # Alternation needs a group `( | )`; square brackets would be a character
    # class that matches any single one of the listed letters.
    verb_pos <- which(grepl("^dplyr:::(mutate|summarise|summarize|filter|select|arrange|transmute)",
                            call_fns))
    if (length(verb_pos) > 0) {
      out$last_verb <- max(verb_pos)
    }
  }

  # check keep
  if (warn && length(mut_id) > 0) {

    mut_call_pos <- max(mut_id) - 2

    # A non-positive index cannot address a trace entry; skip the check then.
    if (mut_call_pos >= 1) {
      last_mut <- as.list(trace_bck$call[[mut_call_pos]])

      keep_arg <- grepl("^\\.keep$", names(last_mut), perl = TRUE)

      if (any(keep_arg)) {
        keep_val <- last_mut[keep_arg]

        # `any()` keeps the condition length-one even if `.keep` matched
        # more than once.
        if (any(keep_val %in% c("used", "unused"))) {
          out$warn <- TRUE
        }
      }
    }
  }
  out
}
47 |
# This function is copied from dplyr (restyled here).
# See README section Acknowledgements as well as dplyr's license and copyright.
#
# Starting at `env`, step out of tidy-eval data masks: as long as `env`
# contains the marker binding `.__tidyeval_data_mask__.`, replace it with the
# parent of the environment stored in its `.top_env` binding. With
# `recursive = FALSE` at most one such step is taken. `inherit` is passed on
# to `rlang::env_has()` and `rlang::env_get()`. Returns the resulting
# environment (or `env` itself if it is no data mask).
data_mask_top <- function(env, recursive = FALSE, inherit = FALSE) {
  repeat {
    is_mask <- rlang::env_has(env, ".__tidyeval_data_mask__.", inherit = inherit)
    if (!is_mask) {
      break
    }

    mask_top <- rlang::env_get(env, ".top_env", inherit = inherit)
    env <- rlang::env_parent(mask_top)

    if (!recursive) {
      break
    }
  }

  env
}
60 |
--------------------------------------------------------------------------------
/R/crossover.R:
--------------------------------------------------------------------------------
1 | #' Apply functions to a set of columns and a vector simultaneously in 'dplyr'
2 | #'
3 | #' @description
4 | #' `crossover()` combines the functionality of [dplyr::across()] with [over()]
5 | #' by iterating simultaneously over (i) a set of columns (`.xcols`) and (ii)
6 | #' a vector or list (`.y`). `crossover()` *always* applies the functions in
7 | #' `.fns` in a *nested* way to a combination of both inputs. There are, however,
8 | #' two different ways in which the functions in `.fns` are applied.
9 | #'
10 | #' When `.y` is a vector or list, each function in `.fns` is applied to
11 | #' *all pairwise combinations* between columns in `.xcols` and elements in
12 | #' `.y` (this resembles the behavior of `over2x()` and `across2x()`).
13 | #'
14 | #' `crossover()` has one trick up its sleeve, which sets it apart from the other
15 | #' functions in the <[`over-across family`][over_across_family]>: Its second input
16 | #' (`.y`) can be a function. This changes the original behavior slightly: First
17 | #' the function in `.y` is applied to all columns in `.xcols` to *generate* an
18 | #' input object which will be used as `.y` in the function calls in `.fns`.
19 | #' In this case each function is applied to all pairs between (i) columns in
20 | #' `.xcols` with (ii) the output elements that they generated through the
21 | #' function that was originally supplied to `.y`. Note that the underlying
22 | #' data must not be grouped, if a function is supplied to `.y`. For examples see
23 | #' the example section below.
24 | #'
25 | #' @param .xcols <[`tidy-select`][dplyr_tidy_select]> Columns to transform.
26 | #' Because `crossover()` is used within functions like `summarise()` and
27 | #' `mutate()`, you can't select or compute upon grouping variables.
28 | #'
29 | #' @param .y An atomic vector or list to apply functions to. `crossover()` also
30 | #' accepts a function as `.y` argument. In this case each column in `.xcols`
31 | #' is looped over all the outputs that it generated with the function supplied
32 | #' to `.y`. Note: the underlying data must not be grouped, if a function
33 | #' is supplied to `.y`.
34 | #'
35 | #' If a function is supplied, the following values are possible:
36 | #'
37 | #' - A bare function name, e.g. `unique`
38 | #' - An anonymous function, e.g. `function(x) unique(x)`
39 | #' - A purrr-style lambda, e.g. `~ unique(.x, fromLast = TRUE)`
40 | #'
41 | #' Note that additional arguments can only be specified with an anonymous
42 | #' function, a purrr-style lambda or with a pre-filled custom function.
43 | #'
44 | #' @param .fns Functions to apply to each column in `.xcols` and element in `.y`.
45 | #'
46 | #' Possible values are:
47 | #'
48 | #' - A function
49 | #' - A purrr-style lambda
50 | #' - A list of functions/lambdas
51 | #'
52 | #' Note that `NULL` is not accepted as argument to `.fns`.
53 | #'
54 | #' @param ... Additional arguments for the function calls in `.fns`.
55 | #'
56 | #' @param .names A glue specification that describes how to name the output
57 | #' columns. This can use:
58 | #'
59 | #' - `{xcol}` to stand for the selected column name,
60 | #' - `{y}` to stand for the selected vector element, and
61 | #' - `{fn}` to stand for the name of the function being applied.
62 | #'
63 | #' The default (`NULL`) is equivalent to `"{xcol}_{y}"` for the single function
64 | #' case and `"{xcol}_{y}_{fn}"` for the case where a list is used for `.fns`.
65 | #'
66 | #' Note that, depending on the nature of the underlying object in `.y`,
67 | #' specifying `{y}` will yield different results:
68 | #'
69 | #' - If `.y` is an unnamed atomic vector, `{y}` will represent each value.
70 | #' - If `.y` is a named list or atomic vector, `{y}` will represent each name.
71 | #' - If `.y` is an unnamed list, `{y}` will be the index number running from 1 to `length(y)`.
72 | #'
73 | #' This standard behavior (interpretation of `{y}`) can be overwritten by
74 | #' directly specifying:
75 | #'
76 | #' - `{y_val}` for `.y`'s values
77 | #' - `{y_nm}` for its names
78 | #' - `{y_idx}` for its index numbers
79 | #'
80 | #' Alternatively, a character vector of length equal to the number of columns to
81 | #' be created can be supplied to `.names`. Note that in this case, the glue
82 | #' specification described above is not supported.
83 | #'
84 | #' @param .names_fn Optionally, a function that is applied after the glue
85 | #' specification in `.names` has been evaluated. This is, for example, helpful,
86 | #' in case the resulting names need to be further cleaned or trimmed.
87 | #'
88 | #' @returns
89 | #' `crossover()` returns a tibble with one column for each combination of
90 | #' columns in `.xcols`, elements in `.y` and functions in `.fns`.
91 | #'
92 | #' If a function is supplied as `.y` argument, `crossover()` returns a tibble with
93 | #' one column for each pair of output elements of `.y` and the column in `.xcols`
94 | #' that generated the output combined with each function in `.fns`.
95 | #'
96 | #' @seealso
97 | #' Other members of the <[`over-across function family`][over_across_family]>.
98 | #'
99 | #' @section Examples:
100 | #'
101 | #' ```{r, child = "man/rmd/setup.Rmd"}
102 | #' ```
103 | #'
104 | #' For the basic functionality please refer to the examples in [over()] and
105 | #' [dplyr::across()].
106 | #'
107 | #' ```{r, comment = "#>", collapse = TRUE}
108 | #' library(dplyr)
109 | #'
110 | #' # For better printing
111 | #' iris <- as_tibble(iris)
112 | #' ```
113 | #'
114 | #' ## Creating many similar variables for multiple columns
115 | #' If `.y` is a vector or list, `crossover()` loops every combination between
116 | #' columns in `.xcols` and elements in `.y` over the functions in `.fns`. This
117 | #' is helpful in cases where we want to create a batch of similar variables with
118 | #' only slight changes in the arguments of the calling function. A good example
119 | #' are lagged variables. Below we create five lagged variables for each
120 | #' 'Sepal.Length' and 'Sepal.Width'. To create nice names we use a named list
121 | #' as argument in `.fns` and specify the glue syntax in `.names`.
122 | #'
123 | #' ```{r, comment = "#>", collapse = TRUE}
124 | #' iris %>%
125 | #' transmute(
126 | #' crossover(starts_with("sepal"),
127 | #' 1:5,
128 | #' list(lag = ~ lag(.x, .y)),
129 | #' .names = "{xcol}_{fn}{y}")) %>%
130 | #' glimpse
131 | #' ```
132 | #'
133 | #' ## Creating dummy variables for multiple variables (columns)
134 | #' The `.y` argument of `crossover()` can take a function instead of list or vector.
135 | #' In the example below we select the columns 'type', 'product', 'csat' in `.xcols`.
136 | #' We supply the function [dist_values()] to `.y`, which is a cleaner variant of
137 | #' base R's `unique()`. This generates all distinct values for all three selected
138 | #' variables. Now, the function in `.fns`, `~ if_else(.y == .x, 1, 0)`, is applied
139 | #' to each pair of distinct value in `.y` and the column in `.xcols` that generated
140 | #' this value. This basically creates a dummy variable for each value of each
141 | #' variable. Since some of the values contain whitespace characters, we can use the
142 | #' `.names_fn` argument to supply a *third* function that cleans the output names
143 | #' by replacing spaces with an underscore and setting all characters `tolower()`.
144 | #'
145 | #' ```{r, comment = "#>", collapse = TRUE}
146 | #' csat %>%
147 | #' transmute(
148 | #' crossover(.xcols = c(type, product, csat),
149 | #' .y = dist_values,
150 | #' .fns = ~ if_else(.y == .x, 1, 0),
151 | #' .names_fn = ~ gsub("\\s", "_", .x) %>% tolower(.)
152 | #' )) %>%
153 | #' glimpse
154 | #' ```
155 | #'
156 | #'
157 | #' @export
crossover <- function(.xcols = dplyr::everything(), .y, .fns, ..., .names = NULL, .names_fn = NULL){

  # `cur_data()` fails outside of a dplyr verb; re-raise that failure with a
  # message that names `crossover()`.
  data <- tryCatch(
    dplyr::cur_data(),
    error = function(e) {
      rlang::abort("`crossover()` must only be used inside dplyr verbs.")
    }
  )

  # Single-line text of the current call, handed to `meta_setup()`.
  deparse_call <- deparse(sys.call(),
                          width.cutoff = 500L,
                          backtick = TRUE,
                          nlines = 1L,
                          control = NULL)

  # The arguments are deliberately passed as unevaluated expressions, exactly
  # as before, so they are only forced if and when `meta_setup()` needs them.
  setup <- meta_setup(grp_id = dplyr::cur_group_id(),
                      dep_call = deparse_call,
                      par_frame = parent.frame(),
                      setup_fn = "crossover_setup",
                      cols = rlang::enquo(.xcols),
                      y1 = .y,
                      fns = .fns,
                      names = .names,
                      names_fn = .names_fn)

  vars <- setup$vars
  y <- setup$y

  # No column selected: return an empty one-row tibble.
  if (length(vars) == 0L) {
    return(tibble::new_tibble(list(), nrow = 1L))
  }

  fns <- setup$fns
  names <- setup$names
  by_column <- setup$each

  if (by_column) {
    # `vars` repeats a column name once per `.y` element generated for it, so
    # the selected columns are expanded to line up element-wise with `y`.
    selected <- data[unique(vars)]
    data <- tibble::new_tibble(as.list(selected)[vars], nrow = nrow(selected))
  } else {
    data <- data[vars]
  }

  n_cols <- length(data)
  n_fns <- length(fns)

  # Position of the most recently written result in `out`.
  slot <- 0L

  if (by_column) {
    # Pair column i with y element i and apply every function to that pair.
    out <- vector("list", n_cols * n_fns)

    for (i in seq_len(n_cols)) {
      col <- data[[i]]
      yi <- y[[i]]
      for (fn in fns) {
        slot <- slot + 1L
        out[[slot]] <- fn(col, yi, ...)
      }
    }

  } else {
    # Apply every function to every combination of column and y element;
    # result order: columns (outer), y elements, functions (inner).
    n_y <- length(y)
    out <- vector("list", n_cols * n_y * n_fns)

    for (i in seq_len(n_cols)) {
      col <- data[[i]]
      for (l in seq_len(n_y)) {
        yl <- y[[l]]
        for (fn in fns) {
          slot <- slot + 1L
          out[[slot]] <- fn(col, yl, ...)
        }
      }
    }
  }

  # Recycle all results to a common size and return them as a named tibble.
  size <- vctrs::vec_size_common(!!!out)
  out <- vctrs::vec_recycle_common(!!!out, .size = size)
  names(out) <- names
  tibble::new_tibble(out, nrow = size)
}
243 |
244 |
# Prepare everything `crossover()` needs before it starts looping.
#
# Arguments:
#   cols      Quosure holding the tidy-select expression for the columns.
#   y1        The `.y` input: a vector/list, or a function/formula that is
#             applied to every selected column to generate the `.y` elements.
#   fns       A function, a formula, or a list of functions/formulas.
#   names     `NULL`, a glue specification, or a character vector with one
#             name per output column.
#   names_fn  Optional function/formula applied to the final names.
#   each      Flag; set to TRUE internally when `y1` is a function, in which
#             case every column is only paired with the elements it generated.
#
# Returns a list with the elements `vars` (column names, expanded when `each`
# is TRUE), `y` (the elements to loop over), `fns` (list of functions),
# `names` (output column names) and `each`.
#
# Aborts if `y1` is a function and the data is grouped, if `fns` is not a
# function/formula/list, or if `names` is malformed or has the wrong length.
crossover_setup <- function(cols, y1, fns, names, names_fn, each = FALSE) {

  # setup: cols
  data <- dplyr::cur_data()
  cols <- rlang::quo_set_env(cols,
                             data_mask_top(rlang::quo_get_env(cols),
                                           recursive = FALSE,
                                           inherit = TRUE))
  vars <- tidyselect::eval_select(cols, data)
  vars <- names(vars)

  # setup: .y

  # if .y is function:
  if (is.function(y1) || rlang::is_formula(y1)) {

    if (length(dplyr::cur_group()) > 0) {
      rlang::abort(c("Problem with `crossover()` input `.y`.",
                     i = "If `.y` is a function the underlying data must not be grouped.",
                     x = "`crossover()` was used on a grouped data.frame."))
    }

    # set flag `each`
    each <- TRUE

    # expand vars: repeat each column name once per element it generated, so
    # that `vars` and the flattened `y1` line up element by element
    y1 <- rlang::as_function(y1)
    y1 <- purrr::map(dplyr::select(data, !! cols), y1) # replace with: data[[, vars]]
    vars <- unlist(purrr::imap(y1, ~ rep(.y, length(.x))))
    y1 <- unlist(y1, recursive = FALSE)

    if (!is.list(y1)) y1 <- unname(y1)
  }

  y1_nm <- names(y1)
  y1_idx <- as.character(seq_along(y1))
  # values are only usable in names if every element has length one
  y1_val <- if (is.data.frame(y1) && nrow(y1) != 1) {
    NULL
  } else if (is.list(y1) && is.vector(y1) &&
             any(purrr::map_lgl(y1, ~ length(.x) != 1))) {
    NULL
  } else {
    y1
  }

  # apply `.names` smart default
  if (is.function(fns) || rlang::is_formula(fns)) {
    names <- names %||% "{xcol}_{y}"
    fns <- list(`1` = fns)
  } else {
    names <- names %||% "{xcol}_{y}_{fn}"
  }

  if (!is.list(fns)) {
    rlang::abort(c("Problem with `crossover()` input `.fns`.",
                   i = "Input `.fns` must be a function, a formula, or a list of functions/formulas."))
  }

  # use index for unnamed lists
  if (is.list(y1) && !rlang::is_named(y1)) {
    names(y1) <- y1_idx
  }

  # TODO: Default needed when function in .y returns values?

  # handle formulas
  fns <- purrr::map(fns, rlang::as_function)

  # make sure fns has names, use number to replace unnamed
  if (is.null(names(fns))) {
    names_fns <- seq_along(fns)
  } else {
    names_fns <- names(fns)
    empties <- which(names_fns == "")
    if (length(empties)) {
      names_fns[empties] <- empties
    }
  }

  # setup control flow:
  # Number of output columns. With `each`, `vars` was expanded to the length
  # of `y1`, so every y element belongs to exactly one column. Otherwise every
  # column is combined with every y element, hence `length(vars)` (and not
  # `length(y1)` a second time) enters the product.
  vars_no <- if (each) {
    length(y1) * length(fns)
  } else {
    length(vars) * length(y1) * length(fns)
  }
  maybe_glue <- any(grepl("{.*}", names, perl = TRUE))
  is_glue <- any(grepl("{(xcol|y|y_val|y_nm|y_idx|fn)}", names, perl = TRUE))

  # if .names use glue syntax:
  if (is_glue) {

    if (length(names) > 1) {
      rlang::abort(c("Problem with `crossover()` input `.names`.",
                     i = "Glue specification must be a character vector of length == 1.",
                     x = paste0("`.names` is of length: ", length(names), ".")))
    }

    # warn that default values are used if conditions not met
    if (is.null(y1_val) && grepl("{y_val}", names, perl = TRUE)) {
      rlang::warn("in `crossover()` `.names`: used 'y_idx' instead of 'y_val'. The latter only works with lists if all elements are length 1.")
    }

    if (is.null(y1_nm) && grepl("{y_nm}", names, perl = TRUE)) {
      rlang::warn("in `crossover()` `.names`: used 'y_idx' instead of 'y_nm', since the input object is unnamed.")
    }

    if (each) {
      # vectorised glue: one name per (column/y pair, function)
      names <- vctrs::vec_as_names(glue::glue(names,
                                              xcol = rep(vars, each = length(fns)),
                                              y = rep(names(y1) %||% y1, each = length(fns)),
                                              y_val = rep(y1_val %||% y1_idx, each = length(fns)),
                                              y_nm = rep(y1_nm %||% y1_idx, each = length(fns)),
                                              y_idx = rep(y1_idx, each = length(fns)),
                                              fn = rep(names_fns, length(y1))),
                                   repair = "check_unique")
    } else {
      # same nesting order as the loops in `crossover()`:
      # columns (outer), y elements, functions (inner)
      n_cols <- length(vars)
      n_y1 <- length(y1)
      n_nm_fns <- length(names_fns)
      seq_n_col <- seq_len(n_cols)
      seq_n_y1 <- seq_len(n_y1)
      seq_nm_fns <- seq_len(n_nm_fns)
      k <- 1L
      out <- vector("character", n_cols* n_y1 * n_nm_fns)

      for (i in seq_n_col) {
        for(l in seq_n_y1) {
          for (j in seq_nm_fns) {
            out[[k]] <- glue::glue(names,
                                   xcol = vars[[i]],
                                   y = names(y1)[[l]] %||% y1[[l]],
                                   y_val = y1_val[[l]] %||% y1_idx[[l]],
                                   y_nm = y1_nm[[l]] %||% y1_idx[[l]],
                                   y_idx = y1_idx[[l]],
                                   fn = names_fns[[j]])
            k <- k + 1L
          }
        }
      }
      names <- vctrs::vec_as_names(out, repair = "check_unique")
    }


    # no correct glue syntax detected
  } else {
    # glue syntax might be wrong
    if (maybe_glue && length(names) == 1 && vars_no > 1) {
      rlang::abort(c("Problem with `crossover()` input `.names`.",
                     x = "Unrecognized glue specification `{...}` detected in `.names`.",
                     i = "`.names` only supports the following expressions: '{xcol}'. '{y}', '{y_val}', '{y_nm}', '{y_idx}' or '{fn}'."
      ))
    }
    # check if non-glue names are unique
    vctrs::vec_as_names(names, repair = "check_unique")
    # check number of names
    if (length(names) != vars_no) {
      rlang::abort(c("Problem with `crossover()` input `.names`.",
                     i = "The number of elements in `.names` must equal the number of new columns.",
                     x = paste0(length(names), " elements provided to `.names`, but the number of new columns is ", vars_no, ".")
      ))
    }
  }

  # apply names_fn
  if (!is.null(names_fn)) {
    nm_f <- rlang::as_function(names_fn)
    names <- purrr::map_chr(names, nm_f)
  }

  value <- list(vars = vars, y = y1, fns = fns, names = names, each = each)
  value
}
413 |
414 |
415 |
416 |
417 |
418 |
419 |
--------------------------------------------------------------------------------
/R/data-csat.R:
--------------------------------------------------------------------------------
1 | #' Customer Satisfaction Survey (recoded data)
2 | #'
3 | #' This data is randomly generated. It resembles data from a customer
4 | #' satisfaction survey using CSAT (Customer Satisfaction Score) for a
5 | #' contract-based product. The data has been recoded. The raw version of this data
6 | #' set can be found here <[`csatraw`][csatraw]>.
7 | #'
8 | #' @format A tibble with 150 rows and 15 variables:
9 | #' \describe{
10 | #' \item{cust_id}{Customer identification number}
11 | #' \item{type}{Type of customer: "new", "existing" or "reactive"}
12 | #' \item{product}{The type of product: "basic", "advanced" or "premium"}
13 | #' \item{csat}{The overall Customer Satisfaction Score}
14 | #' \item{csat_open}{Follow-up question why the respondent gave this specific
15 | #' Customer Satisfaction rating. The open-ended answers have been coded into six
16 | #' categories (multiple answers possible).}
17 | #' \item{postal_contact, phone_contact, email_contact, website_contact,
18 | #' shop_contact}{When did the customer have last contact via given channel?}
19 | #' \item{postal_rating, phone_rating, email_rating, website_rating,
20 | #' shop_rating}{If customer had contact over the given channel:
21 | #' How satisfied was he?}
22 | #' }
23 | #' @examples
24 | #' csat
25 | "csat"
26 |
--------------------------------------------------------------------------------
/R/data-csatraw.R:
--------------------------------------------------------------------------------
1 | #' Customer Satisfaction Survey (raw data)
2 | #'
3 | #' This data is randomly generated. It resembles raw data from a customer
4 | #' satisfaction survey using CSAT (Customer Satisfaction Score) for a
5 | #' contract-based product. The first three variables are given, all other
6 | #' variables come from a survey tool and are only named "item1" etc.
7 | #' A recoded version of this data set can be found here <[`csat`][csat]>.
8 | #'
9 | #' @format A tibble with 150 rows and 15 variables:
10 | #' \describe{
11 | #' \item{cust_id}{Customer identification number}
12 | #' \item{type}{Type of customer: "new", "existing" or "reactive"}
13 | #' \item{product}{The type of product: "basic", "advanced" or "premium"}
14 | #' \item{item1}{The overall Customer Satisfaction Score\cr\cr Scale: Ranging from 1 =
15 | #' "Very unsatisfied" to 5 = "Very satisfied"}
16 | #' \item{item1_open}{Follow-up question why the respondent gave this specific
17 | #' Customer Satisfaction rating. The open-ended answers have been coded into six
18 | #' categories: 11 = "great product", 12 = "good service", 13 = "friendly staff",
19 | #' 21 = "too expensive", 22 = "unfriendly", 23 = "no response" (multiple answers
20 | #' possible).}
21 | #' \item{item2a, item3a, item4a, item5a, item6a}{When did the customer have last
22 | #' contact via postal mail (item2a), phone (item3a), email (item4a), website
23 | #' (item5a), a retail shop (item6a) ?\cr\cr Scale: 0 = "no contact", 1 = "more
24 | #' than 3 years ago", 2 = "within 1 to 3 years", 3 = "within the last year"}
25 | #' \item{item2b, item3b, item4b, item5b, item6b}{If customer had contact
26 | #' via postal mail (item2b), phone (item3b), email (item4b), website (item5b),
27 | #' a retail shop (item6b): How satisfied was he?\cr\cr
28 | #' Scale: Ranging from 1 = "Very unsatisfied", to 5 = "Very satisfied"}
29 | #' }
30 | #' @examples
31 | #' csatraw
32 | "csatraw"
33 |
--------------------------------------------------------------------------------
/R/dplyover.R:
--------------------------------------------------------------------------------
1 | #' @description
2 | #' To learn more about dplyover, start with the vignette:
3 | #' `browseVignettes(package = "dplyover")`
4 | #' @importFrom rlang %||%
5 | #' @importFrom rlang :=
6 | #' @importFrom stats na.omit
7 | "_PACKAGE"
8 |
--------------------------------------------------------------------------------
/R/meta_setup.R:
--------------------------------------------------------------------------------
1 | # deparse a call into a string (similar to dplyr:::key_deparse)
2 | # this function is copied from dplyr
3 | # see README section Acknowledgements as well as dplyr's license and copyright
4 | deparse_call <- function(call) {
5 | deparse(call,
6 | width.cutoff = 500L, # wide cutoff so that long calls are not broken up early
7 | backtick = TRUE,
8 | nlines = 1L, # produce at most one line of output
9 | control = NULL)
10 | }
11 |
12 | # environment in which the last value of an across2 pre/suf error is stored
13 | .last <- rlang::new_environment() # NOTE(review): nothing in this part of the file reads or writes `.last`; confirm its usage in the across2 code
14 |
15 | ## meta setup used by all major dplyover functions (tests passing)
16 | #> this setup is rather dodgy and currently being overhauled
17 | #> see new_meta_setup branch!
18 | #> and yes, we shouldn't write something in par_frame since dplyover does not create this environment
19 | meta_setup <- function(grp_id, dep_call, par_frame, setup_fn, ...) {
20 | # Runs `setup_fn` on the arguments in `...`, stores the result in `par_frame` under ".__dplyover_setup__." and returns it. For `grp_id > 1` a stored result is returned instead when the recorded call equals `dep_call`.
21 | call_nm <- sub("([a-z0-9]+).*", "\\1()", dep_call) # first run of lower-case letters/digits in `dep_call` followed by "()"; used for the "^over" check and the warning below
22 | # arguments that are forwarded to `setup_fn`
23 | dots <- rlang::list2(...)
24 | # becomes TRUE when a stored setup exists but was recorded for a different call
25 | wrong_setup <- FALSE
26 |
27 | # meta setup
28 | setup_exists <- exists(".__dplyover_setup__.", envir = par_frame)
29 | # note: `exists()` is called with its default `inherits = TRUE`
30 | # if setup already exists
31 | if (setup_exists && grp_id > 1L) {
32 | # get data
33 | parent_setup <- get(".__dplyover_setup__.", envir = par_frame)
34 | # get call number
35 | call_no <- which.min(parent_setup$call_his) # position of the smallest entry in `call_his`
36 | call_id <- paste0("call", call_no)
37 | # update "call_his"
38 | par_frame[[".__dplyover_setup__."]][["call_his"]][call_no] <- grp_id
39 | # check call and get data from existing call
40 | if (identical(parent_setup$call_lang[call_no], dep_call)) {
41 | return(parent_setup[[call_id]]$setup)
42 | }
43 | # otherwise continue
44 | wrong_setup <- TRUE
45 | }
46 | # if this is a new call to over or if setup went wrong
47 | if (!setup_exists || wrong_setup) {
48 |
49 | # new setup
50 | if (grp_id == 1 && !grepl("^over", call_nm, perl = TRUE)) { # only for the first group and for calls whose name does not start with "over"
51 | call_info <- inspect_call() # helper defined outside this section; only its "warn" element is used here
52 | if (call_info[["warn"]]){
53 | rlang::warn(glue::glue("`{call_nm}` does not support the `.keep` argument in `dplyr::mutate()` when set to 'used' or 'unused'."))
54 | }
55 | }
56 | par_frame$`.__dplyover_setup__.` <- list() # start from an empty setup, overwriting any setup stored in `par_frame`
57 | par_frame[[".__dplyover_setup__."]][["call_his"]] <- grp_id
58 | par_frame[[".__dplyover_setup__."]][["call_lang"]] <- dep_call
59 | call_id <- paste0("call", grp_id)
60 | # existing setup, but new call
61 | } else {
62 | parent_setup <- get(".__dplyover_setup__.", envir = par_frame)
63 | # register new call
64 | par_frame[[".__dplyover_setup__."]][["call_his"]] <- c(parent_setup$call_his, 1)
65 | par_frame[[".__dplyover_setup__."]][["call_lang"]] <- c(parent_setup$call_lang, dep_call)
66 | # get number of current call
67 | call_id <- paste0("call", which.min(parent_setup$call_his)) # computed from `parent_setup`, i.e. the history as read before the new call was appended
68 | }
69 |
70 | # in both cases: write data into par_frame
71 | par_frame[[".__dplyover_setup__."]][[call_id]][["setup"]] <-
72 | setup <- do.call(setup_fn, dots) # chained assignment: the result is stored in `par_frame` and in the local `setup`
73 |
74 | setup
75 | }
76 |
--------------------------------------------------------------------------------
/R/over.R:
--------------------------------------------------------------------------------
1 | #' Apply functions to a list or vector in 'dplyr'
2 | #'
3 | #' @description
4 | #' `over()` makes it easy to create new columns inside a [dplyr::mutate()] or
5 | #' [dplyr::summarise()] call by applying a function (or a set of functions) to
6 | #' an atomic vector or list using a syntax similar to [dplyr::across()].
7 | #' The main difference is that [dplyr::across()] transforms or creates new columns
8 | #' based on existing ones, while `over()` can create new columns based on a
9 | #' vector or list to which it will apply one or several functions.
10 | #' Whereas [dplyr::across()] allows `tidy-selection` helpers to select columns,
11 | #' `over()` provides its own helper functions to select strings or values based
12 | #' on either (1) values of specified columns or (2) column names. See the
13 | #' examples below and the `vignette("why_dplyover")` for more details.
14 | #'
15 | #' @param .x An atomic vector or list to apply functions to. Alternatively a
16 | #' <[`selection helper`][selection_helpers]> can be used to create
17 | #' a vector.
18 | #'
19 | #' @param .fns Functions to apply to each of the elements in `.x`. For
20 | #' functions that expect variable names as input, the selected strings need to
21 | #' be turned into symbols and evaluated. `dplyover` comes with a genuine helper
22 | #' function that evaluates strings as names [`.()`].
23 | #'
24 | #' Possible values are:
25 | #'
26 | #' - A function
27 | #' - A purrr-style lambda
28 | #' - A list of functions/lambdas
29 | #'
30 | #' For examples see the example section below.
31 | #'
32 | #' Note that, unlike `across()`, `over()` does not accept `NULL` as a
33 | #' value to `.fns`.
34 | #'
35 | #' @param ... Additional arguments for the function calls in `.fns`.
36 | #'
37 | #' @param .names A glue specification that describes how to name the output
38 | #' columns. This can use `{x}` to stand for the selected vector element, and
39 | #' `{fn}` to stand for the name of the function being applied. The default
40 | #' (`NULL`) is equivalent to `"{x}"` for the single function case and
41 | #' `"{x}_{fn}"` for the case where a list is used for `.fns`.
42 | #'
43 | #' Note that, depending on the nature of the underlying object in `.x`,
44 | #' specifying `{x}` will yield different results:
45 | #'
46 | #' - If `.x` is an unnamed atomic vector, `{x}` will represent each value.
47 | #' - If `.x` is a named list or atomic vector, `{x}` will represent each name.
48 | #' - If `.x` is an unnamed list, `{x}` will be the index number running from 1 to `length(x)`.
49 | #'
50 | #' This standard behavior (interpretation of `{x}`) can be overwritten by
51 | #' directly specifying:
52 | #'
53 | #' - `{x_val}` for `.x`'s values
54 | #' - `{x_nm}` for its names
55 | #' - `{x_idx}` for its index numbers
56 | #'
57 | #' Alternatively, a character vector of length equal to the number of columns to
58 | #' be created can be supplied to `.names`. Note that in this case, the glue
59 | #' specification described above is not supported.
60 | #'
61 | #' @param .names_fn Optionally, a function that is applied after the glue
62 | #' specification in `.names` has been evaluated. This is, for example, helpful
63 | #' in case the resulting names need to be further cleaned or trimmed.
64 | #'
65 | #' @returns
66 | #' A tibble with one column for each element in `.x` and each function in `.fns`.
67 | #'
68 | #' @section Note:
69 | #' Similar to `dplyr::across()`, `over()` works only inside dplyr verbs.
70 | #'
71 | #' @seealso
72 | #' [over2()] to apply a function to two objects.
73 | #'
74 | #' All members of the <[`over-across function family`][over_across_family]>.
75 | #'
76 | #' @section Examples:
77 | #'
78 | #' ```{r, child = "man/rmd/setup.Rmd"}
79 | #' ```
80 | #'
81 | #' It has two main use cases. They differ in how the elements in `.x`
82 | #' are used. Let's first attach `dplyr`:
83 | #'
84 | #' ```{r, comment = "#>", collapse = TRUE}
85 | #' library(dplyr)
86 | #'
87 | #' # For better printing
88 | #' iris <- as_tibble(iris)
89 | #' ```
90 | #'
91 | #'
92 | #' #### (1) The General Use Case
93 | #' Here the values in `.x` are used as inputs to one or more functions in `.fns`.
94 | #' This is useful, when we want to create several new variables based on the same
95 | #' function with varying arguments. A good example is creating a bunch of lagged
96 | #' variables.
97 | #'
98 | #' ```{r, comment = "#>", collapse = TRUE}
99 | #' tibble(x = 1:25) %>%
100 | #' mutate(over(c(1:3),
101 | #' ~ lag(x, .x)))
102 | #' ```
103 | #'
104 | #' Let's create a dummy variable for each unique value in 'Species':
105 | #' ```{r, comment = "#>", collapse = TRUE}
106 | #' iris %>%
107 | #' mutate(over(unique(Species),
108 | #' ~ if_else(Species == .x, 1, 0)),
109 | #' .keep = "none")
110 | #' ```
111 | #'
112 | #' With `over()` it is also possible to create several dummy variables with
113 | #' different thresholds. We can use the `.names` argument to control the output
114 | #' names:
115 | #'
116 | #' ```{r, comment = "#>", collapse = TRUE}
117 | #' iris %>%
118 | #' mutate(over(seq(4, 7, by = 1),
119 | #' ~ if_else(Sepal.Length < .x, 1, 0),
120 | #' .names = "Sepal.Length_{x}"),
121 | #' .keep = "none")
122 | #' ```
123 | #'
124 | #' A similar approach can be used with dates. Below we loop over a date
125 | #' sequence to check whether the date falls within a given start and end
126 | #' date. We can use the `.names_fn` argument to clean the resulting output
127 | #' names:
128 | #'
129 | #' ```{r, comment = "#>", collapse = TRUE}
130 | #' # some dates
131 | #' dat_tbl <- tibble(start = seq.Date(as.Date("2020-01-01"),
132 | #' as.Date("2020-01-15"),
133 | #' by = "days"),
134 | #' end = start + 10)
135 | #'
136 | #' dat_tbl %>%
137 | #' mutate(over(seq(as.Date("2020-01-01"),
138 | #' as.Date("2020-01-21"),
139 | #' by = "weeks"),
140 | #' ~ .x >= start & .x <= end,
141 | #' .names = "day_{x}",
142 | #' .names_fn = ~ gsub("-", "", .x)))
143 | #' ```
144 | #'
145 | #' `over()` can summarise data in wide format. In the example below, we want to
146 | #' know for each group of customers (`new`, `existing`, `reactive`), what
147 | #' percentage of the respondents gave which rating on a five-point Likert scale
148 | #' (`item1`). A usual approach in the tidyverse would be to use
149 | #' `count %>% group_by %>% mutate`, which yields the same result in the usually
150 | #' preferred long format. Sometimes, however, we might want this kind of summary
151 | #' in the wide format, and in this case `over()` comes in handy:
152 | #'
153 | #' ```{r, comment = "#>", collapse = TRUE}
154 | #' csatraw %>%
155 | #' group_by(type) %>%
156 | #' summarise(over(c(1:5),
157 | #' ~ mean(item1 == .x)))
158 | #' ```
159 | #'
160 | #' Instead of a vector we can provide a named list of vectors to calculate the
161 | #' top two and bottom two categories on the fly:
162 | #'
163 | #' ```{r, comment = "#>", collapse = TRUE}
164 | #' csatraw %>%
165 | #' group_by(type) %>%
166 | #' summarise(over(list(bot2 = c(1:2),
167 | #' mid = 3,
168 | #' top2 = c(4:5)),
169 | #' ~ mean(item1 %in% .x)))
170 | #' ```
171 | #'
172 | #' `over()` can also loop over columns of a data.frame. In the example below we
173 | #' want to create four different dummy variables of `item1`: (i) the top and (ii)
174 | #' bottom category as well as (iii) the top two and (iv) the bottom two categories.
175 | #' We can create a lookup `data.frame` and use all columns but the first as input to
176 | #' `over()`. In the function call we make use of base R's `match()`, where `.x`
177 | #' represents the new values and `recode_df[, 1]` refers to the old values.
178 | #'
179 | #' ```{r, comment = "#>", collapse = TRUE}
180 | #'
181 | #' recode_df <- data.frame(old = c(1, 2, 3, 4, 5),
182 | #' top1 = c(0, 0, 0, 0, 1),
183 | #' top2 = c(0, 0, 0, 1, 1),
184 | #' bot1 = c(1, 0, 0, 0, 0),
185 | #' bot2 = c(1, 1, 0, 0, 0))
186 | #'
187 | #' csatraw %>%
188 | #' mutate(over(recode_df[,-1],
189 | #' ~ .x[match(item1, recode_df[, 1])],
190 | #' .names = "item1_{x}")) %>%
191 | #' select(starts_with("item1"))
192 | #' ```
193 | #'
194 | #' `over()` works nicely with comma separated values stored in character vectors.
195 | #' In the example below, the column `csat_open` contains one or more comma
196 | #' separated reasons why a specific customer satisfaction rating was given.
197 | #' We can easily create a column for each response category with the help of
198 | #' `dist_values` - a wrapper around `unique` which can split vector elements
199 | #' using a separator:
200 | #'
201 | #' ```{r, comment = "#>", collapse = TRUE}
202 | #' csat %>%
203 | #' mutate(over(dist_values(csat_open, .sep = ", "),
204 | #' ~ as.integer(grepl(.x, csat_open)),
205 | #' .names = "rsp_{x}",
206 | #' .names_fn = ~ gsub("\\s", "_", .x)),
207 | #' .keep = "none") %>% glimpse
208 | #' ```
209 | #'
210 | #'
211 | #' #### (2) A Very Specific Use Case
212 | #' Here strings are supplied to `.x` to construct column names (sharing the
213 | #' same stem). This allows us to dynamically use more than one column in the
214 | #' function calls in `.fns`. To work properly, the strings need to be
215 | #' turned into symbols and evaluated. For this {dplyover} provides a genuine
216 | #' helper function `.()` that evaluates strings and helps to declutter the
217 | #' otherwise rather verbose code. `.()` supports glue syntax and takes a string
218 | #' as argument.
219 | #'
220 | #' Below are a few examples using two columns in the function calls in `.fns`.
221 | #' For the two column case [across2()] provides a more intuitive API that is
222 | #' closer to the original `dplyr::across`. Using `.()` inside `over` is really
223 | #' useful for cases with more than two columns.
224 | #'
225 | #' Consider the following example of a purrr-style formula in `.fns` using `.()`:
226 | #'
227 | #' ```{r, comment = "#>", collapse = TRUE}
228 | #' iris %>%
229 | #' mutate(over(c("Sepal", "Petal"),
230 | #' ~ .("{.x}.Width") + .("{.x}.Length")
231 | #' ))
232 | #' ```
233 | #'
234 | #' The above syntax is equal to the more verbose:
235 | #' ```{r, comment = "#>", collapse = TRUE}
236 | #' iris %>%
237 | #' mutate(over(c("Sepal", "Petal"),
238 | #' ~ eval(sym(paste0(.x, ".Width"))) +
239 | #' eval(sym(paste0(.x, ".Length")))
240 | #' ))
241 | #' ```
242 | #'
243 | #' `.()` also works with anonymous functions:
244 | #' ```{r, comment = "#>", collapse = TRUE}
245 | #' iris %>%
246 | #' summarise(over(c("Sepal", "Petal"),
247 | #' function(x) mean(.("{x}.Width"))
248 | #' ))
249 | #' ```
250 | #'
251 | #' A named list of functions:
252 | #' ```{r, comment = "#>", collapse = TRUE}
253 | #' iris %>%
254 | #' mutate(over(c("Sepal", "Petal"),
255 | #' list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
256 | #' sum = ~ .("{.x}.Width") + .("{.x}.Length"))
257 | #' ),
258 | #' .keep = "none")
259 | #' ```
260 | #'
261 | #' Again, use the `.names` argument to control the output names:
262 | #' ```{r, comment = "#>", collapse = TRUE}
263 | #' iris %>%
264 | #' mutate(over(c("Sepal", "Petal"),
265 | #' list(product = ~ .("{.x}.Width") * .("{.x}.Length"),
266 | #' sum = ~ .("{.x}.Width") + .("{.x}.Length")),
267 | #' .names = "{fn}_{x}"),
268 | #' .keep = "none")
269 | #' ```
270 | #' @export
271 | over <- function(.x, .fns, ..., .names = NULL, .names_fn = NULL){
272 | # any error raised by `dplyr::cur_group_id()` is replaced by the message below
273 | grp_id <- tryCatch({
274 | dplyr::cur_group_id()
275 | }, error = function(e) {
276 | rlang::abort("`over()` must only be used inside dplyr verbs.")
277 | })
278 | # obtain inputs, functions and output names from `over_setup`, called through `meta_setup`
279 | setup <- meta_setup(dep_call = deparse_call(sys.call()),
280 | grp_id = grp_id,
281 | par_frame = parent.frame(),
282 | setup_fn = "over_setup",
283 | x1 = .x,
284 | fns = .fns,
285 | names = .names,
286 | names_fn = .names_fn)
287 |
288 | x <- setup$x
289 | fns <- setup$fns
290 | names <- setup$names
291 |
292 | # check empty input
293 | if (length(x) == 0L) {
294 | return(tibble::new_tibble(list(), nrow = 1L)) # tibble without columns and with one row
295 | }
296 | # one output slot per combination of element and function
297 | n_x <- length(x)
298 | n_fns <- length(fns)
299 | seq_n_x <- seq_len(n_x)
300 | seq_fns <- seq_len(n_fns)
301 | k <- 1L
302 | out <- vector("list", n_x * n_fns)
303 | # elements in the outer loop, functions in the inner loop: results are ordered element by element
304 | for (i in seq_n_x) {
305 | xi <- x[[i]]
306 | for (j in seq_fns) {
307 | fn <- fns[[j]]
308 | out[[k]] <- fn(xi, ...) # `...` is passed on to every function call
309 | k <- k + 1L
310 | }
311 | }
312 | size <- vctrs::vec_size_common(!!!out) # common size of all results
313 | out <- vctrs::vec_recycle_common(!!!out, .size = size) # recycle each result to that size
314 | names(out) <- names
315 | tibble::new_tibble(out, nrow = size)
316 | }
317 |
318 |
319 | over_setup <- function(x1, fns, names, names_fn) {
320 | # Normalises `fns` to a list of functions, builds or validates the output names and returns `list(x, fns, names)`.
321 | # setup name variants
322 | x1_nm <- names(x1)
323 | x1_idx <- as.character(seq_along(x1))
324 | x1_val <- if (is.data.frame(x1) && nrow(x1) != 1) { # NULL for data frames with nrow != 1 and for lists containing an element whose length is not 1
325 | NULL
326 | } else if (is.list(x1) && is.vector(x1) &&
327 | any(purrr::map_lgl(x1, ~ length(.x) != 1))) {
328 | NULL
329 | } else {
330 | x1
331 | }
332 |
333 | # apply `.names` smart default
334 | if (is.function(fns) || rlang::is_formula(fns)) {
335 | names <- names %||% "{x}"
336 | fns <- list(`1` = fns) # wrap a single function/formula in a list named "1"
337 | } else {
338 | names <- names %||% "{x}_{fn}"
339 | }
340 |
341 | if (!is.list(fns)) {
342 | rlang::abort(c("Problem with `over()` input `.fns`.",
343 | i = "Input `.fns` must be a function, a formula, or a list of functions/formulas."))
344 | }
345 |
346 | # use index for unnamed lists
347 | if (is.list(x1) && !rlang::is_named(x1)) {
348 | names(x1) <- x1_idx
349 | }
350 |
351 | # handle formulas
352 | fns <- purrr::map(fns, rlang::as_function)
353 |
354 | # make sure fns has names, use number to replace unnamed
355 | if (is.null(names(fns))) {
356 | names_fns <- seq_along(fns)
357 | } else {
358 | names_fns <- names(fns)
359 | empties <- which(names_fns == "")
360 | if (length(empties)) {
361 | names_fns[empties] <- empties # an empty name is replaced by the position of that function
362 | }
363 | }
364 |
365 | # setup control flow:
366 | vars_no <- length(x1) * length(fns) # number of columns that will be created
367 | maybe_glue <- any(grepl("{.*}", names, perl = TRUE)) # any "{...}" at all
368 | is_glue <- any(grepl("{(x|x_val|x_nm|x_idx|fn)}", names, perl = TRUE)) # at least one of the supported placeholders
369 |
370 | # if .names use glue syntax:
371 | if (is_glue) {
372 |
373 | if (length(names) > 1) {
374 | rlang::abort(c("Problem with `over()` input `.names`.",
375 | i = "Glue specification must be a character vector of length == 1.",
376 | x = paste0("`.names` is of length: ", length(names), ".")))
377 | }
378 |
379 | # warn that default values are used if conditions not met
380 | if (is.null(x1_val) && grepl("{x_val}", names, perl = TRUE)) {
381 | rlang::warn("in `over()` `.names`: used 'x_idx' instead of 'x_val'. The latter only works with lists if all elements are length 1.")
382 | }
383 |
384 | if (is.null(x1_nm) && grepl("{x_nm}", names, perl = TRUE)) {
385 | rlang::warn("in `over()` `.names`: used 'x_idx' instead of 'x_nm', since the input object is unnamed.")
386 | }
387 | # the x variants are repeated `each = length(fns)` times and the function names are cycled, giving one name per element/function pair
388 | names <- vctrs::vec_as_names(glue::glue(names,
389 | x = rep(names(x1) %||% x1, each = length(fns)),
390 | x_val = rep(x1_val %||% x1_idx, each = length(fns)),
391 | x_nm = rep(x1_nm %||% x1_idx, each = length(fns)),
392 | x_idx = rep(x1_idx, each = length(fns)),
393 | fn = rep(names_fns, length(x1))),
394 | repair = "check_unique")
395 |
396 | # no correct glue syntax detected
397 | } else {
398 | # glue syntax might be wrong
399 | if (maybe_glue && length(names) == 1 && vars_no > 1) {
400 | rlang::abort(c("Problem with `over()` input `.names`.",
401 | x = "Unrecognized glue specification `{...}` detected in `.names`.",
402 | i = "`.names` only supports the following expressions: '{x}', '{x_val}', '{x_nm}', '{x_idx}' or '{fn}'."
403 | ))
404 | }
405 | # check that non-glue names are unique
406 | vctrs::vec_as_names(names, repair = "check_unique") # return value is not used; called only for the uniqueness check
407 | # check number of names
408 | if (length(names) != vars_no) {
409 | rlang::abort(c("Problem with `over()` input `.names`.",
410 | i = "The number of elements in `.names` must equal the number of new columns.",
411 | x = paste0(length(names), " elements provided to `.names`, but the number of new columns is ", vars_no, ".")
412 | ))
413 | }
414 | }
415 |
416 | # apply names_fn
417 | if (!is.null(names_fn)) {
418 | nm_f <- rlang::as_function(names_fn)
419 | names <- purrr::map_chr(names, nm_f) # applied to each name separately
420 | }
421 |
422 | value <- list(x = x1, fns = fns, names = names)
423 | value
424 | }
425 |
426 |
427 |
428 |
--------------------------------------------------------------------------------
/R/over_across_family.R:
--------------------------------------------------------------------------------
1 | #' The over-across function family
2 | #'
3 | #' @description
4 | #'
5 | #' `dplyover` extends `dplyr`'s functionality by building a function family
6 | #' around `dplyr::across()`.
7 | #'
8 | #' The goal of this **over-across function family** is to provide a concise and
9 | #' uniform syntax which can be used to create columns by applying functions to
10 | #' vectors and / or sets of columns in dplyr. Ideally, this will improve our
11 | #' mental model so that it is easier to tackle problems where the solution is
12 | #' based on creating new columns.
13 | #'
14 | #' The functions in the over-across function family create columns by applying
15 | #' one or several functions to:
16 | #'
17 | #' ### basic functions
18 | #' - [dplyr::across()]: a set of columns
19 | #' - [over()]: a vector (list or atomic vector)
20 | #'
21 | #' ### variants
22 | #' - [over2()] two vectors of the same length (pairwise)
23 | #' - [over2x()] two vectors (nested)
24 | #' - [across2()] two sets of columns (pairwise)
25 | #' - [across2x()] two sets of columns (nested)
26 | #' - [crossover()] a set of columns and a vector (nested)
27 | #'
28 | #' @name over_across_family
29 | NULL
30 |
--------------------------------------------------------------------------------
/R/select_strings.R:
--------------------------------------------------------------------------------
1 | #' Select string parts or patterns of column names
2 | #'
3 | #' @description
4 | #'
5 | #' These functions are [selection helpers][selection_helpers].
6 | #' They are intended to be used inside `over()` to extract parts or patterns of
7 | #' the column names of the underlying data.
8 | #'
9 | #' * [cut_names()] selects strings by removing (cutting off) the specified `.pattern`.
10 | #' This functionality resembles `stringr::str_remove_all()`.
11 | #'
12 | #' * [extract_names()] selects strings by extracting the specified `.pattern`.
13 | #' This functionality resembles `stringr::str_extract()`.
14 | #'
15 | #' @param .pattern Pattern to look for.
16 | #' @param .vars A character vector with variable names. When used inside `over`
17 | #' all column names of the underlying data are automatically supplied to `.vars`.
18 | #' This argument is useful when testing the functionality outside the context of
19 | #' `over()`.
20 | #' @param .remove Pattern to remove from the variable names provided in `.vars`.
21 | #' When this argument is provided, all variable names in `.vars` that match
22 | #' the pattern specified in `.remove` will be removed, before the `.pattern` to
23 | #' look for will be applied.
24 | #'
25 | #' @return
26 | #' A character vector.
27 | #'
28 | #' @section Examples:
29 | #'
30 | #' ```{r, child = "man/rmd/setup.Rmd"}
31 | #' ```
32 | #'
33 | #' Selection helpers can be used inside `dplyover::over()` which in turn must be
34 | #' used inside `dplyr::mutate` or `dplyr::summarise`. Let's first attach `dplyr`
35 | #' (and `stringr` for comparison):
36 | #'
37 | #' ```{r, comment = "#>", collapse = TRUE}
38 | #' library(dplyr)
39 | #' library(stringr)
40 | #'
41 | #' # For better printing
42 | #' iris <- as_tibble(iris)
43 | #' ```
44 | #'
45 | #' Let's first compare `cut_names()` and `extract_names()` to their {stringr}
46 | #' equivalents `stringr::str_remove_all()` and `stringr::str_extract()`:
47 | #'
48 | #' We can observe two main differences:
49 | #'
50 | #' (1) `cut_names()` and `extract_names()` only return strings where the function
51 | #' was applied successfully (when characters have actually been removed or
52 | #' extracted). `stringr::str_remove_all()` returns unmatched strings as is, while
53 | #' `stringr::str_extract()` returns `NA`.
54 | #'
55 | #' ```{r, comment = "#>", collapse = TRUE}
56 | #' cut_names("Width", .vars = names(iris))
57 | #' str_remove_all(names(iris), "Width")
58 | #'
59 | #' extract_names("Length|Width", .vars = names(iris))
60 | #' str_extract(names(iris), "Length|Width")
61 | #' ```
62 | #'
63 | #' (2) `cut_names()` and `extract_names()` return only unique values:
64 | #'
65 | #' ```{r, comment = "#>", collapse = TRUE}
66 | #' cut_names("Width", .vars = rep(names(iris), 2))
67 | #' str_remove_all(rep(names(iris), 2), "Width")
68 | #'
69 | #' extract_names("Length|Width", .vars = rep(names(iris), 2))
70 | #' str_extract(rep(names(iris), 2), "Length|Width")
71 | #' ```
72 | #'
73 | #' The examples above do not show that `cut_names()` removes *all* strings matching
74 | #' the `.pattern` argument, while `extract_names()` does only extract the `.pattern`
75 | #' *one* time:
76 | #'
77 | #' ```{r, comment = "#>", collapse = TRUE}
78 | #' cut_names("Width", .vars = "Width.Petal.Width")
79 | #' str_remove_all("Width.Petal.Width", "Width")
80 | #'
81 | #' extract_names("Width", .vars = "Width.Petal.Width")
82 | #' str_extract("Width.Petal.Width", "Width")
83 | #' ```
84 | #'
85 | #' Within [`over()`] `cut_names()` and `extract_names()` automatically use the
86 | #' column names of the underlying data:
87 | #'
88 | #' ```{r, comment = "#>", collapse = TRUE}
89 | #' iris %>%
90 | #' mutate(over(cut_names(".Width"),
91 | #' ~ .("{.x}.Width") * .("{.x}.Length"),
92 | #' .names = "Product_{x}"))
93 | #'
94 | #' iris %>%
95 | #' mutate(over(extract_names("Length|Width"),
96 | #' ~.("Petal.{.x}") * .("Sepal.{.x}"),
97 | #' .names = "Product_{x}"))
98 | #' ```
99 | #'
100 | #' What problem does `cut_names()` solve?
101 | #' In the example above using `cut_names()` might not seem helpful, since we could easily
102 | #' use `c("Sepal", "Petal")` instead. However, there are cases where we have
103 | #' data with a lot of similar pairs of variables sharing a common prefix or
104 | #' suffix. If we want to loop over them using `over()` then `cut_names()` comes
105 | #' in handy.
106 | #'
107 | #' The usage of `extract_names()` might be less obvious. Let's look at raw data
108 | #' from a customer satisfaction survey which contains the following variables.
109 | #'
110 | #' ```{r, comment = "#>", collapse = TRUE}
111 | #' csatraw %>% glimpse(width = 50)
112 | #' ```
113 | #'
114 | #' The survey has several 'item's consisting of two sub-questions / variables 'a'
115 | #' and 'b'. Let's say we want to calculate the product of those two variables for
116 | #' each item. `extract_names()` helps us to select all variables containing
117 | #' 'item' followed by a digit using the regex `"item\\d"` as `.pattern`.
118 | #' However, there is 'item1' and 'item1_open' which are not followed by `a` and
119 | #' `b`. `extract_names()` lets us exclude these items by setting the `.remove`
120 | #' argument to `"^item1"`:
121 | #'
122 | #' ```{r, comment = "#>", collapse = TRUE}
123 | #' csatraw %>%
124 | #' transmute(over(extract_names("item\\d", "^item1"),
125 | #' ~ .("{.x}a") * .("{.x}b"))
126 | #' )
127 | #' ```
128 | #' @name select_vars
129 | NULL
130 |
131 | #' @rdname select_vars
132 | #' @export
133 | cut_names <- function(.pattern, .remove = NULL, .vars = NULL) {
134 | # `.varn` holds the names to work on; it starts as `.vars` (possibly NULL)
135 | .varn <- .vars
136 | # without `.vars`, and if the call at frame `sys.nframe() - 2` is `meta_setup`, the column names of the current data are used
137 | if (is.null(.vars) && sys.call(sys.nframe() - 2)[[1]] == "meta_setup") {
138 | .varn <- names(dplyr::cur_data_all())
139 | }
140 |
141 | if (is.null(.remove)) {
142 | .selected <- .varn
143 | } else {
144 | .notselected <- grep(.remove, .varn, perl = TRUE, value = TRUE) # names that match `.remove`
145 |
146 | if (length(.notselected) == 0) {
147 | rlang::abort(
148 | c("Problem with `cut_names()` input `.remove`.",
149 | i = paste0("The character string provided in `.remove` ('",
150 | .remove, "') must at least match one ",
151 | ifelse(is.null(.vars), "column name.", "element in `.vars`.")),
152 | x = "No match was found."))
153 | }
154 |
155 | .selected <- setdiff(.varn, .notselected)
156 | }
157 |
158 | # keep the names that match `.pattern` and delete every occurrence of the pattern from them
159 | .match <- grepl(.pattern, .selected, perl = TRUE)
160 | .extract <- gsub(.pattern, "", .selected, perl = TRUE)[.match]
161 |
162 | if (length(.extract) == 0) {
163 | rlang::abort(
164 | c("Problem with `cut_names()` input `.pattern`.",
165 | i = paste0("The character string provided in `.pattern` ('",
166 | .pattern, "') must at least return one match."),
167 | x = "No match was found."))
168 | }
169 |
170 | unique(.extract[nchar(.extract) > 0]) # drop empty strings and duplicates
171 | }
172 |
173 | #' @rdname select_vars
174 | #' @export
175 | extract_names <- function(.pattern, .remove = NULL, .vars = NULL) {
176 | # `.varn` holds the names to work on; it starts as `.vars` (possibly NULL)
177 | .varn <- .vars
178 | # without `.vars`, and if the call at frame `sys.nframe() - 2` is `meta_setup`, the column names of the current data are used
179 | if (is.null(.vars) && sys.call(sys.nframe() - 2)[[1]] == "meta_setup") {
180 | .varn <- names(dplyr::cur_data_all())
181 | }
182 |
183 | if (is.null(.remove)) {
184 | .selected <- .varn
185 | } else {
186 | .notselected <- grep(.remove, .varn, perl = TRUE, value = TRUE) # names that match `.remove`
187 |
188 | if (length(.notselected) == 0) {
189 | rlang::abort(
190 | c("Problem with `extract_names()` input `.remove`.",
191 | i = paste0("The character string provided in `.remove` ('",
192 | .remove, "') must at least match one ",
193 | ifelse(is.null(.vars), "column name.", "element in `.vars`.")),
194 | x = "No match was found."))
195 | }
196 | .selected <- setdiff(.varn, .notselected)
197 | }
198 |
199 | .extract <- regexpr(.pattern, .selected, perl = TRUE) # position of the first match in each name
200 | .res <- regmatches(.selected, .extract) # the matched substrings; names without a match contribute nothing
201 |
202 | if (length(.res) == 0) {
203 | rlang::abort(
204 | c("Problem with `extract_names()` input `.pattern`.",
205 | i = paste0("The character string provided in `.pattern` ('",
206 | .pattern, "') must at least return one match."),
207 | x = "No match was found."))
208 | }
209 |
210 | unique(.res)
211 |
212 | }
213 |
--------------------------------------------------------------------------------
/R/select_values.R:
--------------------------------------------------------------------------------
1 | #' Select values from variables
2 | #'
3 | #' @description
4 | #'
5 | #' These functions are [selection helpers][selection_helpers]. They are intended
6 | #' to be used inside all functions that accept a vector as argument (that is `over()`
7 | #' and `crossover()` and all their variants) to extract values of a variable.
8 | #'
9 | #' * [dist_values()] returns all distinct values (or in the case of factor variables:
10 | #' levels) of a variable `x` which are not `NA`.
11 | #'
12 | #' * [seq_range()] returns the sequence between the `range()` of a variable `x`.
13 | #'
14 | #' @param x An atomic vector or list. For [seq_range()] x must be numeric or date.
15 | #' @param .sep A character vector containing regular expression(s) which are used
16 | #' for splitting the values (works only if x is a character vector).
17 | #' @param .sort A character string indicating which sorting scheme is to be applied
18 | #' to distinct values: ascending ("asc"), descending ("desc"), "none" or "levels". The
19 | #' default is ascending, only if x is a factor the default is "levels".
20 | #' @param .by A number (or date expression) representing the increment of the sequence.
21 | #'
22 | #' @return
23 | #' [dist_values()] returns a vector of the same type of x, with exception of
24 | #' factors which are converted to type `"character"`.
25 | #'
26 | #' [seq_range()] returns a vector of type `"integer"` or `"double"`.
27 | #'
28 | #' @section Examples:
29 | #'
30 | #' ```{r, child = "man/rmd/setup.Rmd"}
31 | #' ```
32 | #'
33 | #' Selection helpers can be used inside `dplyover::over()` which in turn must be
34 | #' used inside `dplyr::mutate` or `dplyr::summarise`. Let's first attach `dplyr`:
35 | #'
36 | #' ```{r, comment = "#>", collapse = TRUE}
37 | #' library(dplyr)
38 | #'
39 | #' # For better printing
40 | #' iris <- as_tibble(iris)
41 | #' ```
42 | #'
43 | #' `dist_values()` extracts all distinct values of a column variable.
44 | #' This is helpful when creating dummy variables in a loop using `over()`.
45 | #'
46 | #' ```{r, comment = "#>", collapse = TRUE}
47 | #' iris %>%
48 | #' mutate(over(dist_values(Species),
49 | #' ~ if_else(Species == .x, 1, 0)
50 | #' ),
51 | #' .keep = "none")
52 | #' ```
53 | #'
54 | #' `dist_values()` is just a wrapper around unique. However, it has five
55 | #' differences:
56 | #'
57 | #' (1) `NA` values are automatically stripped. Compare:
58 | #'
59 | #' ```{r, comment = "#>", collapse = TRUE}
60 | #' unique(c(1:3, NA))
61 | #' dist_values(c(1:3, NA))
62 | #' ```
63 | #'
64 | #' (2) Applied on factors, `dist_values()` returns all distinct `levels` as
65 | #' character. Compare the following:
66 | #'
67 | #' ```{r, comment = "#>", collapse = TRUE}
68 | #' fctrs <- factor(c(1:3, NA), levels = c(3:1))
69 | #'
70 | #' fctrs %>% unique() %>% class()
71 | #'
72 | #' fctrs %>% dist_values() %>% class()
73 | #' ```
74 | #'
75 | #' (3) As default, the output is sorted in ascending order for non-factors, and
76 | #' is sorted as the underlying "levels" for factors. This can be controlled by
77 | #' setting the `.sort` argument. Compare:
78 | #'
79 | #' ```{r, comment = "#>", collapse = TRUE}
80 | #' # non-factors
81 | #' unique(c(3,1,2))
82 | #'
83 | #' dist_values(c(3,1,2))
84 | #' dist_values(c(3,1,2), .sort = "desc")
85 | #' dist_values(c(3,1,2), .sort = "none")
86 | #'
87 | #' # factors
88 | #' fctrs <- factor(c(2,1,3, NA), levels = c(3:1))
89 | #'
90 | #' dist_values(fctrs)
91 | #' dist_values(fctrs, .sort = "levels")
92 | #' dist_values(fctrs, .sort = "asc")
93 | #' dist_values(fctrs, .sort = "desc")
94 | #' dist_values(fctrs, .sort = "none")
95 | #'
96 | #' ```
97 | #'
98 | #' (4) When used on a character vector `dist_values` can take a separator
99 | #' `.sep` to split the elements accordingly:
100 | #'
101 | #' ```{r, comment = "#>", collapse = TRUE}
102 | #' c("1, 2, 3",
103 | #' "2, 4, 5",
104 | #' "4, 1, 7") %>%
105 | #' dist_values(., .sep = ", ")
106 | #' ```
107 | #'
108 | #' (5) When used on lists `dist_values` automatically simplifies its input
109 | #' into a vector using `unlist`:
110 | #'
111 | #' ```{r, comment = "#>", collapse = TRUE}
112 | #' list(a = c(1:4), b = (4:6), c(5:10)) %>%
113 | #' dist_values()
114 | #' ```
115 | #'
116 | #' ----------
117 | #' `seq_range()` generates a numeric sequence between the `min` and `max`
118 | #' values of its input variable. This is helpful when creating many dummy
119 | #' variables with varying thresholds.
120 | #'
121 | #' ```{r, comment = "#>", collapse = TRUE}
122 | #' iris %>%
123 | #' mutate(over(seq_range(Sepal.Length, 1),
124 | #' ~ if_else(Sepal.Length > .x, 1, 0),
125 | #' .names = "Sepal.Length.{x}"),
126 | #' .keep = "none")
127 | #' ```
128 | #'
129 | #' Note that if `.by` is a whole number, `min` and `max` of a numeric input are
130 | #' wrapped in `ceiling` and `floor` accordingly. This will prevent the creation of
131 | #' variables that contain only `0` or `1`. Compare the output below with the
132 | #' example above:
133 | #'
134 | #' ```{r, comment = "#>", collapse = TRUE}
135 | #' iris %>%
136 | #' mutate(over(seq(round(min(Sepal.Length), 0),
137 | #' round(max(Sepal.Length), 0),
138 | #' 1),
139 | #' ~ if_else(Sepal.Length > .x, 1, 0),
140 | #' .names = "Sepal.Length.{x}"),
141 | #' .keep = "none")
142 | #' ```
143 | #'
144 | #' `seq_range()` also works on dates:
145 | #'
146 | #' ```{r, comment = "#>", collapse = TRUE}
147 | #' some_dates <- c(as.Date("2020-01-02"),
148 | #' as.Date("2020-05-02"),
149 | #' as.Date("2020-03-02"))
150 | #'
151 | #'
152 | #' some_dates %>%
153 | #' seq_range(., "1 month")
154 | #' ```
155 | #'
156 | #' @name select_values
157 | NULL
158 |
159 | #' @rdname select_values
160 | #' @export
dist_values <- function(x, .sep = NULL, .sort = c("asc", "desc", "none", "levels")) {

  # Returns the distinct, non-NA values of `x`.
  #
  # x     : atomic vector, factor or list (lists are flattened with `unlist()`).
  # .sep  : optional regular expression(s) used to split the elements of a
  #         character `x` before de-duplicating.
  # .sort : "asc", "desc", "none" or "levels" (may be abbreviated). When left
  #         at its default, factors use "levels" and everything else "asc".
  #
  # Factors are returned as character: all levels for "levels", the sorted
  # levels for "asc"/"desc", and the observed values in order of appearance
  # for "none".

  # An untouched default cannot be told apart from "asc" after `match.arg()`,
  # so remember it here; factors default to "levels" instead.
  sort_is_default <- identical(.sort, c("asc", "desc", "none", "levels"))
  # Stored under a name that does not shadow `base::sort()`.
  sort_by <- match.arg(.sort)

  if (is.list(x)) {
    x <- unlist(x)
  }
  if (!is.null(.sep)) {
    x <- unlist(strsplit(x, .sep))
  }

  # Distinct non-NA values; `as.vector()` turns factors into character and
  # strips remaining attributes.
  distinct <- unique(x)
  distinct <- as.vector(distinct[!is.na(distinct)])

  if (is.factor(x)) {
    # Compare the *matched* value so abbreviations such as "lev" behave
    # exactly like "levels".
    if (sort_is_default || sort_by == "levels") {
      return(levels(x))
    }
    # For factors "asc"/"desc" order the levels, not the observed values.
    sortable <- levels(x)
  } else {
    sortable <- distinct
  }

  switch(sort_by,
         asc  = sort(sortable),
         desc = sort(sortable, decreasing = TRUE),
         distinct)
}
195 |
196 | #' @rdname select_values
197 | #' @export
seq_range <- function(x, .by) {

  # Returns the sequence from `min(x)` to `max(x)` in steps of `.by`.
  #
  # x   : numeric (integer or double) or Date vector.
  # .by : increment passed on to `seq()`; a number, or for dates anything
  #       `seq()` accepts as `by` (e.g. "1 month").
  #
  # Aborts when `x` is neither numeric nor a Date.

  # `inherits()` replaces the `is.date()` helper, which is not defined in this
  # block, so the check is self-contained.
  is_date <- inherits(x, "Date")

  # `is.numeric()`/`inherits()` always give a single TRUE/FALSE, also for
  # objects with more than one class (e.g. POSIXct), where testing
  # `class(x) %in% ...` produces a condition of length > 1.
  if (!(is.numeric(x) || is_date)) {
    rlang::abort(
      c("Problem with `seq_range()` input `x`.",
        i = "`x` must be a numeric or date vector.",
        x = paste0("`x` is of class: ", paste(class(x), collapse = "/"), "."))
    )
  }

  bounds <- range(x)

  # With a whole-number step the bounds of a numeric input are moved inwards
  # to whole numbers, so the sequence consists of whole numbers inside the
  # observed range.
  if (!is_date && identical(.by, round(.by, 0))) {
    bounds[1] <- ceiling(bounds[1])
    bounds[2] <- floor(bounds[2])
  }

  seq(bounds[1], bounds[2], by = .by)
}
218 |
--------------------------------------------------------------------------------
/R/selection_helpers.R:
--------------------------------------------------------------------------------
1 | #' Selection helpers
2 | #'
3 | #' @description
4 | #'
5 | #' `dplyover` provides three kinds of selection helpers which are intended for
6 | #' use in all functions that accept a vector as argument (that is `over()` and
7 | #' `crossover()` as well as their variants, see here for a full list of the
8 | #' [over-across function family][over_across_family]).
9 | #'
10 | #' Helpers which select **string parts** of the **column names** (of the underlying data):
11 | #' - [cut_names()] removes a specified pattern.
12 | #' - [extract_names()] extracts a specified pattern.
13 | #'
14 | #' Helpers which select **values** of a variable:
15 | #' - [dist_values()] returns all distinct values.
16 | #' - [seq_range()] returns the sequence between the `range()` of a variable.
17 | #'
18 | #' A helper function that evaluates a glue specification as variable
19 | #' - [.()] evaluates an interpolated string as symbol
20 | #'
21 | #' @name selection_helpers
22 | #' @aliases over_selection_helpers
23 | NULL
24 |
--------------------------------------------------------------------------------
/R/show_affix.R:
--------------------------------------------------------------------------------
1 | #' Show affixes for variable pairs of two sets of columns
2 | #'
3 | #' @description
4 | #'
5 | #' These functions show the prefixes or suffixes for each pair of variables of
6 | #' two sets of columns. They are intended to be used either (1) in case `across2`
7 | #' throws an error when `{pre}` or `{suf}` are specified in `across2`'s `.names`
8 | #' argument or (2) before using `{pre}` or `{suf}` in `across2` to understand
9 | #' what the pre- or suffixes will look like.
10 | #'
11 | #' * [show_prefix()] lists each variable pair and the corresponding alphanumeric prefix
12 | #'
13 | #' * [show_suffix()] lists each variable pair and the corresponding alphanumeric suffix
14 | #'
15 | #' @param .data A data frame.
16 | #' @param .xcols,.ycols <[`tidy-select`][dplyr::dplyr_tidy_select]> Sets of
17 | #' columns for which the common pre- or suffix will be shown for each pair.
18 | #' Note that you can not select grouping columns.
19 | #'
20 | #' @return
21 | #' A tibble with three columns: .xcols, .ycols and prefix or suffix.
22 | #'
23 | #' @section Examples:
24 | #'
25 | #' ```{r, child = "man/rmd/setup.Rmd"}
26 | #' ```
27 | #' Below two use cases of `show_prefix/suffix` are briefly explained.
28 | #' Let's first attach dplyr and get ready:
29 | #'
30 | #' ```{r, comment = "#>", collapse = TRUE}
31 | #' library(dplyr)
32 | #'
33 | #' # For better printing
34 | #' iris <- as_tibble(iris)
35 | #' ```
36 | #'
37 | #' ## (1) When called after an error is thrown by across2()
38 | #'
39 | #' Let's assume we use `across2` with the `{pre}` glue specification on some
40 | #' data where not all variable pairs share a common prefix. In the example below
41 | #' we use `dplyr::rename` to create such a case. Then `across2` will throw an
42 | #' error. The error message already suggests that we can run `show_prefix()`
43 | #' to see what went wrong. In this case we can call `show_prefix()` without
44 | #' any arguments:
45 | #'
46 | #' ```{r, comment = "#>", collapse = TRUE, error = TRUE}
47 | #' iris %>%
48 | #' as_tibble %>%
49 | #' rename("Pesal.Length" = Sepal.Length) %>%
50 | #' mutate(across2(ends_with("Length"),
51 | #' ends_with("Width"),
52 | #' .fns = list(product = ~ .x * .y,
53 | #' sum = ~ .x + .y),
54 | #' .names = "{pre}_{fn}"))
55 | #' show_prefix()
56 | #' ```
57 | #'
58 | #' ## (2) When called on a data.frame
59 | #'
60 | #' When called on a data.frame we just need to specify two sets of columns:
61 | #' `.xcols` and `.ycols` (just like in `across2`).
62 | #'
63 | #' ```{r, comment = "#>", collapse = TRUE}
64 | #' iris %>%
65 | #' show_suffix(starts_with("Sepal"),
66 | #' starts_with("Petal"))
67 | #' ```
68 | #'
69 | #'
70 | #' @name show_affix
71 | NULL
72 | #' @rdname show_affix
73 | #' @export
show_prefix <- function(.data = NULL, .xcols = NULL, .ycols = NULL) {

  # Lists the common prefix of each pair of columns in `.xcols` / `.ycols`.
  #
  # Called without `.data`, the data and column selections stored in the
  # package-internal `.last` environment are reused (presumably put there by
  # a failed `across2()` call, as described in the roxygen docs). The stored
  # value is removed after use, so it serves exactly one call.
  # Otherwise `.xcols` / `.ycols` are captured unevaluated as tidy-select
  # expressions and evaluated against `.data` in `show_affix()`.

  if (is.null(.data) && !is.null(.last$value)) {

    .data <- .last$value$data
    .xcols <- .last$value$xcols
    .ycols <- .last$value$ycols

    rm(value, envir = .last)

  } else if (is.null(.data)) {

    # Neither data nor stored state: fail here with a clear message instead of
    # an opaque error further down.
    rlang::abort(
      c("Problem with `show_prefix()` input `.data`.",
        i = paste0("`.data` must be a data frame, unless `show_prefix()` is ",
                   "called right after `across2()` has thrown an error."),
        x = "`.data` is `NULL` and no previous `across2()` call is stored."))

  } else {
    .xcols <- rlang::enexpr(.xcols)
    .ycols <- rlang::enexpr(.ycols)
  }

  show_affix(data = .data,
             xcols = .xcols,
             ycols = .ycols,
             type = "prefix")
}
94 |
95 | #' @rdname show_affix
96 | #' @export
show_suffix <- function(.data = NULL, .xcols = NULL, .ycols = NULL) {

  # Lists the common suffix of each pair of columns in `.xcols` / `.ycols`.
  #
  # Called without `.data`, the data and column selections stored in the
  # package-internal `.last` environment are reused (presumably put there by
  # a failed `across2()` call, as described in the roxygen docs). The stored
  # value is removed after use, so it serves exactly one call.
  # Otherwise `.xcols` / `.ycols` are captured unevaluated as tidy-select
  # expressions and evaluated against `.data` in `show_affix()`.

  if (is.null(.data) && !is.null(.last$value)) {

    .data <- .last$value$data
    .xcols <- .last$value$xcols
    .ycols <- .last$value$ycols

    rm(value, envir = .last)

  } else if (is.null(.data)) {

    # Neither data nor stored state: fail here with a clear message instead of
    # an opaque error further down.
    rlang::abort(
      c("Problem with `show_suffix()` input `.data`.",
        i = paste0("`.data` must be a data frame, unless `show_suffix()` is ",
                   "called right after `across2()` has thrown an error."),
        x = "`.data` is `NULL` and no previous `across2()` call is stored."))

  } else {
    .xcols <- rlang::enexpr(.xcols)
    .ycols <- rlang::enexpr(.ycols)
  }

  show_affix(data = .data,
             xcols = .xcols,
             ycols = .ycols,
             type = "suffix")
}
116 |
117 |
show_affix <- function(data, xcols, ycols, type = c("prefix", "suffix")) {

  # Shared implementation of `show_prefix()` and `show_suffix()`.
  #
  # data         : data frame (grouping columns are dropped before selecting).
  # xcols, ycols : tidy-select expressions evaluated against `data`; both must
  #                select the same number of columns.
  # type         : which affix to compute for each pair.
  #
  # Returns a tibble with the columns `.xcols`, `.ycols` and `prefix` or
  # `suffix`; pairs without a common affix get `NA`.

  type <- match.arg(type)

  # Grouping columns are excluded from the selection.
  grp_vars <- dplyr::group_vars(data)

  if (length(grp_vars) > 0) {
    data <- dplyr::ungroup(data)
    data <- dplyr::select(data, -dplyr::all_of(grp_vars))
  }

  xvars <- names(tidyselect::eval_select(xcols, data))
  yvars <- names(tidyselect::eval_select(ycols, data))

  if (length(xvars) != length(yvars)) {
    rlang::abort(c(paste0("Problem with `show_", type,"()` input `.xcols` and `.ycols`."),
                   i = "Input `.xcols` and `.ycols` must have the same number of columns.",
                   x = paste0(length(xvars), " columns are selected in `.xcols`, ",
                              "while ", length(yvars), " columns are selected in `.ycols`.")))
  }

  # One affix per (x, y) pair; an empty result means "no common affix".
  affixes <- purrr::map2(xvars, yvars, ~ get_affix(c(.x, .y), type))
  affixes <- purrr::modify_if(affixes, rlang::is_empty, ~ NA_character_)
  # `as.character()` keeps the affix column present when no pairs are selected.
  affixes <- as.character(unlist(affixes))

  inp_tbl <- tibble::tibble(.xcols = xvars,
                            .ycols = yvars,
                            !! type := affixes)

  # Hint at how to view the whole table when tibble printing would truncate it.
  print_max <- getOption("tibble.print_max", 20)

  if (nrow(inp_tbl) > print_max) {
    cat("Use `.Last.value %>% View()` to see the full list of variables.\n")
  }
  inp_tbl
}
161 |
# helper function for across2_setup
get_affix <- function(x, type = c("prefix", "suffix")) {

  # Returns the alphanumeric prefix or suffix shared by all strings in `x`,
  # or `character(0)` when they have none in common.
  #
  # x    : character vector of names to compare (a pair of column names when
  #        called from `show_affix()`).
  # type : "prefix" compares from the left, "suffix" from the right.

  type <- match.arg(type)

  # Pad to equal width on the side opposite to the affix so that characters
  # are compared position by position from the relevant end.
  pad_side <- switch(type,
                     "prefix" = "right",
                     "suffix" = "left")

  padded <- stringr::str_pad(x, max(nchar(x)), side = pad_side, pad = " ")

  # For every character position: how many distinct characters occur across
  # the strings? Runs of 1 are stretches on which all strings agree.
  chars_by_pos <- purrr::transpose(strsplit(padded, ""))
  n_distinct <- purrr::map_dbl(purrr::map(chars_by_pos, unique), length)
  runs <- rle(n_distinct)
  n_runs <- length(runs$values)

  if (n_runs > 0 && type == "prefix" && runs$values[1] == 1) {
    # The first run is shared: take that many leading characters.
    res <- stringr::str_sub(padded[[1]],
                            start = 1L,
                            end = runs$lengths[1])

  } else if (n_runs > 0 && type == "suffix" && runs$values[n_runs] == 1) {
    # The last run is shared: take that many trailing characters.
    width <- sum(runs$lengths)
    res <- stringr::str_sub(padded[[1]],
                            start = width - runs$lengths[n_runs] + 1,
                            end = width)
  } else {
    res <- character(0)
  }

  # Strip punctuation at both ends, then keep only the alphanumeric run that
  # touches the relevant end of the shared part.
  res <- stringr::str_remove_all(res, "[:punct:]*$")
  res <- stringr::str_remove_all(res, "^[:punct:]*")

  if (type == "prefix") {
    res <- stringr::str_extract(res, "^[:alnum:]*")
  } else {
    res <- stringr::str_extract(res, "[:alnum:]*$")
  }

  res

}
203 |
204 | # add to tests
205 | # x <- c("Sepal.Length", "Sepal.Width")
206 | # x <- c("Length.Sepal", "Width.Sepal")
207 | # x <- c("Length.of.Sepal.here", "Length.no.Sepal.here")
208 | # get_affix(x, "suffix")
209 |
--------------------------------------------------------------------------------
/R/string_eval.R:
--------------------------------------------------------------------------------
1 | #' Evaluate an interpolated string as symbol
2 | #'
3 | #' @description
4 | #'
5 | #' This function takes a glue specification as input, and evaluates the final
6 | #' argument string as name in the caller environment.
7 | #'
8 | #' @param x A glue specification, that is, a string which contains an R expression
9 | #' wrapped in curly braces, e.g. `.("{.x}_some_string")`.
10 | #'
11 | #' @return
12 | #' The values of the variable with the name of the final argument string, given
13 | #' that it exists in the caller environment.
14 | #'
15 | #' @section Examples:
16 | #'
17 | #' ```{r, child = "man/rmd/setup.Rmd"}
18 | #' ```
19 | #' ```{r, comment = "#>", collapse = TRUE}
20 | #' library(dplyr)
21 | #'
22 | #' # For better printing
23 | #' iris <- as_tibble(iris)
24 | #' ```
25 | #'
26 | #' Below is a simple example from `over()`. In `over`'s function
27 | #' argument `.x` is first evaluated as 'Sepal' and then as 'Petal' which
28 | #' results in the final argument strings 'Sepal.Width' and 'Sepal.Length' as
29 | #' well as 'Petal.Width' and 'Petal.Length'.
30 | #'
31 | #' ```{r, comment = "#>", collapse = TRUE}
32 | #' iris %>%
33 | #' mutate(over(c("Sepal", "Petal"),
34 | #' ~ .("{.x}.Width") + .("{.x}.Length")
35 | #' ))
36 | #' ```
37 | #'
38 | #' The above syntax is equal to the more verbose:
39 | #' ```{r, comment = "#>", collapse = TRUE}
40 | #' iris %>%
41 | #' mutate(over(c("Sepal", "Petal"),
42 | #' ~ eval(sym(paste0(.x, ".Width"))) +
43 | #' eval(sym(paste0(.x, ".Length")))
44 | #' ))
45 | #' ```
46 | #'
47 | #' Although `.()` was created with the use of `over()` in mind, it can also be
48 | #' used within `dplyr::across()` in combination with `dplyr::cur_column()`.
49 | #' First let's rename 'Sepal.Length' and 'Petal.Length' to 'Sepal' and 'Petal'
50 | #' to have a stem to which we can attach the string '.Width' to access the
51 | #' two 'Width' variables. Now we can call `.(cur_column())` to access the variable
52 | #' `across()` has been called on (Note: we could have used `.x` instead). We can
53 | #' further access the values of the 'Width' variables by wrapping `cur_column()`
54 | #' in curly braces `{}`, adding `.Width` and wrapping everything with
55 | #' quotation marks `.("{cur_column()}.Width")`.
56 | #'
57 | #' ```{r, comment = "#>", collapse = TRUE}
58 | #' iris %>%
59 | #' rename("Sepal" = "Sepal.Length",
60 | #' "Petal" = "Petal.Length") %>%
61 | #' mutate(across(c(Sepal, Petal),
62 | #' ~ .(cur_column()) + .("{cur_column()}.Width"),
63 | #' .names = "{col}_sum"))
64 | #' ```
65 | #'
66 | #' A similar approach can be achieved using `purrr::map` in combination with `.()`:
67 | #' ```{r, comment = "#>", collapse = TRUE}
68 | #' iris %>%
69 | #' rename("Sepal" = "Sepal.Length",
70 | #' "Petal" = "Petal.Length") %>%
71 | #' mutate(purrr::map_dfc(c("Sepal_sum" = "Sepal", "Petal_sum" = "Petal"),
72 | #' ~ .(.x) + .("{.x}.Width")))
73 | #' ```
74 | #' @name string_eval
75 | #' @export
`.` <- function(x) {
  # Interpolate the glue specification `x` with values from the calling
  # frame, then evaluate the resulting string as a variable name in the
  # caller's environment.
  var_name <- glue::glue(x,
                         .open = "{",
                         .close = "}",
                         .envir = parent.frame())

  rlang::eval_tidy(rlang::sym(var_name), env = rlang::caller_env())
}
83 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, child = "man/rmd/setup.Rmd"}
8 | ```
9 |
10 | # dplyover
11 |
12 |
13 | 
14 | [](man/figures/lifecycle-experimental.svg)
15 | [](https://github.com/TimTeaFan/dplyover/actions)
16 | [](https://codecov.io/gh/TimTeaFan/dplyover?branch=main)
17 | [](https://www.codefactor.io/repository/github/timteafan/dplyover)
18 | [](https://cran.r-project.org/package=dplyover)
19 |
20 |
21 | ## Overview
22 |
23 |
24 |
25 | {dplyover} extends {dplyr}'s functionality by building a function family
26 | around `dplyr::across()`.
27 |
28 | The goal of this *over-across function family* is to provide a concise and
29 | uniform syntax which can be used to create columns by applying functions to
30 | vectors and/or sets of columns in {dplyr}. Ideally, this will:
31 |
32 | - **reduce the amount of code** to create variables derived from existing columns,
33 | which is especially helpful when doing exploratory data analysis (e.g. lagging,
34 | collapsing, recoding etc. many variables in a similar way).
35 | - **provide a clean {dplyr} approach** to create many variables which are
36 | calculated based on two or more variables.
37 | - **improve our mental model** so that it is easier to tackle problems where the
38 | solution is based on creating new columns.
39 |
40 | The functions in the *over-across function family* create columns by applying
41 | one or several functions to:
42 |
43 | - `dplyr::across()` a set of columns (not part of dplyover)
44 | - `over()` a vector (list or atomic vector)
45 | - `over2()` two vectors of the same length (sequentially^#^)
46 | - `over2x()` two vectors (nested^+^)
47 | - `across2()` two sets of columns (sequentially^#^)
48 | - `across2x()` two sets of columns (nested^+^)
49 | - `crossover()` a set of columns and a vector (nested^+^)
50 |
51 | ^#^ "sequentially" means that the function is sequentially applied to the
52 | first pair of elements `x[[1]]` and `y[[1]]`, then to the second pair of elements
53 | and so on.
54 | ^+^ "nested" means that the function is applied to all combinations
55 | between elements in `x` and `y` similar to a nested loop.
56 |
57 |
58 | ## Installation
59 |
60 | {dplyover} is not on CRAN. You can install the latest version from
61 | [GitHub](https://github.com/) with:
62 |
63 | ```{r, eval = FALSE}
64 | # install.packages("remotes")
65 | remotes::install_github("TimTeaFan/dplyover")
66 | ```
67 |
68 | ## Getting started
69 |
70 | Below are a few examples of the {dplyover}'s *over-across function family*. More
71 | functions and workarounds of how to tackle the problems below without {dplyover}
72 | can be found in the vignette "Why dplyover?".
73 |
74 | ```{r, setup, warning = FALSE, message = FALSE}
75 | # dplyover is an extension of dplyr and won't work without it
76 | library(dplyr)
77 | library(dplyover)
78 |
79 | # For better printing:
80 | iris <- as_tibble(iris)
81 | ```
82 |
83 | #### Apply functions to a vector
84 |
85 | `over()` applies one or several functions to a vector. We can use it inside
86 | `dplyr::mutate()` to create several similar variables that we derive from an
87 | existing column. This is helpful in cases where we want to create a batch of
88 | similar variables with only slight changes in the argument values of the
89 | calling function. A good example are `lag` and `lead` variables. Below we use
90 | column 'a' to create lag and lead variables by `1`, `2` and `3` positions.
91 | `over()`'s `.names` argument lets us put nice names on the output columns.
92 |
93 | ```{r}
94 | tibble(a = 1:25) %>%
95 | mutate(over(c(1:3),
96 | list(lag = ~ lag(a, .x),
97 | lead = ~ lead(a, .x)),
98 | .names = "a_{fn}{x}"))
99 | ```
100 |
101 | #### Apply functions to a set of columns and a vector simultaneously
102 |
103 | `crossover()` applies the functions in `.fns` to every combination of columns in
104 | `.xcols` with elements in `.y`. This is similar to the example above, but this time,
105 | we use a set of columns. Below we create five lagged variables for each
106 | 'Sepal.Length' and 'Sepal.Width'. Again, we use a named list as argument in `.fns`
107 | to create nice names by specifying the glue syntax in `.names`.
108 |
109 | ```{r}
110 | iris %>%
111 | transmute(
112 | crossover(starts_with("sepal"),
113 | 1:5,
114 | list(lag = ~ lag(.x, .y)),
115 | .names = "{xcol}_{fn}{y}")) %>%
116 | glimpse
117 | ```
118 |
119 |
120 | #### Apply functions to a set of variable pairs
121 |
122 | `across2()` can be used to transform pairs of variables in one or more functions.
123 | In the example below we want to calculate the product and the sum of all pairs
124 | of 'Length' and 'Width' variables in the `iris` data set. We can use `{pre}` in
125 | the glue specification in `.names` to extract the common prefix of each pair of
126 | variables. We can further transform the names, in the example setting them
127 | `tolower`, by specifying the `.names_fn` argument:
128 |
129 | ```{r}
130 | iris %>%
131 | transmute(across2(ends_with("Length"),
132 | ends_with("Width"),
133 | .fns = list(product = ~ .x * .y,
134 | sum = ~ .x + .y),
135 | .names = "{pre}_{fn}",
136 | .names_fn = tolower))
137 | ```
138 |
139 |
140 | ## Performance and Compatibility
141 |
142 | This is an experimental package which I started developing with my own use cases
143 | in mind. I tried to keep the effort low, which is why this package *does not*
144 | internalize (read: copy) internal {dplyr} functions (especially the 'context
145 | internals'). This made it relatively easy to develop the package without:
146 |
147 | 1. copying tons of {dplyr} code,
148 | 1. having to figure out which dplyr-functions use the copied internals and
149 | 1. finally overwriting these functions (like `mutate` and other one-table verbs),
150 | which would eventually lead to conflicts with other add-on packages, like for
151 | example {tidylog}.
152 |
153 | However, the downside is that not relying on {dplyr} internals has some negative
154 | effects in terms of performance and compatibility.
155 |
156 | In a nutshell this means:
157 |
158 | - The *over-across function family* in {dplyover} is slower than the
159 | original `dplyr::across`. Up until {dplyr} 1.0.3 the overhead was not too big,
160 | but `dplyr::across` got much faster with {dplyr} 1.0.4 which is why the gap has
161 | widened a lot.
162 | - Although {dplyover} is designed to work in {dplyr}, some features and
163 | edge cases will not work correctly.
164 |
165 | The good news is that even without relying on {dplyr} internals most of the
166 | original functionality can be replicated and although being less performant,
167 | the current setup is optimized and falls not too far behind in terms of speed -
168 | at least when compared to the pre v1.0.4 `dplyr::across`.
169 |
170 | Regarding compatibility, I have spent quite some time testing the package and
171 | I was able to replicate most of the tests for `dplyr::across` successfully.
172 |
173 | For more information on the performance and compatibility of {dplyover} see the
174 | vignette "Performance and Compatibility".
175 |
176 |
177 | ## History
178 |
179 | I originally opened a
180 | [feature request on GitHub](https://github.com/tidyverse/dplyr/issues/4834) to
181 | include a very special case version of `over` (or to that time `mutate_over`)
182 | into {dplyr}. The advice then was to make this kind of functionality available
183 | in a separate package. While I was working on this very special case version of
184 | `over`, I realized that the more general use case resembles a `purrr::map`
185 | function for inside {dplyr} verbs with different variants, which led me to the
186 | *over-across function family*.
187 |
188 |
189 | ## Acknowledgements and Disclaimer
190 |
191 | This package is not only an extension of {dplyr}. The main functions in
192 | {dplyover} are directly derived and based on `dplyr::across()` (dplyr's license
193 | and copyrights apply!). So if this package is working correctly, all the credit
194 | should go to the dplyr team.
195 |
196 | My own "contribution" (if you want to call it like that) merely consists of:
197 |
198 | 1. removing the dependencies on {dplyr}'s internal functions, and
199 | 2. slightly changing `across`' logic to make it work for vectors and a
200 | combination of two vectors and/or sets of columns.
201 |
202 | By this I most definitely introduced some bugs and edge cases which won't work,
203 | and in which case I am the only one to blame.
204 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # dplyover
5 |
6 |
7 |
8 | 
10 | [](man/figures/lifecycle-experimental.svg)
11 | [](https://github.com/TimTeaFan/dplyover/actions)
12 | [](https://codecov.io/gh/TimTeaFan/dplyover?branch=main)
14 | [](https://www.codefactor.io/repository/github/timteafan/dplyover)
15 | [](https://cran.r-project.org/package=dplyover)
17 |
18 |
19 | ## Overview
20 |
21 |
22 |
23 | {dplyover} extends {dplyr}’s functionality by building a function family
24 | around `dplyr::across()`.
25 |
26 | The goal of this *over-across function family* is to provide a concise
27 | and uniform syntax which can be used to create columns by applying
28 | functions to vectors and/or sets of columns in {dplyr}. Ideally, this
29 | will:
30 |
31 | - **reduce the amount of code** to create variables derived from
32 | existing columns, which is especially helpful when doing exploratory
33 | data analysis (e.g. lagging, collapsing, recoding etc. many
34 | variables in a similar way).
35 | - **provide a clean {dplyr} approach** to create many variables which
36 | are calculated based on two or more variables.
37 | - **improve our mental model** so that it is easier to tackle problems
38 | where the solution is based on creating new columns.
39 |
40 | The functions in the *over-apply function family* create columns by
41 | applying one or several functions to:
42 |
43 | - `dplyr::across()` a set of columns (not part of dplyover)
44 | - `over()` a vector (list or atomic vector)
45 | - `over2()` two vectors of the same length (sequentially\#)
46 | - `over2x()` two vectors (nested+)
47 | - `across2()` two sets of columns (sequentially\#)
48 | - `across2x()` two sets of columns (nested+)
49 | - `crossover()` a set of columns and a vector (nested+)
50 |
51 | \# “sequentially” means that the function is sequentially applied
52 | to the first two elements of `x[[1]]` and `y[[1]]`, then to the second
53 | pair of elements and so on. + “nested” means that the
54 | function is applied to all combinations between elements in `x` and `y`
55 | similar to a nested loop.
56 |
57 | ## Installation
58 |
59 | {dplyover} is not on CRAN. You can install the latest version from
60 | [GitHub](https://github.com/) with:
61 |
62 | ``` r
63 | # install.packages("remotes")
64 | remotes::install_github("TimTeaFan/dplyover")
65 | ```
66 |
67 | ## Getting started
68 |
69 | Below are a few examples of the {dplyover}’s *over-across function
70 | family*. More functions and workarounds of how to tackle the problems
71 | below without {dplyover} can be found in the vignette
72 | “Why
73 | dplyover?”.
74 |
75 | ``` r
76 | # dplyover is an extension of dplyr and won't work without it
77 | library(dplyr)
78 | library(dplyover)
79 |
80 | # For better printing:
81 | iris <- as_tibble(iris)
82 | ```
83 |
84 | #### Apply functions to a vector
85 |
86 | `over()` applies one or several functions to a vector. We can use it
87 | inside `dplyr::mutate()` to create several similar variables that we
88 | derive from an existing column. This is helpful in cases where we want
89 | to create a batch of similar variables with only slightly changes in the
90 | argument values of the calling function. A good example are `lag` and
91 | `lead` variables. Below we use column ‘a’ to create lag and lead
92 | variables by `1`, `2` and `3` positions. `over()`’s `.names` argument
93 | lets us put nice names on the output columns.
94 |
95 | ``` r
96 | tibble(a = 1:25) %>%
97 | mutate(over(c(1:3),
98 | list(lag = ~ lag(a, .x),
99 | lead = ~ lead(a, .x)),
100 | .names = "a_{fn}{x}"))
101 | #> # A tibble: 25 x 7
102 | #> a a_lag1 a_lead1 a_lag2 a_lead2 a_lag3 a_lead3
103 | #>
104 | #> 1 1 NA 2 NA 3 NA 4
105 | #> 2 2 1 3 NA 4 NA 5
106 | #> 3 3 2 4 1 5 NA 6
107 | #> 4 4 3 5 2 6 1 7
108 | #> # ... with 21 more rows
109 | ```
110 |
111 | #### Apply functions to a set of columns and a vector simultaneously
112 |
113 | `crossover()` applies the functions in `.fns` to every combination of
114 | colums in `.xcols` with elements in `.y`. This is similar to the example
115 | above, but this time, we use a set of columns. Below we create five
116 | lagged variables for each ‘Sepal.Length’ and ‘Sepal.Width’. Again, we
117 | use a named list as argument in `.fns` to create nice names by
118 | specifying the glue syntax in `.names.`
119 |
120 | ``` r
121 | iris %>%
122 | transmute(
123 | crossover(starts_with("sepal"),
124 | 1:5,
125 | list(lag = ~ lag(.x, .y)),
126 | .names = "{xcol}_{fn}{y}")) %>%
127 | glimpse
128 | #> Rows: 150
129 | #> Columns: 10
130 | #> $ Sepal.Length_lag1 NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9~
131 | #> $ Sepal.Length_lag2 NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4,~
132 | #> $ Sepal.Length_lag3 NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, ~
133 | #> $ Sepal.Length_lag4 NA, NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5~
134 | #> $ Sepal.Length_lag5 NA, NA, NA, NA, NA, 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.~
135 | #> $ Sepal.Width_lag1 NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1~
136 | #> $ Sepal.Width_lag2 NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9,~
137 | #> $ Sepal.Width_lag3 NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, ~
138 | #> $ Sepal.Width_lag4 NA, NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3~
139 | #> $ Sepal.Width_lag5 NA, NA, NA, NA, NA, 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.~
140 | ```
141 |
142 | #### Apply functions to a set of variable pairs
143 |
144 | `across2()` can be used to transform pairs of variables in one or more
145 | functions. In the example below we want to calculate the product and the
146 | sum of all pairs of ‘Length’ and ‘Width’ variables in the `iris` data
147 | set. We can use `{pre}` in the glue specification in `.names` to extract
148 | the common prefix of each pair of variables. We can further transform
149 | the names, in the example setting them `tolower`, by specifying the
150 | `.names_fn` argument:
151 |
152 | ``` r
153 | iris %>%
154 | transmute(across2(ends_with("Length"),
155 | ends_with("Width"),
156 | .fns = list(product = ~ .x * .y,
157 | sum = ~ .x + .y),
158 | .names = "{pre}_{fn}",
159 | .names_fn = tolower))
160 | #> # A tibble: 150 x 4
161 | #> sepal_product sepal_sum petal_product petal_sum
162 | #>
163 | #> 1 17.8 8.6 0.280 1.60
164 | #> 2 14.7 7.9 0.280 1.60
165 | #> 3 15.0 7.9 0.26 1.5
166 | #> 4 14.3 7.7 0.3 1.7
167 | #> # ... with 146 more rows
168 | ```
169 |
170 | ## Performance and Compatibility
171 |
172 | This is an experimental package which I started developing with my own
173 | use cases in mind. I tried to keep the effort low, which is why this
174 | package *does not* internalize (read: copy) internal {dplyr} functions
175 | (especially the ‘context internals’). This made it relatively easy to
176 | develop the package without:
177 |
178 | 1. copying tons of {dplyr} code,
179 | 2. having to figure out which dplyr-functions use the copied internals
180 | and
181 | 3. finally overwriting these functions (like `mutate` and other
182 | one-table verbs), which would eventually lead to conflicts with
183 | other add-on packages, like for example {tidylog}.
184 |
185 | However, the downside is that not relying on {dplyr} internals has some
186 | negative effects in terms of performance and compatibility.
187 |
188 | In a nutshell this means:
189 |
190 | - The *over-across function family* in {dplyover} is slower than the
191 | original `dplyr::across`. Up until {dplyr} 1.0.3 the overhead was
192 | not too big, but `dplyr::across` got much faster with {dplyr} 1.0.4
193 | which is why the gap has widened a lot.
194 | - Although {dplyover} is designed to work in {dplyr}, some features
195 | and edge cases will not work correctly.
196 |
197 | The good news is that even without relying on {dplyr} internals most of
198 | the original functionality can be replicated and although being less
199 | performant, the current setup is optimized and falls not too far behind
200 | in terms of speed - at least when compared to the pre v1.0.4
201 | `dplyr::across`.
202 |
203 | Regarding compatibility, I have spent quite some time testing the package
204 | and I was able to replicate most of the tests for `dplyr::across`
205 | successfully.
206 |
207 | For more information on the performance and compatibility of {dplyover}
208 | see the vignette
209 | “Performance
210 | and Compatibility”.
211 |
212 | ## History
213 |
214 | I originally opened a [feature request on
215 | GitHub](https://github.com/tidyverse/dplyr/issues/4834) to include a
216 | very special case version of `over` (or, at that time, `mutate_over`) into
217 | {dplyr}. The advice then was to make this kind of functionality
218 | available in a separate package. While I was working on this very
219 | special case version of `over`, I realized that the more general use
220 | case resembles a `purrr::map` function for inside {dplyr} verbs with
221 | different variants, which led me to the *over-across function family*.
222 |
223 | ## Acknowledgements and Disclaimer
224 |
225 | This package is not only an extension of {dplyr}. The main functions in
226 | {dplyover} are directly derived and based on `dplyr::across()` (dplyr’s
227 | license and copyrights apply\!). So if this package is working
228 | correctly, all the credit should go to the dplyr team.
229 |
230 | My own “contribution” (if you want to call it like that) merely consists
231 | of:
232 |
233 | 1. removing the dependencies on {dplyr}’s internal functions, and
234 | 2. slightly changing `across`’ logic to make it work for vectors and a
235 | combination of two vectors and/or sets of columns.
236 |
237 | By this I most definitely introduced some bugs and edge cases which
238 | won’t work, and in which case I am the only one to blame.
239 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
# pkgdown configuration for the dplyover documentation website.
title: dplyover
url: https://timteafan.github.io/dplyover/

template:
  params:
    bootswatch: cerulean

# Sections of the function reference index. `desc` is rendered as Markdown,
# so the function overview is written as a bullet list to keep one entry per
# line on the rendered page.
reference:
  - title: over-across family
    desc: |
      The functions in the *over-across* function family create columns by
      applying one or several functions to:

      - `over()` a vector (list or atomic vector);
      - `over2()` two vectors of the same length (pairwise);
      - `over2x()` two vectors of the same length (nested);
      - `across2()` two sets of columns (pairwise);
      - `across2x()` two sets of columns (nested);
      - `crossover()` a set of columns and a vector (nested).
    contents:
      - over
      - over2
      - across2
      - crossover

  - title: helper functions
    desc: >
      {dplyover} provides three selection helpers which are intended for use
      in all functions that accept a vector as argument (that is `over()` and
      `crossover()` as well as their variants). 1. Helpers which select string
      parts of the column names (of the underlying data): `cut_names()` removes a
      specified pattern; `extract_names()` extracts a specified pattern.
      2. Helpers which select values of a variable: `dist_values()` returns all
      distinct values; `seq_range()` returns the sequence between the range of a
      variable. 3. A helper function `.()` that takes a glue specification as input,
      and evaluates the final argument string as name in the caller environment.
      Apart from those selection helpers, `show_prefix()` and `show_suffix()`
      show the common pre- or suffix for each pair of variables of two sets of
      columns.
    contents:
      - dist_values
      - cut_names
      - "."
      - show_affix

  - title: data
    desc: >
      {dplyover} contains a randomly generated data set from a customer
      satisfaction survey using CSAT (Customer Satisfaction Score) for a
      contract-based product. The data set comes in two versions: recoded and raw.
    contents:
      - csat
      - csatraw

# Entries shown on the right-hand side of the navigation bar, in this order.
navbar:
  structure:
    right: [home, reference, articles, news, twitter, github]
  components:
    reference:
      icon: fas fa-tools
      text: Reference
      href: reference/index.html
    articles:
      icon: fas fa-book-open
      text: Articles
      menu:
        - text: Why dplyover?
          href: articles/why_dplyover.html
        - text: Performance and Compatibility
          href: articles/performance.html
    news:
      icon: fas fa-clipboard-list
      text: News
      href: news/index.html
    twitter:
      icon: fab fa-twitter
      href: https://twitter.com/timteafan
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
# Codecov settings for this repository.

# No Codecov comments on pull requests.
comment: false

coverage:
  status:
    # Status check for the coverage of the whole project.
    project:
      default:
        target: auto
        threshold: "1%"
        informational: true
    # Status check for the coverage of the lines changed by a commit / PR.
    patch:
      default:
        target: auto
        threshold: "1%"
        informational: true
--------------------------------------------------------------------------------
/data-raw/csatraw.R:
--------------------------------------------------------------------------------
## Prepares the `csatraw` data set (and the recoded `csat` data set).

# Values to sample from. "new" and "existing" appear twice in `type_vc`, so
# each of them is drawn twice as often as "reactivate".
type_vc    <- c(rep(c("new", "existing"), times = 2), "reactivate")
product_vc <- c("basic", "advanced", "premium")

# Labels of the coded open-ended answers, split into positive and negative.
comments_good <- c("great product", "good service", "friendly staff")
comments_bad  <- c("too expensive", "unfriendly", "no response")
comments_all  <- c(comments_good, comments_bad)

# Lookup from answer code to label: codes 11-13 map to the positive labels,
# codes 21-23 to the negative ones (same order as in `comments_all`).
lookup_ls <- as.list(stats::setNames(comments_all, c(11:13, 21:23)))
18 |
# Draw coded open-ended answers that fit each respondent's rating.
#
# Arguments:
#   inp    - vector of ratings, one per respondent.
#   c_good - codes to sample from when the rating is above 3.
#   c_bad  - codes to sample from when the rating is below 3.
#   c_all  - codes to sample from when the rating is exactly 3.
#
# Returns a character vector with one element per rating. Each element holds
# one to three distinct codes pasted together with ", ".
create_coded_rsp <- function(inp, c_good, c_bad, c_all) {

  # The number of codes per respondent is drawn for all respondents up front,
  # before any code is sampled. Keep this order: changing it changes the data
  # generated under a fixed seed.
  size <- round(runif(length(inp), 1, 3))

  # Return the result directly instead of assigning it to an unused local,
  # which made the function return its value only invisibly.
  vapply(seq_along(inp),
         FUN.VALUE = character(1),
         FUN = function(x) {
           pool <- if (inp[x] > 3) {
             c_good
           } else if (inp[x] < 3) {
             c_bad
           } else {
             c_all
           }
           paste(sample(pool, size[x]), collapse = ", ")
         })
}
36 |
# Generate the raw survey data.
# The seed is fixed so that the data set can be reproduced. Every column below
# consumes random numbers in the order in which it is defined, so reordering
# the columns changes the generated data.
set.seed(982342)

n_resp <- 150                          # number of simulated respondents
contact_codes <- c(0, 1, 2, 2, 3, 3)   # 2 and 3 are drawn twice as often as 0 and 1

# Draw one last-contact code per respondent.
draw_contact <- function() {
  sample(contact_codes, n_resp, replace = TRUE)
}

# Draw one rating between 1 and 5 per respondent; respondents without contact
# (code 0) get NA instead.
draw_rating <- function(contact) {
  ifelse(contact == 0, NA, round(runif(n_resp, min = 1, max = 5), 0))
}

csatraw <- tibble::tibble(
  cust_id    = stringr::str_pad(sample(150:99999, n_resp), 5, pad = "0"),
  type       = sample(type_vc, n_resp, replace = TRUE),
  product    = sample(product_vc, n_resp, replace = TRUE),
  item1      = round(runif(n_resp, min = 1, max = 5), 0),
  # Character column of comma-separated answer codes matching `item1`.
  item1_open = create_coded_rsp(item1, 11:13, 21:23, c(11:13, 21:23)),
  item2a     = draw_contact(),
  item2b     = draw_rating(item2a),
  item3a     = draw_contact(),
  item3b     = draw_rating(item3a),
  item4a     = draw_contact(),
  item4b     = draw_rating(item4a),
  item5a     = draw_contact(),
  item5b     = draw_rating(item5a),
  item6a     = draw_contact(),
  item6b     = draw_rating(item6a)
)

usethis::use_data(csatraw, overwrite = TRUE)
59 |
# Build the recoded `csat` data set from the raw `csatraw` data.

# The pipeline below uses unqualified dplyr verbs, tidyselect helpers and
# `%>%`, so dplyr has to be attached for this script to run on its own.
library(dplyr)

# Factor labels in scale order: ratings are coded 1-5, last contact 0-3.
rating_labels <- c("Very unsatisfied",
                   "Unsatisfied",
                   "Neutral",
                   "Satisfied",
                   "Very satisfied")
contact_labels <- c("no contact",
                    "more than 3 years ago",
                    "within 1 to 3 years",
                    "within last year")

csat <- csatraw %>%
  rename(csat = item1,
         csat_open = item1_open) %>%
  # "item2a" -> "item2_contact", "item2b" -> "item2_rating", and so on.
  rename_with(~ sub("a$", "_contact", .x),
              .cols = matches("\\da$")) %>%
  rename_with(~ sub("b$", "_rating", .x),
              .cols = matches("\\db$")) %>%
  # Replace the generic item prefix with the name of the contact channel.
  rename_with(~ gsub("item2", "postal", .x),
              .cols = starts_with("item2")) %>%
  rename_with(~ gsub("item3", "phone", .x),
              .cols = starts_with("item3")) %>%
  rename_with(~ gsub("item4", "email", .x),
              .cols = starts_with("item4")) %>%
  rename_with(~ gsub("item5", "website", .x),
              .cols = starts_with("item5")) %>%
  rename_with(~ gsub("item6", "shop", .x),
              .cols = starts_with("item6")) %>%
  mutate(type = factor(type, levels = c("new", "existing", "reactivate")),
         product = factor(product, levels = c("basic", "advanced", "premium")),
         # Numeric codes become labelled factors; NA stays NA.
         across(ends_with("csat") | ends_with("_rating"),
                ~ factor(.x, levels = 1:5, labels = rating_labels)),
         across(ends_with("_contact"),
                ~ factor(.x, levels = 0:3, labels = contact_labels)),
         # `csat_open` holds comma-separated codes such as "11, 13". Swap each
         # code for its label, one code at a time, using `lookup_ls` as the
         # single source of the code/label pairs. The result stays a character
         # column.
         csat_open = Reduce(
           function(txt, code) gsub(code, lookup_ls[[code]], txt, fixed = TRUE),
           names(lookup_ls),
           csat_open
         ))

usethis::use_data(csat, overwrite = TRUE)
112 |
113 |
--------------------------------------------------------------------------------
/data/csat.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/data/csat.rda
--------------------------------------------------------------------------------
/data/csatraw.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/data/csatraw.rda
--------------------------------------------------------------------------------
/dplyover.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 |
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 |
--------------------------------------------------------------------------------
/man/across2.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/across2.R
3 | \name{across2}
4 | \alias{across2}
5 | \alias{across2x}
6 | \title{Apply functions to two sets of columns simultaneously in 'dplyr'}
7 | \usage{
8 | across2(.xcols, .ycols, .fns, ..., .names = NULL, .names_fn = NULL)
9 |
10 | across2x(
11 | .xcols,
12 | .ycols,
13 | .fns,
14 | ...,
15 | .names = NULL,
16 | .names_fn = NULL,
17 | .comb = "all"
18 | )
19 | }
20 | \arguments{
21 | \item{.xcols, .ycols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to transform.
22 | Note that you can not select or compute upon grouping variables.}
23 |
24 | \item{.fns}{Functions to apply to each column in \code{.xcols} and \code{.ycols}.
25 |
26 | Possible values are:
27 | \itemize{
28 | \item A function
29 | \item A purrr-style lambda
30 | \item A list of functions/lambdas
31 | }
32 |
33 | Note that \code{NULL} is not accepted as argument to \code{.fns}.}
34 |
35 | \item{...}{Additional arguments for the function calls in \code{.fns}.}
36 |
37 | \item{.names}{A glue specification that describes how to name the output
38 | columns. This can use:
39 | \itemize{
40 | \item \code{{xcol}} to stand for the selected column name in \code{.xcols},
41 | \item \code{{ycol}} to stand for the selected column name in \code{.ycols}, and
42 | \item \code{{fn}} to stand for the name of the function being applied.
43 | }
44 |
45 | The default (\code{NULL}) is equivalent to \code{"{xcol}_{ycol}"} for the single function
46 | case and \code{"{xcol}_{ycol}_{fn}"} for the case where a list is used for \code{.fns}.
47 |
48 | \code{across2()} supports two additional glue specifications: \code{{pre}} and \code{{suf}}.
49 | They extract the common alphanumeric prefix or suffix of each pair of
50 | variables.
51 |
52 | Alternatively to a glue specification, a character vector of length equal
53 | to the number of columns to be created can be supplied to \code{.names}.
54 | Note that in this case, the glue specification described above is not supported.}
55 |
56 | \item{.names_fn}{Optionally, a function that is applied after the glue
57 | specification in \code{.names} has been evaluated. This is, for example, helpful,
58 | in case the resulting names need to be further cleaned or trimmed.}
59 |
60 | \item{.comb}{In \code{across2x()} this argument allows to control which
61 | combinations of columns are to be created. This argument only matters, if
62 | the columns specified in \code{.xcols} and \code{.ycols} overlap to some extent.
63 | \itemize{
64 | \item \code{"all"}, the default, will create all pairwise combinations between columns
65 | in \code{.xcols} and \code{.ycols} \emph{including all permutations} (e.g.
66 | \code{foo(column_x, column_y)} as well as \code{foo(column_y, column_x)}).
67 | \item \code{"unique"} will only create all unordered combinations (e.g. creates
68 | \code{foo(column_x, column_y)}, while \code{foo(column_y, column_x)} \emph{will not} be created)
69 | \item \code{"minimal"} same as \code{"unique"} and further skips all self-matches (e.g.
70 | \code{foo(column_x, column_x)} \emph{will not} be created)
71 | }}
72 | }
73 | \value{
74 | \code{across2()} returns a tibble with one column for each pair of elements in
75 | \code{.xcols} and \code{.ycols} combined with each function in \code{.fns}.
76 |
77 | \code{across2x()} returns a tibble with one column for each combination between
78 | elements in \code{.xcols} and \code{.ycols} combined with each function in \code{.fns}.
79 | }
80 | \description{
81 | \code{across2()} and \code{across2x()} are variants of \code{\link[dplyr:across]{dplyr::across()}} that iterate
82 | over two columns simultaneously. \code{across2()} loops each \emph{pair of columns} in \code{.xcols}
83 | and \code{.ycols} over one or more functions, while \code{across2x()} loops
84 | \emph{every combination between columns} in \code{.xcols} and \code{.ycols} over one or more functions.
85 | }
86 | \section{Examples}{
87 |
88 |
89 | For the basic functionality of \code{across()} please refer to the examples in
90 | \code{\link[dplyr:across]{dplyr::across()}}.\if{html}{\out{
}}
95 |
96 | \code{across2()} can be used to transform pairs of variables in one or more functions.
97 | In the example below we want to calculate the product and the sum of all pairs of
98 | 'Length' and 'Width' variables. We can use \code{{pre}} in the glue specification in
99 | \code{.names} to extract the common prefix of each pair of variables. We can further
100 | transform the names, in the example setting them \code{tolower} by specifying the
101 | \code{.names_fn} argument:\if{html}{\out{
}}
117 |
118 | \code{across2x()} can be used to perform calculations on each combination of variables.
119 | In the example below we calculate the correlation between all variables in the
120 | \code{iris} data set for each group. To do this, we \code{group_by} 'Species' and specify
121 | the {tidyselect} helper \code{everything()} to \code{.xcols} and \code{.ycols}.
122 | \code{~ round(cor(.x, .y), 2)} gives us the correlation rounded to two digits for each
123 | pair of variables. We trim the rather long variable names by replacing "Sepal"
124 | with "S", and "Petal" with "P" in the \code{.names_fn} argument. Finally, we are not
125 | interested in correlations of the same column and want to avoid excessive results
126 | by setting the \code{.comb} argument to \code{"minimal"}.\if{html}{\out{
}}
142 | }
143 |
144 |
--------------------------------------------------------------------------------
/man/crossover.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/crossover.R
3 | \name{crossover}
4 | \alias{crossover}
5 | \title{Apply functions to a set of columns and a vector simultaneously in 'dplyr'}
6 | \usage{
7 | crossover(
8 | .xcols = dplyr::everything(),
9 | .y,
10 | .fns,
11 | ...,
12 | .names = NULL,
13 | .names_fn = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{.xcols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to transform.
18 | Because \code{crossover()} is used within functions like \code{summarise()} and
19 | \code{mutate()}, you can't select or compute upon grouping variables.}
20 |
21 | \item{.y}{An atomic vector or list to apply functions to. \code{crossover()} also
22 | accepts a function as \code{.y} argument. In this case each column in \code{.xcols}
23 | is looped over all the outputs that it generated with the function supplied
24 | to \code{.y}. Note: the underlying data must not be grouped, if a function
25 | is supplied to \code{.y}.
26 |
27 | If a function is supplied, the following values are possible:
28 | \itemize{
29 | \item A bare function name, e.g. \code{unique}
30 | \item An anonymous function, e.g. \code{function(x) unique(x)}
31 | \item A purrr-style lambda, e.g. \code{~ unique(.x, fromLast = TRUE)}
32 | }
33 |
34 | Note that additional arguments can only be specified with an anonymous
35 | function, a purrr-style lambda or with a pre-filled custom function.}
36 |
37 | \item{.fns}{Functions to apply to each column in \code{.xcols} and element in \code{.y}.
38 |
39 | Possible values are:
40 | \itemize{
41 | \item A function
42 | \item A purrr-style lambda
43 | \item A list of functions/lambdas
44 | }
45 |
46 | Note that \code{NULL} is not accepted as argument to \code{.fns}.}
47 |
48 | \item{...}{Additional arguments for the function calls in \code{.fns}.}
49 |
50 | \item{.names}{A glue specification that describes how to name the output
51 | columns. This can use:
52 | \itemize{
53 | \item \code{{xcol}} to stand for the selected column name,
54 | \item \code{{y}} to stand for the selected vector element, and
55 | \item \code{{fn}} to stand for the name of the function being applied.
56 | }
57 |
58 | The default (\code{NULL}) is equivalent to \code{"{xcol}_{y}"} for the single function
59 | case and \code{"{xcol}_{y}_{fn}"} for the case where a list is used for \code{.fns}.
60 |
61 | Note that, depending on the nature of the underlying object in \code{.y},
62 | specifying \code{{y}} will yield different results:
63 | \itemize{
64 | \item If \code{.y} is an unnamed atomic vector, \code{{y}} will represent each value.
65 | \item If \code{.y} is a named list or atomic vector, \code{{y}} will represent each name.
66 | \item If \code{.y} is an unnamed list, \code{{y}} will be the index number running from 1 to \code{length(y)}.
67 | }
68 |
69 | This standard behavior (interpretation of \code{{y}}) can be overwritten by
70 | directly specifying:
71 | \itemize{
72 | \item \code{{y_val}} for \code{.y}'s values
73 | \item \code{{y_nm}} for its names
74 | \item \code{{y_idx}} for its index numbers
75 | }
76 |
77 | Alternatively, a character vector of length equal to the number of columns to
78 | be created can be supplied to \code{.names}. Note that in this case, the glue
79 | specification described above is not supported.}
80 |
81 | \item{.names_fn}{Optionally, a function that is applied after the glue
82 | specification in \code{.names} has been evaluated. This is, for example, helpful,
83 | in case the resulting names need to be further cleaned or trimmed.}
84 | }
85 | \value{
86 | \code{crossover()} returns a tibble with one column for each combination of
87 | columns in \code{.xcols}, elements in \code{.y} and functions in \code{.fns}.
88 |
89 | If a function is supplied as \code{.y} argument, \code{crossover()} returns a tibble with
90 | one column for each pair of output elements of \code{.y} and the column in \code{.xcols}
91 | that generated the output combined with each function in \code{.fns}.
92 | }
93 | \description{
94 | \code{crossover()} combines the functionality of \code{\link[dplyr:across]{dplyr::across()}} with \code{\link[=over]{over()}}
95 | by iterating simultaneously over (i) a set of columns (\code{.xcols}) and (ii)
96 | a vector or list (\code{.y}). \code{crossover()} \emph{always} applies the functions in
97 | \code{.fns} in a \emph{nested} way to a combination of both inputs. There are, however,
98 | two different ways in which the functions in \code{.fns} are applied.
99 |
100 | When \code{.y} is a vector or list, each function in \code{.fns} is applied to
101 | \emph{all pairwise combinations} between columns in \code{.xcols} and elements in
102 | \code{.y} (this resembles the behavior of \code{over2x()} and \code{across2x()}).
103 |
104 | \code{crossover()} has one trick up its sleeve, which sets it apart from the other
105 | functions in the <\code{\link[=over_across_family]{over-across family}}>: Its second input
106 | (\code{.y}) can be a function. This changes the original behavior slightly: First
107 | the function in \code{.y} is applied to all columns in \code{.xcols} to \emph{generate} an
108 | input object which will be used as \code{.y} in the function calls in \code{.fns}.
109 | In this case each function is applied to all pairs between (i) columns in
110 | \code{.xcols} with (ii) the output elements that they generated through the
111 | function that was originally supplied to \code{.y}. Note that the underlying
112 | data must not be grouped, if a function is supplied to \code{.y}. For examples see
113 | the example section below.
114 | }
115 | \section{Examples}{
116 |
117 |
118 | For the basic functionality please refer to the examples in \code{\link[=over]{over()}} and
119 | \code{\link[dplyr:across]{dplyr::across()}}.\if{html}{\out{
}}
124 | \subsection{Creating many similar variables for multiple columns}{
125 |
126 | If \code{.y} is a vector or list, \code{crossover()} loops every combination between
127 | columns in \code{.xcols} and elements in \code{.y} over the functions in \code{.fns}. This
128 | is helpful in cases where we want to create a batch of similar variables with
129 | only slight changes in the arguments of the calling function. A good example
130 | are lagged variables. Below we create five lagged variables for each
131 | 'Sepal.Length' and 'Sepal.Width'. To create nice names we use a named list
132 | as argument in \code{.fns} and specify the glue syntax in \code{.names}.\if{html}{\out{
}}
152 | }
153 |
154 | \subsection{Creating dummy variables for multiple variables (columns)}{
155 |
156 | The \code{.y} argument of \code{crossover()} can take a function instead of list or vector.
157 | In the example below we select the columns 'type', 'product', 'csat' in \code{.xcols}.
158 | We supply the function \code{\link[=dist_values]{dist_values()}} to \code{.y}, which is a cleaner variant of
159 | base R's \code{unique()}. This generates all distinct values for all three selected
160 | variables. Now, the function in \code{.fns}, \code{~ if_else(.y == .x, 1, 0)}, is applied
161 | to each pair of distinct value in \code{.y} and the column in \code{.xcols} that generated
162 | this value. This basically creates a dummy variable for each value of each
163 | variable. Since some of the values contain whitespace characters, we can use the
164 | \code{.names_fn} argument to supply a \emph{third} function that cleans the output names
165 | by replacing spaces with an underscore and setting all characters \code{tolower()}.\if{html}{\out{
}}
187 | }
188 | }
189 |
190 | \seealso{
191 | Other members of the <\code{\link[=over_across_family]{over-across function family}}>.
192 | }
193 |
--------------------------------------------------------------------------------
/man/csat.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-csat.R
3 | \docType{data}
4 | \name{csat}
5 | \alias{csat}
6 | \title{Customer Satisfaction Survey (recoded data)}
7 | \format{
8 | A tibble with 150 rows and 15 variables:
9 | \describe{
10 | \item{cust_id}{Customer identification number}
11 | \item{type}{Type of customer: "new", "existing" or "reactivate"}
12 | \item{product}{The type of product: "basic", "advanced" or "premium"}
13 | \item{csat}{The overall Customer Satisfaction Score}
14 | \item{csat_open}{Follow-up question why the respondent gave this specific
15 | Customer Satisfaction rating. The open-ended answers have been coded into six
16 | categories (multiple answers possible).}
17 | \item{postal_contact, phone_contact, email_contact, website_contact,
18 | shop_contact}{When did the customer have last contact via given channel?}
19 | \item{postal_rating, phone_rating, email_rating, website_rating,
20 | shop_rating}{If customer had contact over the given channel:
21 | How satisfied was he?}
22 | }
23 | }
24 | \usage{
25 | csat
26 | }
27 | \description{
28 | This data is randomly generated. It resembles data from a customer
29 | satisfaction survey using CSAT (Customer Satisfaction Score) for a
30 | contract-based product. The data has been recoded. The raw version of this data
31 | set can be found here <\code{\link{csatraw}}>.
32 | }
33 | \examples{
34 | csat
35 | }
36 | \keyword{datasets}
37 |
--------------------------------------------------------------------------------
/man/csatraw.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data-csatraw.R
3 | \docType{data}
4 | \name{csatraw}
5 | \alias{csatraw}
6 | \title{Customer Satisfaction Survey (raw data)}
7 | \format{
8 | A tibble with 150 rows and 15 variables:
9 | \describe{
10 | \item{cust_id}{Customer identification number}
11 | \item{type}{Type of customer: "new", "existing" or "reactivate"}
12 | \item{product}{The type of product: "basic", "advanced" or "premium"}
13 | \item{item1}{The overall Customer Satisfaction Score\cr\cr Scale: Ranging from 1 =
14 | "Very unsatisfied" to 5 = "Very satisfied"}
15 | \item{item1_open}{Follow-up question why the respondent gave this specific
16 | Customer Satisfaction rating. The open-ended answers have been coded into six
17 | categories: 11 = "great product", 12 = "good service", 13 = "friendly staff",
18 | 21 = "too expensive", 22 = "unfriendly", 23 = "no response" (multiple answers
19 | possible).}
20 | \item{item2a, item3a, item4a, item5a, item6a}{When did the customer have last
21 | contact via postal mail (item2a), phone (item3a), email (item4a), website
22 | (item5a), a retail shop (item6a) ?\cr\cr Scale: 0 = "no contact", 1 = "more
23 | than 3 years ago", 2 = "within 1 to 3 years", 3 = "within the last year"}
24 | \item{item2b, item3b, item4b, item5b, item6b}{If customer had contact
25 | via postal mail (item2b), phone (item3b), email (item4b), website (item5b),
26 | a retail shop (item6b): How satisfied was he?\cr\cr
27 | Scale: Ranging from 1 = "Very unsatisfied", to 5 = "Very satisfied"}
28 | }
29 | }
30 | \usage{
31 | csatraw
32 | }
33 | \description{
34 | This data is randomly generated. It resembles raw data from a customer
35 | satisfaction survey using CSAT (Customer Satisfaction Score) for a
36 | contract-based product. The first three variables are given, all other
37 | variables come from a survey tool and are only named "item1" etc.
38 | A recoded version of this data set can be found here <\code{\link{csat}}>.
39 | }
40 | \examples{
41 | csatraw
42 | }
43 | \keyword{datasets}
44 |
--------------------------------------------------------------------------------
/man/dplyover-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dplyover.R
3 | \docType{package}
4 | \name{dplyover-package}
5 | \alias{dplyover}
6 | \alias{dplyover-package}
7 | \title{dplyover: Create columns by applying functions to vectors and/or columns in 'dplyr'}
8 | \description{
9 | To learn more about dplyover, start with the vignette:
10 | \code{browseVignettes(package = "dplyover")}
11 | }
12 | \seealso{
13 | Useful links:
14 | \itemize{
15 | \item \url{https://github.com/TimTeaFan/dplyover}
16 | \item Report bugs at \url{https://github.com/TimTeaFan/dplyover/issues}
17 | }
18 |
19 | }
20 | \author{
21 | \strong{Maintainer}: Tim Tiefenbach \email{mailme@tim-tiefenbach.de} (\href{https://orcid.org/0000-0001-9443-2434}{ORCID})
22 |
23 | }
24 |
--------------------------------------------------------------------------------
/man/figures/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-120x120.png
--------------------------------------------------------------------------------
/man/figures/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-152x152.png
--------------------------------------------------------------------------------
/man/figures/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-180x180.png
--------------------------------------------------------------------------------
/man/figures/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-60x60.png
--------------------------------------------------------------------------------
/man/figures/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon-76x76.png
--------------------------------------------------------------------------------
/man/figures/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/apple-touch-icon.png
--------------------------------------------------------------------------------
/man/figures/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/favicon-16x16.png
--------------------------------------------------------------------------------
/man/figures/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/favicon-32x32.png
--------------------------------------------------------------------------------
/man/figures/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/favicon.ico
--------------------------------------------------------------------------------
/man/figures/lifecycle-archived.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-defunct.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-maturing.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-questioning.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/logo.png
--------------------------------------------------------------------------------
/man/figures/logo_big.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TimTeaFan/dplyover/f0cd984586bafdc0dc78fc4ead2d76ba50d9370e/man/figures/logo_big.png
--------------------------------------------------------------------------------
/man/over.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/over.R
3 | \name{over}
4 | \alias{over}
5 | \title{Apply functions to a list or vector in 'dplyr'}
6 | \usage{
7 | over(.x, .fns, ..., .names = NULL, .names_fn = NULL)
8 | }
9 | \arguments{
10 | \item{.x}{An atomic vector or list to apply functions to. Alternatively a
11 | <\code{\link[=selection_helpers]{selection helper}}> can be used to create
12 | a vector.}
13 |
14 | \item{.fns}{Functions to apply to each of the elements in \code{.x}. For
15 | functions that expect variable names as input, the selected strings need to
16 | be turned into symbols and evaluated. \code{dplyover} comes with a genuine helper
17 | function that evaluates strings as names \code{\link[=.]{.()}}.
18 |
19 | Possible values are:
20 | \itemize{
21 | \item A function
22 | \item A purrr-style lambda
23 | \item A list of functions/lambdas
24 | }
25 |
26 | For examples see the example section below.
27 |
28 | Note that, unlike \code{across()}, \code{over()} does not accept \code{NULL} as a
29 | value to \code{.fns}.}
30 |
31 | \item{...}{Additional arguments for the function calls in \code{.fns}.}
32 |
33 | \item{.names}{A glue specification that describes how to name the output
34 | columns. This can use \code{{x}} to stand for the selected vector element, and
35 | \code{{fn}} to stand for the name of the function being applied. The default
36 | (\code{NULL}) is equivalent to \code{"{x}"} for the single function case and
37 | \code{"{x}_{fn}"} for the case where a list is used for \code{.fns}.
38 |
39 | Note that, depending on the nature of the underlying object in \code{.x},
40 | specifying \code{{x}} will yield different results:
41 | \itemize{
42 | \item If \code{.x} is an unnamed atomic vector, \code{{x}} will represent each value.
43 | \item If \code{.x} is a named list or atomic vector, \code{{x}} will represent each name.
44 | \item If \code{.x} is an unnamed list, \code{{x}} will be the index number running from 1 to \code{length(x)}.
45 | }
46 |
47 | This standard behavior (interpretation of \code{{x}}) can be overwritten by
48 | directly specifying:
49 | \itemize{
50 | \item \code{{x_val}} for \code{.x}'s values
51 | \item \code{{x_nm}} for its names
52 | \item \code{{x_idx}} for its index numbers
53 | }
54 |
55 | Alternatively, a character vector of length equal to the number of columns to
56 | be created can be supplied to \code{.names}. Note that in this case, the glue
57 | specification described above is not supported.}
58 |
59 | \item{.names_fn}{Optionally, a function that is applied after the glue
60 | specification in \code{.names} has been evaluated. This is, for example, helpful
61 | in case the resulting names need to be further cleaned or trimmed.}
62 | }
63 | \value{
64 | A tibble with one column for each element in \code{.x} and each function in \code{.fns}.
65 | }
66 | \description{
67 | \code{over()} makes it easy to create new columns inside a \code{\link[dplyr:mutate]{dplyr::mutate()}} or
68 | \code{\link[dplyr:summarise]{dplyr::summarise()}} call by applying a function (or a set of functions) to
69 | an atomic vector or list using a syntax similar to \code{\link[dplyr:across]{dplyr::across()}}.
70 | The main difference is that \code{\link[dplyr:across]{dplyr::across()}} transforms or creates new columns
71 | based on existing ones, while \code{over()} can create new columns based on a
72 | vector or list to which it will apply one or several functions.
73 | Whereas \code{\link[dplyr:across]{dplyr::across()}} allows \code{tidy-selection} helpers to select columns,
74 | \code{over()} provides its own helper functions to select strings or values based
75 | on either (1) values of specified columns or (2) column names. See the
76 | examples below and the \code{vignette("why_dplyover")} for more details.
77 | }
78 | \section{Note}{
79 |
80 | Similar to \code{dplyr::across()} \code{over()} works only inside dplyr verbs.
81 | }
82 |
83 | \section{Examples}{
84 |
85 |
86 | It has two main use cases. They differ in how the elements in \code{.x}
87 | are used. Let's first attach \code{dplyr}:\if{html}{\out{
}}
92 | \subsection{(1) The General Use Case}{
93 |
94 | Here the values in \code{.x} are used as inputs to one or more functions in \code{.fns}.
95 | This is useful, when we want to create several new variables based on the same
96 | function with varying arguments. A good example is creating a bunch of lagged
97 | variables.\if{html}{\out{
}}\preformatted{tibble(x = 1:25) \%>\%
98 | mutate(over(c(1:3),
99 | ~ lag(x, .x)))
100 | #> # A tibble: 25 x 4
101 | #> x `1` `2` `3`
102 | #>
103 | #> 1 1 NA NA NA
104 | #> 2 2 1 NA NA
105 | #> 3 3 2 1 NA
106 | #> 4 4 3 2 1
107 | #> # ... with 21 more rows
108 | }\if{html}{\out{
}}
109 |
110 | Let's create a dummy variable for each unique value in 'Species':\if{html}{\out{
}}
123 |
124 | With \code{over()} it is also possible to create several dummy variables with
125 | different thresholds. We can use the \code{.names} argument to control the output
126 | names:\if{html}{\out{
}}
140 |
141 | A similar approach can be used with dates. Below we loop over a date
142 | sequence to check whether the date falls within a given start and end
143 | date. We can use the \code{.names_fn} argument to clean the resulting output
144 | names:\if{html}{\out{
}}
176 |
177 | \code{over()} can summarise data in wide format. In the example below, we want to
178 | know for each group of customers (\code{new}, \code{existing}, \code{reactivate}), what
179 | percentage of the respondents gave which rating on a five point Likert scale
180 | (\code{item1}). A usual approach in the tidyverse would be to use
181 | \code{count \%>\% group_by \%>\% mutate}, which yields the same result in the usually
182 | preferred long format. Sometimes, however, we might want this kind of summary
183 | in the wide format, and in this case \code{over()} comes in handy:\if{html}{\out{
}}
194 |
195 | Instead of a vector we can provide a named list of vectors to calculate the
196 | top two and bottom two categories on the fly:\if{html}{\out{
}}
209 |
210 | \code{over()} can also loop over columns of a data.frame. In the example below we
211 | want to create four different dummy variables of \code{item1}: (i) the top and (ii)
212 | bottom category as well as (iii) the top two and (iv) the bottom two categories.
213 | We can create a lookup \code{data.frame} and use all columns but the first as input to
214 | \code{over()}. In the function call we make use of base R's \code{match()}, where \code{.x}
215 | represents the new values and \code{recode_df[, 1]} refers to the old values.\if{html}{\out{
}}
236 |
237 | \code{over()} works nicely with comma separated values stored in character vectors.
238 | In the example below, the column \code{csat_open} contains one or more comma
239 | separated reasons why a specific customer satisfaction rating was given.
240 | We can easily create a column for each response category with the help of
241 | \code{dist_values} - a wrapper around \code{unique} which can split vector elements
242 | using a separator:\if{html}{\out{
}}
257 | }
258 |
259 | \subsection{(2) A Very Specific Use Case}{
260 |
261 | Here strings are supplied to \code{.x} to construct column names (sharing the
262 | same stem). This allows us to dynamically use more than one column in the
263 | function calls in \code{.fns}. To work properly, the strings need to be
264 | turned into symbols and evaluated. For this {dplyover} provides a genuine
265 | helper function \code{.()} that evaluates strings and helps to declutter the
266 | otherwise rather verbose code. \code{.()} supports glue syntax and takes a string
267 | as argument.
268 |
269 | Below are a few examples using two columns in the function calls in \code{.fns}.
270 | For the two column case \code{\link[=across2]{across2()}} provides a more intuitive API that is
271 | closer to the original \code{dplyr::across}. Using \code{.()} inside \code{over} is really
272 | useful for cases with more than two columns.
273 |
274 | Consider the following example of a purrr-style formula in \code{.fns} using \code{.()}:\if{html}{\out{
}}
344 | }
345 | }
346 |
347 | \seealso{
348 | \code{\link[=over2]{over2()}} to apply a function to two objects.
349 |
350 | All members of the <\code{\link[=over_across_family]{over-across function family}}>.
351 | }
352 |
--------------------------------------------------------------------------------
/man/over2.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/over2.R
3 | \name{over2}
4 | \alias{over2}
5 | \alias{over2x}
6 | \title{Apply functions to two vectors simultaneously in 'dplyr'}
7 | \usage{
8 | over2(.x, .y, .fns, ..., .names = NULL, .names_fn = NULL)
9 |
10 | over2x(.x, .y, .fns, ..., .names = NULL, .names_fn = NULL)
11 | }
12 | \arguments{
13 | \item{.x, .y}{An atomic vector or list to apply functions to. Alternatively a
14 | <\code{\link[=selection_helpers]{selection helper}}> can be used to create a vector.
15 | \code{over2()} requires \code{.x} and \code{.y} to be of the same length.}
16 |
17 | \item{.fns}{Functions to apply to each of the elements in \code{.x} and \code{.y}.
18 |
19 | Possible values are:
20 | \itemize{
21 | \item A function
22 | \item A purrr-style lambda
23 | \item A list of functions/lambdas
24 | }
25 |
26 | For examples see the example section below.
27 |
28 | Note that \code{NULL} is not accepted as argument to \code{.fns}.}
29 |
30 | \item{...}{Additional arguments for the function calls in \code{.fns}.}
31 |
32 | \item{.names}{A glue specification that describes how to name the output
33 | columns. This can use \code{{x}} and \code{{y}} to stand for the selected vector element,
34 | and \code{{fn}} to stand for the name of the function being applied. The default
35 | (\code{NULL}) is equivalent to \code{"{x}_{y}"} for the single function case and
36 | \code{"{x}_{y}_{fn}"} for the case where a list is used for \code{.fns}.
37 |
38 | Note that, depending on the nature of the underlying object in \code{.x} and \code{.y},
39 | specifying \code{{x}/{y}} will yield different results:
40 | \itemize{
41 | \item If \code{.x/.y} is an unnamed atomic vector, \code{{x}/{y}} will represent each value.
42 | \item If \code{.x/.y} is a named list or atomic vector, \code{{x}/{y}} will represent each name.
43 | \item If \code{.x/.y} is an unnamed list, \code{{x}/{y}} will be the index number running
44 | from 1 to \code{length(x)} or \code{length(y)} respectively.
45 | }
46 |
47 | This standard behavior (interpretation of \code{{x}/{y}}) can be overwritten by
48 | directly specifying:
49 | \itemize{
50 | \item \code{{x_val}} or \code{{y_val}} for \code{.x}'s or \code{.y}'s values
51 | \item \code{{x_nm}} or \code{{y_nm}} for their names
52 | \item \code{{x_idx}} or \code{{y_idx}} for their index numbers
53 | }
54 |
55 | Alternatively, a character vector of length equal to the number of columns to
56 | be created can be supplied to \code{.names}. Note that in this case, the glue
57 | specification described above is not supported.}
58 |
59 | \item{.names_fn}{Optionally, a function that is applied after the glue
60 | specification in \code{.names} has been evaluated. This is, for example, helpful
61 | in case the resulting names need to be further cleaned or trimmed.}
62 | }
63 | \value{
64 | \code{over2()} returns a tibble with one column for each pair of elements in \code{.x}
65 | and \code{.y} combined with each function in \code{.fns}.
66 |
67 | \code{over2x()} returns a tibble with one column for each combination between elements
68 | in \code{.x} and \code{.y} combined with each function in \code{.fns}.
69 | }
70 | \description{
71 | \code{over2()} and \code{over2x()} are variants of \code{\link[=over]{over()}} that iterate over two
72 | objects simultaneously. \code{over2()} loops each \emph{pair of elements} in \code{.x} and
73 | \code{.y} over one or more functions, while \code{over2x()} loops
74 | \emph{all pairwise combinations between elements} in \code{.x} and \code{.y} over one or more
75 | functions.
76 | }
77 | \section{Examples}{
78 |
79 |
80 | For the basic functionality please refer to the examples in \code{\link[=over]{over()}}.\if{html}{\out{
}}
85 |
86 | When doing exploratory analysis, it is often helpful to transform continuous variables
87 | into several categorical variables. Below we use \code{over2()} to loop over two lists
88 | containing "breaks" and "labels" arguments, which we then use in a call to \code{cut()}:\if{html}{\out{
}}\preformatted{brks <- list(b1 = 3:8,
89 | b2 = seq(3, 9, by = 2))
90 |
91 | labs <- list(l1 = c("3 to 4", "4 to 5", "5 to 6",
92 | "6 to 7", "7 to 8"),
93 | l2 = c("3 to 5", "5 to 7", "7 to 9"))
94 |
95 | iris \%>\%
96 | transmute(over2(brks, labs,
97 | ~ cut(Sepal.Length,
98 | breaks = .x,
99 | labels = .y),
100 | .names = "Sepal.Length.cut\{x_idx\}"))
101 | #> # A tibble: 150 x 2
102 | #> Sepal.Length.cut1 Sepal.Length.cut2
103 | #>
104 | #> 1 5 to 6 5 to 7
105 | #> 2 4 to 5 3 to 5
106 | #> 3 4 to 5 3 to 5
107 | #> 4 4 to 5 3 to 5
108 | #> # ... with 146 more rows
109 | }\if{html}{\out{
}}
110 |
111 | \code{over2x()} makes it possible to create dummy variables for interaction effects
112 | of two variables. In the example below, each customer 'type' is combined with
113 | each 'product' type:\if{html}{\out{
}}
130 | }
131 |
132 |
--------------------------------------------------------------------------------
/man/over_across_family.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/over_across_family.R
3 | \name{over_across_family}
4 | \alias{over_across_family}
5 | \title{The over-across function family}
6 | \description{
7 | \code{dplyover} extends \code{dplyr}'s functionality by building a function family
8 | around \code{dplyr::across()}.
9 |
10 | The goal of this \strong{over-across function family} is to provide a concise and
11 | uniform syntax which can be used to create columns by applying functions to
12 | vectors and / or sets of columns in dplyr. Ideally, this will improve our
13 | mental model so that it is easier to tackle problems where the solution is
14 | based on creating new columns.
15 |
16 | The functions in the over-across function family create columns by applying
17 | one or several functions to:
18 | \subsection{basic functions}{
19 | \itemize{
20 | \item \code{\link[dplyr:across]{dplyr::across()}}: a set of columns
21 | \item \code{\link[=over]{over()}}: a vector (list or atomic vector)
22 | }
23 | }
24 |
25 | \subsection{variants}{
26 | \itemize{
27 | \item \code{\link[=over2]{over2()}} two vectors of the same length (pairwise)
28 | \item \code{\link[=over2x]{over2x()}} two vectors (nested)
29 | \item \code{\link[=across2]{across2()}} two sets of columns (pairwise)
30 | \item \code{\link[=across2x]{across2x()}} two sets of columns (nested)
31 | \item \code{\link[=crossover]{crossover()}} a set of columns and a vector (nested)
32 | }
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/man/rmd/setup.Rmd:
--------------------------------------------------------------------------------
1 | ```{r, include = FALSE}
2 | options(
3 | tibble.print_min = 4,
4 | tibble.max_extra_cols = 8,
5 | digits = 2,
6 | crayon.enabled = FALSE,
7 | cli.unicode = FALSE
8 | )
9 | knitr::opts_chunk$set(
10 | collapse = TRUE,
11 | comment = "#>"
12 | )
13 | library(dplyr)
14 | ```
15 |
--------------------------------------------------------------------------------
/man/select_values.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/select_values.R
3 | \name{select_values}
4 | \alias{select_values}
5 | \alias{dist_values}
6 | \alias{seq_range}
7 | \title{Select values from variables}
8 | \usage{
9 | dist_values(x, .sep = NULL, .sort = c("asc", "desc", "none", "levels"))
10 |
11 | seq_range(x, .by)
12 | }
13 | \arguments{
14 | \item{x}{An atomic vector or list. For \code{\link[=seq_range]{seq_range()}} x must be numeric or date.}
15 |
16 | \item{.sep}{A character vector containing regular expression(s) which are used
17 | for splitting the values (works only if x is a character vector).}
18 |
19 | \item{.sort}{A character string indicating which sorting scheme is to be applied
20 | to distinct values: ascending ("asc"), descending ("desc"), "none" or "levels". The
21 | default is ascending, only if x is a factor the default is "levels".}
22 |
23 | \item{.by}{A number (or date expression) representing the increment of the sequence.}
24 | }
25 | \value{
26 | \code{\link[=dist_values]{dist_values()}} returns a vector of the same type of x, with exception of
27 | factors which are converted to type \code{"character"}.
28 |
29 | \code{\link[=seq_range]{seq_range()}} returns a vector of type \code{"integer"} or \code{"double"}.
30 | }
31 | \description{
32 | These functions are \link[=selection_helpers]{selection helpers}. They are intended
33 | to be used inside all functions that accept a vector as argument (that is \code{over()}
34 | and \code{crossover()} and all their variants) to extract values of a variable.
35 | \itemize{
36 | \item \code{\link[=dist_values]{dist_values()}} returns all distinct values (or in the case of factor variables:
37 | levels) of a variable \code{x} which are not \code{NA}.
38 | \item \code{\link[=seq_range]{seq_range()}} returns the sequence between the \code{range()} of a variable \code{x}.
39 | }
40 | }
41 | \section{Examples}{
42 |
43 |
44 | Selection helpers can be used inside \code{dplyover::over()} which in turn must be
45 | used inside \code{dplyr::mutate} or \code{dplyr::summarise}. Let's first attach \code{dplyr}:\if{html}{\out{
}}
50 |
51 | \code{dist_values()} extracts all distinct values of a column variable.
52 | This is helpful when creating dummy variables in a loop using \code{over()}.\if{html}{\out{
}}
66 |
67 | \code{dist_values()} is just a wrapper around unique. However, it has five
68 | differences:
69 |
70 | (1) \code{NA} values are automatically stripped. Compare:\if{html}{\out{
}}
85 |
86 | (3) As default, the output is sorted in ascending order for non-factors, and
87 | is sorted as the underlying "levels" for factors. This can be controlled by
88 | setting the \code{.sort} argument. Compare:\if{html}{\out{
}}
113 |
114 | (4) When used on a character vector \code{dist_values} can take a separator
115 | \code{.sep} to split the elements accordingly:\if{html}{\out{
}}
127 |
128 | \code{seq_range()} generates a numeric sequence between the \code{min} and \code{max}
129 | values of its input variable. This is helpful when creating many dummy
130 | variables with varying thresholds.\if{html}{\out{
}}
144 |
145 | Note that if the input variable does not have decimal places, \code{min} and \code{max} are
146 | wrapped in \code{ceiling} and \code{floor} accordingly. This will prevent the creation of
147 | variables that contain only \code{0} or \code{1}. Compare the output below with the
148 | example above:\if{html}{\out{
}}
174 | }
175 |
176 |
--------------------------------------------------------------------------------
/man/select_vars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/select_strings.R
3 | \name{select_vars}
4 | \alias{select_vars}
5 | \alias{cut_names}
6 | \alias{extract_names}
7 | \title{Select string parts or patterns of column names}
8 | \usage{
9 | cut_names(.pattern, .remove = NULL, .vars = NULL)
10 |
11 | extract_names(.pattern, .remove = NULL, .vars = NULL)
12 | }
13 | \arguments{
14 | \item{.pattern}{Pattern to look for.}
15 |
16 | \item{.remove}{Pattern to remove from the variable names provided in \code{.vars}.
17 | When this argument is provided, all variables names in \code{.vars} that match
18 | the pattern specified in \code{.remove} will be removed, before the \code{.pattern} to
19 | look for will be applied.}
20 |
21 | \item{.vars}{A character vector with variables names. When used inside \code{over}
22 | all column names of the underlying data are automatically supplied to \code{.vars}.
23 | This argument is useful when testing the functionality outside the context of
24 | \code{over()}.}
25 | }
26 | \value{
27 | A character vector.
28 | }
29 | \description{
30 | These functions are \link[=selection_helpers]{selection helpers}.
31 | They are intended to be used inside \code{over()} to extract parts or patterns of
32 | the column names of the underlying data.
33 | \itemize{
34 | \item \code{\link[=cut_names]{cut_names()}} selects strings by removing (cutting off) the specified \code{.pattern}.
35 | This functionality resembles \code{stringr::str_remove_all()}.
36 | \item \code{\link[=extract_names]{extract_names()}} selects strings by extracting the specified \code{.pattern}.
37 | This functionality resembles \code{stringr::str_extract()}.
38 | }
39 | }
40 | \section{Examples}{
41 |
42 |
43 | Selection helpers can be used inside \code{dplyover::over()} which in turn must be
44 | used inside \code{dplyr::mutate} or \code{dplyr::summarise}. Let's first attach \code{dplyr}
45 | (and \code{stringr} for comparison):\if{html}{\out{
}}
51 |
52 | Let's first compare \code{cut_names()} and \code{extract_names()} to their {stringr}
53 | equivalents \code{stringr::str_remove_all()} and \code{stringr::str_extract()}:
54 |
55 | We can observe two main differences:
56 |
57 | (1) \code{cut_names()} and \code{extract_names()} only return strings where the function
58 | was applied successfully (when characters have actually been removed or
59 | extracted). \code{stringr::str_remove_all()} returns unmatched strings as is, while
60 | \code{stringr::str_extract()} returns \code{NA}.\if{html}{\out{
}}
84 |
85 | The examples above do not show that \code{cut_names()} removes \emph{all} strings matching
86 | the \code{.pattern} argument, while \code{extract_names()} does only extract the \code{.pattern}
87 | \emph{one} time:\if{html}{\out{
}}
97 |
98 | Within \code{\link[=over]{over()}} \code{cut_names()} and \code{extract_names()} automatically use the
99 | column names of the underlying data:\if{html}{\out{
}}
125 |
126 | What problem does \code{cut_names()} solve?
127 | In the example above using \code{cut_names()} might not seem helpful, since we could easily
128 | use \code{c("Sepal", "Petal")} instead. However, there are cases where we have
129 | data with a lot of similar pairs of variables sharing a common prefix or
130 | suffix. If we want to loop over them using \code{over()} then \code{cut_names()} comes
131 | in handy.
132 |
133 | The usage of \code{extract_names()} might be less obvious. Let's look at raw data
134 | from a customer satisfaction survey which contains the following variables.\if{html}{\out{
}}
153 |
154 | The survey has several 'item's consisting of two sub-questions / variables 'a'
155 | and 'b'. Let's say we want to calculate the product of those two variables for
156 | each item. \code{extract_names()} helps us to select all variables containing
157 | 'item' followed by a digit using the regex \code{"item\\\\d"} as \code{.pattern}.
158 | However, there is 'item1' and 'item1_open' which are not followed by \code{a} and
159 | \code{b}. \code{extract_names()} lets us exclude these items by setting the \code{.remove}
160 | argument to \verb{[^item1]}:\if{html}{\out{
}}
173 | }
174 |
175 |
--------------------------------------------------------------------------------
/man/selection_helpers.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/selection_helpers.R
3 | \name{selection_helpers}
4 | \alias{selection_helpers}
5 | \alias{over_selection_helpers}
6 | \title{Selection helpers}
7 | \description{
8 | \code{dplyover} provides three kinds of selection helpers which are intended for
9 | use in all functions that accept a vector as argument (that is \code{over()} and
10 | \code{crossover()} as well as their variants, see here for a full list of the
11 | \link[=over_across_family]{over-across function family}).
12 |
13 | Helpers which select \strong{string parts} of the \strong{column names} (of the underlying data):
14 | \itemize{
15 | \item \code{\link[=cut_names]{cut_names()}} removes a specified pattern.
16 | \item \code{\link[=extract_names]{extract_names()}} extracts a specified pattern.
17 | }
18 |
19 | Helpers which select \strong{values} of a variable:
20 | \itemize{
21 | \item \code{\link[=dist_values]{dist_values()}} returns all distinct values.
22 | \item \code{\link[=seq_range]{seq_range()}} returns the sequence between the \code{range()} of a variable.
23 | }
24 |
25 | A helper function that evaluates a glue specification as variable
26 | \itemize{
27 | \item \code{\link[=.]{.()}} evaluates an interpolated string as symbol
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/man/show_affix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/show_affix.R
3 | \name{show_affix}
4 | \alias{show_affix}
5 | \alias{show_prefix}
6 | \alias{show_suffix}
7 | \title{Show affixes for variable pairs of two sets of columns}
8 | \usage{
9 | show_prefix(.data = NULL, .xcols = NULL, .ycols = NULL)
10 |
11 | show_suffix(.data = NULL, .xcols = NULL, .ycols = NULL)
12 | }
13 | \arguments{
14 | \item{.data}{A data frame.}
15 |
16 | \item{.xcols, .ycols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sets of
17 | columns for which the common pre- or suffix will be shown for each pair.
18 | Note that you can not select.}
19 | }
20 | \value{
21 | A tibble with three columns: .xcols, .ycols and prefix or suffix.
22 | }
23 | \description{
24 | These functions show the prefixes or suffixes for each pair of variables of
25 | two sets of columns. They are intended to be used either (1) in case \code{across2}
26 | throws an error when \code{{pre}} or \code{{suf}} are specified in \code{across2}'s \code{.names}
27 | argument or (2) before using \code{{pre}} or \code{{suf}} in \code{across2} to understand
28 | what the pre- or suffixes will look like.
29 | \itemize{
30 | \item \code{\link[=show_prefix]{show_prefix()}} lists each variable pair and the corresponding alphanumeric prefix
31 | \item \code{\link[=show_suffix]{show_suffix()}} lists each variable pair and the corresponding alphanumeric suffix
32 | }
33 | }
34 | \section{Examples}{
35 |
36 |
37 | Below two use cases of \code{show_prefix/suffix} are briefly explained.
38 | Let's first attach dplyr and get ready:\if{html}{\out{
}}
43 | \subsection{(1) When called after an error is thrown by across2()}{
44 |
45 | Let's assume we use \code{across2} with the \code{{pre}} glue specification on some
46 | data where not all variable pairs share a common prefix. In the example below
47 | we use \code{dplyr::rename} to create such a case. Then \code{across2} will throw an
48 | error. The error message already suggests that we can run \code{show_prefix()}
49 | to see what went wrong. In this case we can call \code{show_prefix()} without
50 | any arguments:\if{html}{\out{
}}\preformatted{ iris \%>\%
51 | as_tibble \%>\%
52 | rename("Pesal.Length" = Sepal.Length) \%>\%
53 | mutate(across2(ends_with("Length"),
54 | ends_with("Width"),
55 | .fns = list(product = ~ .x * .y,
56 | sum = ~ .x + .y),
57 | .names = "\{pre\}_\{fn\}"))
58 | #> Error: Problem with `mutate()` input `..1`.
59 | #> i `..1 = across2(...)`.
60 | #> x Problem with `across2()` input `.names`.
61 | #> i When `\{pre\}` is used inside `.names` each pair of input variables in `.xcols` and `.ycols` must share a common prefix of length > 0.
62 | #> x For at least one pair of variables a shared prefix could not be extracted.
63 | #> i Run `show_prefix()` to see the prefixes for each variable pair.
64 | show_prefix()
65 | #> # A tibble: 2 x 3
66 | #> .xcols .ycols prefix
67 | #>
68 | #> 1 Pesal.Length Sepal.Width
69 | #> 2 Petal.Length Petal.Width Petal
70 | }\if{html}{\out{
}}
71 | }
72 |
73 | \subsection{(2) When called on a data.frame}{
74 |
75 | When called on a data.frame we just need to specify two sets of columns:
76 | \code{.xcols} and \code{.ycols} (just like in \code{across2}).\if{html}{\out{
}}
85 | }
86 | }
87 |
88 |
--------------------------------------------------------------------------------
/man/string_eval.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/string_eval.R
3 | \name{string_eval}
4 | \alias{string_eval}
5 | \alias{.}
6 | \title{Evaluate an interpolated string as symbol}
7 | \usage{
8 | .(x)
9 | }
10 | \arguments{
11 | \item{x}{A glue specification, that is, a string which contains an R expression
12 | wrapped in curly braces, e.g. \verb{.("\{.x\}_some_string")}.}
13 | }
14 | \value{
15 | The values of the variable with the name of the final argument string, given
16 | that it exists in the caller environment.
17 | }
18 | \description{
19 | This function takes a glue specification as input, and evaluates the final
20 | argument string as name in the caller environment.
21 | }
22 | \section{Examples}{
23 | \if{html}{\out{
}}
28 |
29 | Below is a simple example from \code{over()}. In \code{over}'s function
30 | argument \code{.x} is first evaluated as 'Sepal' and then as 'Petal' which
31 | results in the final argument strings 'Sepal.Width' and 'Sepal.Length' as
32 | well as 'Petal.Width' and 'Petal.Length'.\if{html}{\out{
}}
60 |
61 | Although \code{.()} was created with the use of \code{over()} in mind, it can also be
62 | used within \code{dplyr::across()} in combination with \code{dplyr::cur_column()}.
63 | First let's rename 'Sepal.Length' and 'Petal.Length' to 'Sepal' and 'Petal'
64 | to have a stem to which we can attach the string '.Width' to access the
65 | two 'Width' variables. Now we can call \code{.(cur_column())} to access the variable
66 | \code{across()} has been called on (Note: we could have used \code{.x} instead). We can
67 | further access the values of the 'Width' variables by wrapping \code{cur_column()}
68 | in curly braces \code{{}}, adding \code{.Width} and wrapping everything with
69 | quotation marks \code{.("{cur_column()}.Width")}.\if{html}{\out{