├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── R
├── rap.R
└── zest_join.R
├── README.Rmd
├── README.md
├── man
├── figures
│ └── logo.png
├── rap.Rd
├── reexports.Rd
├── slam.Rd
└── zest_join.Rd
├── rap.Rproj
└── tests
├── testthat.R
└── testthat
├── test-rap.R
└── test-wap.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^\.travis\.yml$
2 | ^README\.Rmd$
3 | ^LICENSE\.md$
4 | ^rap\.Rproj$
5 | ^\.Rproj\.user$
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 |
3 | language: R
4 | sudo: false
5 | cache: packages
6 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: rap
2 | Title: rowwise operations on a data frame
3 | Version: 0.0.0.9001
4 | Authors@R: c(
5 | person("Romain", "François", email = "romain@purrple.cat", role = c("aut", "cre")),
6 | person("Davis", "Vaughan", role = c("ctb", "rev")),
7 | person("Suthira", "Owlarn", role = c("ctb", "rev")),
8 | person("Thomas Lin", "Pedersen", role = c("art"))
9 | )
10 | Description: rowwise operations on a data frame.
11 | Depends: R (>= 3.5.0)
12 | License: MIT + file LICENSE
13 | Encoding: UTF-8
14 | LazyData: true
15 | Imports:
16 | assertthat (>= 0.2.0),
17 | rlang (>= 0.3.1),
18 | purrr (>= 0.3.0),
19 | tibble (>= 2.0.1),
20 | dplyr (>= 0.8.0),
21 | utils,
22 | vctrs (>= 0.1.0),
23 | zeallot (>= 0.1.0),
24 | magrittr (>= 1.5),
25 | gapminder
26 | Roxygen: list(markdown = TRUE)
27 | RoxygenNote: 6.1.1
28 | Suggests:
29 | testthat (>= 2.0.1),
30 | broom
31 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2018
2 | COPYRIGHT HOLDER: Romain François
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2018 Romain François
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(print,rap_lambda)
4 | export("%>%")
5 | export(lap)
6 | export(rap)
7 | export(slam)
8 | export(wap)
9 | export(zest_join)
10 | importFrom(assertthat,assert_that)
11 | importFrom(dplyr,group_vars)
12 | importFrom(dplyr,is_grouped_df)
13 | importFrom(dplyr,tbl_vars)
14 | importFrom(magrittr,"%>%")
15 | importFrom(purrr,iwalk)
16 | importFrom(purrr,map)
17 | importFrom(purrr,map_chr)
18 | importFrom(purrr,map_dbl)
19 | importFrom(purrr,map_dfr)
20 | importFrom(purrr,map_int)
21 | importFrom(purrr,map_lgl)
22 | importFrom(purrr,map_raw)
23 | importFrom(rlang,abort)
24 | importFrom(rlang,caller_env)
25 | importFrom(rlang,dots_n)
26 | importFrom(rlang,enexpr)
27 | importFrom(rlang,env)
28 | importFrom(rlang,env_parent)
29 | importFrom(rlang,eval_bare)
30 | importFrom(rlang,eval_tidy)
31 | importFrom(rlang,expr)
32 | importFrom(rlang,expr_print)
33 | importFrom(rlang,f_env)
34 | importFrom(rlang,f_lhs)
35 | importFrom(rlang,f_rhs)
36 | importFrom(rlang,is_formula)
37 | importFrom(rlang,is_vector)
38 | importFrom(rlang,list2)
39 | importFrom(rlang,missing_arg)
40 | importFrom(rlang,new_function)
41 | importFrom(rlang,quos)
42 | importFrom(rlang,set_names)
43 | importFrom(rlang,sym)
44 | importFrom(tibble,add_column)
45 | importFrom(utils,globalVariables)
46 | importFrom(vctrs,vec_c)
47 | importFrom(vctrs,vec_cbind)
48 | importFrom(vctrs,vec_rbind)
49 | importFrom(vctrs,vec_size)
50 | importFrom(zeallot,"%<-%")
51 |
--------------------------------------------------------------------------------
/R/rap.R:
--------------------------------------------------------------------------------
1 | #' @importFrom rlang set_names list2 quos expr new_function eval_tidy missing_arg caller_env
2 | #' @importFrom rlang is_formula f_rhs abort is_vector dots_n f_env env sym f_lhs env_parent expr_print
3 | #' @importFrom rlang eval_bare enexpr
4 | #'
5 | #' @importFrom assertthat assert_that
6 | #' @importFrom purrr map map_dbl map_lgl map_int map_chr map_dfr map_raw iwalk
7 | #' @importFrom tibble add_column
8 | #' @importFrom dplyr is_grouped_df tbl_vars group_vars
9 | #' @importFrom utils globalVariables
10 | #' @importFrom vctrs vec_size vec_c vec_rbind vec_cbind
11 | #' @importFrom zeallot %<-%
12 | #' @importFrom magrittr %>%
13 | is_bare_vector <- function(x) {
14 | is_vector(x) && !is.object(x) && is.null(attr(x, "class"))
15 | }
16 |
17 | #' @export
18 | magrittr::`%>%`
19 |
20 | globalVariables(c(".::index::.", ".::rhs::.", "lambda", "mapper", "name", "."))
21 |
22 | observation_matrix <- function(.) {
23 | expr( (!!.)[`.::index::.`, , drop = FALSE])
24 | }
25 |
26 | observation_data_frame <- function(.) {
27 | expr( (!!.)[`.::index::.`, , drop = FALSE])
28 | }
29 |
30 | observation_object <- function(.) {
31 | expr( (!!.)[[`.::index::.`]])
32 | }
33 |
34 | observation_bare_vector <- function(.) {
35 | expr( .subset2(!!., `.::index::.`))
36 | }
37 |
38 | rapper_args <- function(.tbl, env) {
39 | args <- set_names(
40 | map(.tbl, ~ if (is.data.frame(.) ){
41 | observation_data_frame(.)
42 | } else if(is.matrix(.)){
43 | observation_matrix(.)
44 | } else if (is_bare_vector(.)) {
45 | observation_bare_vector(.)
46 | } else {
47 | observation_object(.)
48 | }),
49 | tbl_vars(.tbl)
50 | )
51 | list2(`.::index::.` = missing_arg(), !!!args, ..data = quote(environment()), ..env = env )
52 | }
53 |
54 | map_for_type <- function(.ptype, combine = vec_c) {
55 | function(.x, .f, ...) {
56 | out <- map(.x, function(x){
57 | res <- .f(x, ...)
58 | stopifnot(vec_size(res) == 1L)
59 | res
60 | })
61 | combine(!!!out, .ptype = .ptype)
62 | }
63 | }
64 |
65 | map_for <- function(.ptype) {
66 | if (identical(.ptype, list())) {
67 | map
68 | } else if(identical(.ptype, integer())) {
69 | map_int
70 | } else if(identical(.ptype, double())) {
71 | map_dbl
72 | } else if(identical(.ptype, raw())) {
73 | map_raw
74 | } else if(identical(.ptype, character())) {
75 | map_chr
76 | } else if(identical(.ptype, logical())) {
77 | map_lgl
78 | } else if(is.data.frame(.ptype)) {
79 | if (ncol(.ptype) == 0L){
80 | map_for_type(NULL, vec_rbind)
81 | } else {
82 | map_for_type(.ptype, vec_rbind)
83 | }
84 | } else {
85 | map_for_type(.ptype, vec_c)
86 | }
87 | }
88 |
89 | # borrowed from https://github.com/r-lib/rlang/blob/148a166481ba19551afec649570efe2de53f0248/R/eval.R#L314
90 | value <- function(expr) {
91 | eval_bare(enexpr(expr), caller_env())
92 | }
93 |
94 | prepare_wap <- function(.tbl, .f, check = TRUE) {
95 | if (check) {
96 | assert_that(
97 | is_formula(.f),
98 | msg = ".f should be formula"
99 | )
100 | }
101 | lhs <- f_lhs(.f)
102 |
103 | # the type
104 | .ptype <- if (is.null(lhs)) list() else eval_tidy(lhs, env = f_env(.f))
105 |
106 | # the lambda
107 | body <- expr({
108 | value(!!(f_rhs(.f)))
109 | })
110 | env <- f_env(.f)
111 | lambda <- new_function(rapper_args(.tbl, env = env_parent(env)), body, env = env(value = value, env))
112 | attr(lambda, "class") <- "rap_lambda"
113 |
114 | # the mapper
115 | .map <- map_for(.ptype)
116 |
117 | list(
118 | lambda = lambda,
119 | mapper = .map
120 | )
121 | }
122 |
123 | #' Map over columns of a data frame simultaneously
124 | #'
125 | #' @param .tbl A data frame
126 | #' @param .f a single formula
127 | #' @param ... formulas
128 | #'
129 | #' The *rhs* of each formula uses columns of `.tbl`, and each stands for a single
130 | #' observation.
131 | #'
132 | #' The *lhs* of each formula indicates the type, in the [vctrs::vec_c()] sense.
133 | #'
134 | #' - empty or `list()`: no check is performed on the results of
135 | #' the rhs expression and a list is returned.
136 | #'
137 | #' - `data.frame()`: to indicate that the rhs should evaluate
138 | #' to a data frame of 1 row. The data frames don't need to be of a specific types
139 | #' and are are combined with [vctrs::vec_rbind()].
140 | #'
141 | #' - A data frame of a specific type, e.g. `data.frame(x = integer(), y = double())`
142 | #' The rhs should evaluate to a data frame of that type with 1 row.
143 | #'
144 | #' - Any other ptype that makes sense for [vctrs::vec_c()]. Each result must
145 | #' validate `vctrs::vec_size(.) == 1L` and are combined with
146 | #' `vctrs::vec_c(!!!, .ptype = .ptype)`
147 | #'
148 | #' In `rap()` if the formula is named, the result becomes a new column of the
149 | #' `tbl`, otherwise the formula is only used for side effects.
150 | #'
151 | #' @return
152 | #' - `wap()` returns a vector of the type specified by the lhs of the formula.
153 | #' The vector validates `vec_size() == nrow(.tbl)`. This is similar
154 | #' to [purrr::pmap()]
155 | #'
156 | #' - `rap()` adds a column to `.tbl` per formula in `...`
157 | #'
158 | #' @examples
159 | #'
160 | #' library(purrr)
161 | #' library(dplyr)
162 | #' library(tibble)
163 | #'
164 | #' tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20))
165 | #'
166 | #' # ----- wap
167 | #' # returns a list of 3 elements
168 | #' tbl %>%
169 | #' wap( ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))
170 | #'
171 | #' # same, i.e. list() is equivalent to empty
172 | #' tbl %>%
173 | #' wap(list() ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))
174 | #'
175 | #' # can specify the output type with the formula lhs
176 | #' tbl %>%
177 | #' wap(integer() ~ nrow(filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)))
178 | #'
179 | #' # to make data frames
180 | #' starwars %>%
181 | #' wap(data.frame() ~ data.frame(species = length(species), films = length(films)))
182 | #'
183 | #' # ----- rap: add columns
184 | #' tbl %>%
185 | #' rap(
186 | #' x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold),
187 | #' n = integer() ~ nrow(x)
188 | #' )
189 | #'
190 | #' # rap is especially useful for iterating over multiple models
191 | #' starwars %>%
192 | #' group_nest(gender) %>%
193 | #' rap(
194 | #' model = ~ lm(height ~ mass + birth_year, data = data),
195 | #' perf = double() ~ summary(model)$adj.r.squared
196 | #' )
197 | #'
198 | #' @rdname rap
199 | #' @export
200 | wap <- function(.tbl, .f) {
201 | c(lambda, mapper) %<-% prepare_wap(.tbl, .f = .f, check = TRUE)
202 | mapper(seq_len(nrow(.tbl)), lambda)
203 | }
204 |
205 | #' @rdname rap
206 | #' @export
207 | lap <- function(.tbl, .f) {
208 | prepare_wap(.tbl, .f, check = TRUE)$lambda
209 | }
210 |
211 | #' @export
212 | print.rap_lambda <- function(x, ...) {
213 | expr_print(unclass(x))
214 | invisible(x)
215 | }
216 |
217 | #' @rdname rap
218 | #' @export
219 | rap <- function(.tbl, ...) {
220 | formulas <- list2(...)
221 | if(is.null(names(formulas))) {
222 | names(formulas) <- rep("", length(formulas))
223 | }
224 | assert_that(
225 | all(map_lgl(formulas, is_formula)),
226 | msg = "`...` should be a list of formulas"
227 | )
228 |
229 | iwalk(formulas, ~{
230 | c(lambda, mapper) %<-% prepare_wap(.tbl, .x, check = FALSE)
231 |
232 | if (is_grouped_df(.tbl) && .y %in% group_vars(.tbl)) {
233 | abort("cannot rap() a grouping variable")
234 | }
235 |
236 | res <- mapper(seq_len(nrow(.tbl)), lambda)
237 | if (.y != "") {
238 | # res is a column
239 | .tbl[[.y]] <<- res
240 | } else {
241 | # res is a list of one-row tibbles, so
242 | # assemble them together and auto splice
243 | .tbl <<- vec_cbind(.tbl, vec_rbind(!!!res))
244 | }
245 |
246 | })
247 | .tbl
248 | }
249 |
250 | #' pmap adapter
251 | #'
252 | #' `slam()` is typically used in [purrr::pmap()] calls to transform a
253 | #' formula that uses the raw names into a function. `slam()` is not quite
254 | #' [rap()].
255 | #'
256 | #' @param .tbl a tibble
257 | #' @param formula a formula that uses columns from the tibble
258 | #'
259 | #' @examples
260 | #' library(purrr)
261 | #' library(gapminder)
262 | #' library(dplyr)
263 | #'
264 | #' oceania <- gapminder::gapminder %>%
265 | #' filter(continent == "Oceania") %>%
266 | #' mutate(yr1952 = year - 1952) %>%
267 | #' select(-continent) %>%
268 | #' group_nest(country)
269 | #'
270 | #' # the idea of slam is to promote a formula expressed with the
271 | #' # column names into a pmap() ready function
272 | #' formula <- ~broom::tidy(stats::lm(lifeExp ~ yr1952, data))
273 | #' oceania %>%
274 | #' pmap(slam(oceania, formula))
275 | #'
276 | #' # this is similar to e.g.
277 | #' oceania %>%
278 | #' wap(~broom::tidy(stats::lm(lifeExp ~ yr1952, data)))
279 | #'
280 | #' @export
281 | slam <- function(.tbl, formula) {
282 | args <- set_names(rep(list(missing_arg()), ncol(.tbl)), tbl_vars(.tbl))
283 | body <- expr({
284 | value(!!(f_rhs(formula)))
285 | })
286 | env <- env(f_env(formula), value = value)
287 |
288 | new_function(args, body, env)
289 | }
290 |
291 |
292 |
--------------------------------------------------------------------------------
/R/zest_join.R:
--------------------------------------------------------------------------------
1 |
2 | #' zest join
3 | #'
4 | #' a zest join is similar to a [dplyr::nest_join()] but the rows of `y` that are
5 | #' included in the list column are controlled by a predicate.
6 | #'
7 | #' @param x a tibble
8 | #'
9 | #' @param y another tibble
10 | #'
11 | #' @param ... named predicate formulas
12 | #'
13 | #' The rhs of the formulas is used y [dplyr::filter()] on `y` for each row of `x`.
14 | #'
15 | #' - Literal column names refer to columns of `y`. Alternatively you can use `.data$`.
16 | #'
17 | #' - To use the current value for a column of `x` you can use unquoting, e.g. `!!cyl`
18 | #'
19 | #' @return a tibble that contains all columns and rows of `x`, plus an additional list column per formula:
20 | #' - its name is given by the name of the formula
21 | #' - each element of the column is a tibble
22 | #' - each of the tibbles is a subset of `y` according to the rhs of the formula
23 | #'
24 | #' @examples
25 | #'
26 | #' tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20))
27 | #'
28 | #' # zest join of tbl and mtcars
29 | #' # - the created column is called `data`
30 | #' # - each element of the data column is the result of filter(mtcars, cyl == !!cyl & mpg < !!mpg)
31 | #' # - `cyl` and `mpg` refer to columns of `y`
32 | #' # - `!!cyl` and `!!mpg` refer to the current
33 | #' tbl %>%
34 | #' zest_join(mtcars, data = ~cyl == !!cyl & mpg < !!mpg)
35 | #'
36 | #' # similar to
37 | #' tbl %>%
38 | #' rap(data = ~filter(mtcars, cyl == !!cyl & mpg < !!mpg))
39 | #'
40 | #' # multiple zest
41 | #' tbl %>%
42 | #' zest_join(mtcars,
43 | #' one = ~cyl == !!cyl & mpg < !!mpg,
44 | #' two = ~cyl < !!cyl & mpg > !!mpg
45 | #' )
46 | #'
47 | #' @export
48 | zest_join <- function(x, y, ...) {
49 | out <- x
50 |
51 | formulas <- list(...)
52 | assert_that(
53 | !is.null(names(formulas)),
54 | all(map_lgl(formulas, is_formula)),
55 | msg = "`...` should be a named list of formulas"
56 | )
57 |
58 | iwalk(formulas, ~{
59 | c(lambda, mapper) %<-% prepare_wap(x, .x, check = FALSE)
60 |
61 | if (.y %in% tbl_vars(x)) {
62 | abort("cannot zest_join() a column with the same name as a column of x")
63 | }
64 |
65 | predicate <- body(lambda)[[2]][[2]]
66 | body(lambda)[[2]][[2]] <- call('filter', sym(".::rhs::."), predicate)
67 | environment(lambda) <- env(`.::rhs::.` = y, filter = dplyr::filter, environment(lambda))
68 |
69 | out[[.y]] <<- mapper(seq_len(nrow(x)), lambda)
70 | })
71 | out
72 | }
73 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r setup, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | out.width = "100%"
13 | )
14 | ```
15 |
16 | # rap
17 |
18 | [](https://www.tidyverse.org/lifecycle/)
19 | [](https://travis-ci.org/romainfrancois/rap)
20 |
21 | 
22 |
23 | Experimenting with yet another way to do rowwise operations.
24 |
25 | ## Installation
26 |
27 | You can install `rap` from gitub
28 |
29 | ``` r
30 | # install.packages("devtools")
31 | devtools::install_github("romainfrancois/rap")
32 | ```
33 |
34 | ## Why
35 |
36 | This offers `rap()` as an alternative to some versions of:
37 |
38 | - `rowwise()` + `do()`
39 | - `mutate()` + `pmap()`
40 | - maybe `purrrlyr` ?
41 | - probably other approaches
42 |
43 | `rap()` works with lambdas supplied as formulas, similar to `purrr::map()`
44 | but instead of `.x`, `.y`, `..1`, `..2`, ...the lambda can use the column names,
45 | which stand for a single element of the associated vector, in the `[[` sense.
46 |
47 | ## rap
48 |
49 | ```{r}
50 | library(tidyverse)
51 | library(rap)
52 |
53 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20))
54 | tbl
55 |
56 | tbl %>%
57 | rap(x = ~filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))
58 | ```
59 |
60 | If the lhs of the formula is empty, `rap()` adds a list column. Otherwise the lhs
61 | can be used to specify the type:
62 |
63 | ```{r}
64 | tbl %>%
65 | rap(
66 | x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold),
67 | n = integer() ~ nrow(x)
68 | )
69 | ```
70 |
71 | this example is based on this [issue](https://github.com/tidyverse/purrr/issues/280),
72 | which has equivalent with `pmap`:
73 |
74 | ```{r}
75 | tbl %>%
76 | mutate(
77 | x = pmap(
78 | .l = list(cyl_threshold, mpg_threshold),
79 | function(cc, mm) filter(mtcars, cyl == cc, mpg < mm)
80 | ),
81 | n = map_int(x, nrow)
82 | )
83 | ```
84 |
85 | ## wap
86 |
87 | ```{r}
88 | library(dplyr)
89 |
90 | starwars <- head(starwars)
91 |
92 | # creates a list of length 1 integer vectors
93 | # because type not specified
94 | starwars %>%
95 | wap(~length(films))
96 |
97 | # using the lhs to specify the type
98 | starwars %>%
99 | wap(integer() ~ length(films))
100 |
101 | # list of data frames
102 | starwars %>%
103 | wap(~ data.frame(vehicles = length(vehicles), starships = length(starships)))
104 |
105 | # Specify type as data.frame() row binds them
106 | starwars %>%
107 | wap(data.frame() ~ data.frame(vehicles = length(vehicles), starships = length(starships)))
108 | ```
109 |
110 |
111 | ## zest_join
112 |
113 | `r emo::ji("lemon")` `zest_join()` is similar to `dplyr::nest_join()` but
114 | you control what goes in the nested column. `Z` is `N` but `r emo::ji("arrow_heading_down")`.
115 |
116 | ```{r}
117 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20))
118 | tbl %>%
119 | zest_join(mtcars, data = ~cyl == cyl_threshold & mpg < mpg_threshold)
120 | ```
121 |
122 | In the rhs of the formula :
123 |
124 | - `cyl` and `mpg` refer to columns of `mtcars`
125 | - `cyl_threshold` and `mpg_threshold` refer to the current value from `tbl` because these columns don't exist in mtcars. If you wanted to refer to columns that are present both in mtcars and tbl you would have to unquote the columns in tbl with the unquoting operator, e.g. !!cyl
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | rap
4 | ====================================================
5 |
6 | [](https://www.tidyverse.org/lifecycle/) [](https://travis-ci.org/romainfrancois/rap)
7 |
8 | 
9 |
10 | Experimenting with yet another way to do rowwise operations.
11 |
12 | Installation
13 | ------------
14 |
15 | You can install `rap` from gitub
16 |
17 | ``` r
18 | # install.packages("devtools")
19 | devtools::install_github("romainfrancois/rap")
20 | ```
21 |
22 | Why
23 | ---
24 |
25 | This offers `rap()` as an alternative to some versions of:
26 |
27 | - `rowwise()` + `do()`
28 | - `mutate()` + `pmap()`
29 | - maybe `purrrlyr` ?
30 | - probably other approaches
31 |
32 | `rap()` works with lambdas supplied as formulas, similar to `purrr::map()` but instead of `.x`, `.y`, `..1`, `..2`, ...the lambda can use the column names, which stand for a single element of the associated vector, in the `[[` sense.
33 |
34 | rap
35 | ---
36 |
37 | ``` r
38 | library(tidyverse)
39 | #> ── Attaching packages ──────────────────── tidyverse 1.2.1 ──
40 | #> ✔ ggplot2 3.1.0 ✔ purrr 0.2.5.9000
41 | #> ✔ tibble 1.4.99.9006 ✔ dplyr 0.7.8
42 | #> ✔ tidyr 0.8.1 ✔ stringr 1.3.1
43 | #> ✔ readr 1.1.1 ✔ forcats 0.3.0
44 | #> ── Conflicts ─────────────────────── tidyverse_conflicts() ──
45 | #> ✖ dplyr::filter() masks stats::filter()
46 | #> ✖ dplyr::lag() masks stats::lag()
47 | library(rap)
48 |
49 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20))
50 | tbl
51 | #> # A tibble: 3 x 2
52 | #> cyl_threshold mpg_threshold
53 | #>
54 | #> 1 4 30
55 | #> 2 6 25
56 | #> 3 8 20
57 |
58 | tbl %>%
59 | rap(x = ~filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))
60 | #> # A tibble: 3 x 3
61 | #> cyl_threshold mpg_threshold x
62 | #>
63 | #> 1 4 30
64 | #> 2 6 25
65 | #> 3 8 20
66 | ```
67 |
68 | If the lhs of the formula is empty, `rap()` adds a list column. Otherwise the lhs can be used to specify the type:
69 |
70 | ``` r
71 | tbl %>%
72 | rap(
73 | x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold),
74 | n = integer() ~ nrow(x)
75 | )
76 | #> # A tibble: 3 x 4
77 | #> cyl_threshold mpg_threshold x n
78 | #>
79 | #> 1 4 30 7
80 | #> 2 6 25 7
81 | #> 3 8 20 14
82 | ```
83 |
84 | this example is based on this [issue](https://github.com/tidyverse/purrr/issues/280), which has equivalent with `pmap`:
85 |
86 | ``` r
87 | tbl %>%
88 | mutate(
89 | x = pmap(
90 | .l = list(cyl_threshold, mpg_threshold),
91 | function(cc, mm) filter(mtcars, cyl == cc, mpg < mm)
92 | ),
93 | n = map_int(x, nrow)
94 | )
95 | #> # A tibble: 3 x 4
96 | #> cyl_threshold mpg_threshold x n
97 | #>
98 | #> 1 4 30 7
99 | #> 2 6 25 7
100 | #> 3 8 20 14
101 | ```
102 |
103 | wap
104 | ---
105 |
106 | ``` r
107 | library(dplyr)
108 |
109 | starwars <- head(starwars)
110 |
111 | # creates a list of length 1 integer vectors
112 | # because type not specified
113 | starwars %>%
114 | wap(~length(films))
115 | #> [[1]]
116 | #> [1] 5
117 | #>
118 | #> [[2]]
119 | #> [1] 6
120 | #>
121 | #> [[3]]
122 | #> [1] 7
123 | #>
124 | #> [[4]]
125 | #> [1] 4
126 | #>
127 | #> [[5]]
128 | #> [1] 5
129 | #>
130 | #> [[6]]
131 | #> [1] 3
132 |
133 | # using the lhs to specify the type
134 | starwars %>%
135 | wap(integer() ~ length(films))
136 | #> [1] 5 6 7 4 5 3
137 |
138 | # list of data frames
139 | starwars %>%
140 | wap(~ data.frame(vehicles = length(vehicles), starships = length(starships)))
141 | #> [[1]]
142 | #> vehicles starships
143 | #> 1 2 2
144 | #>
145 | #> [[2]]
146 | #> vehicles starships
147 | #> 1 0 0
148 | #>
149 | #> [[3]]
150 | #> vehicles starships
151 | #> 1 0 0
152 | #>
153 | #> [[4]]
154 | #> vehicles starships
155 | #> 1 0 1
156 | #>
157 | #> [[5]]
158 | #> vehicles starships
159 | #> 1 1 0
160 | #>
161 | #> [[6]]
162 | #> vehicles starships
163 | #> 1 0 0
164 |
165 | # Specify type as data.frame() row binds them
166 | starwars %>%
167 | wap(data.frame() ~ data.frame(vehicles = length(vehicles), starships = length(starships)))
168 | #> vehicles starships
169 | #> 1 2 2
170 | #> 2 0 0
171 | #> 3 0 0
172 | #> 4 0 1
173 | #> 5 1 0
174 | #> 6 0 0
175 | ```
176 |
177 | zest\_join
178 | ----------
179 |
180 | 🍋 `zest_join()` is similar to `dplyr::nest_join()` but you control what goes in the nested column. `Z` is `N` but ⤵️.
181 |
182 | ``` r
183 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20))
184 | tbl %>%
185 | zest_join(mtcars, data = ~cyl == !!cyl_threshold & mpg < !!mpg_threshold)
186 | #> # A tibble: 3 x 3
187 | #> cyl_threshold mpg_threshold data
188 | #>
189 | #> 1 4 30
190 | #> 2 6 25
191 | #> 3 8 20
192 | ```
193 |
194 | In the rhs of the formula :
195 |
196 | - `cyl` and `mpg` refer to columns of `mtcars`
197 | - `cyl_threshold` and `mpg_threshold` refer to the current value from `tbl` because these columns don't exist in mtcars. If you wanted to refer to columns that are present both in mtcars and tbl you would have to unquote the columns in tbl with the unquoting operator, e.g. !!cyl
198 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/romainfrancois/rap/ff8d735a7ad93fad021e7aca45f3abe4d0cb80cc/man/figures/logo.png
--------------------------------------------------------------------------------
/man/rap.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/rap.R
3 | \name{wap}
4 | \alias{wap}
5 | \alias{lap}
6 | \alias{rap}
7 | \title{Map over columns of a data frame simultaneously}
8 | \usage{
9 | wap(.tbl, .f)
10 |
11 | lap(.tbl, .f)
12 |
13 | rap(.tbl, ...)
14 | }
15 | \arguments{
16 | \item{.tbl}{A data frame}
17 |
18 | \item{.f}{a single formula}
19 |
20 | \item{...}{formulas
21 |
22 | The \emph{rhs} of each formula uses columns of \code{.tbl}, and each stands for a single
23 | observation.
24 |
25 | The \emph{lhs} of each formula indicates the type, in the \code{\link[vctrs:vec_c]{vctrs::vec_c()}} sense.
26 | \itemize{
27 | \item empty or \code{list()}: no check is performed on the results of
28 | the rhs expression and a list is returned.
29 | \item \code{data.frame()}: to indicate that the rhs should evaluate
30 | to a data frame of 1 row. The data frames don't need to be of a specific types
31 | and are are combined with \code{\link[vctrs:vec_rbind]{vctrs::vec_rbind()}}.
32 | \item A data frame of a specific type, e.g. \code{data.frame(x = integer(), y = double())}
33 | The rhs should evaluate to a data frame of that type with 1 row.
34 | \item Any other ptype that makes sense for \code{\link[vctrs:vec_c]{vctrs::vec_c()}}. Each result must
35 | validate \code{vctrs::vec_size(.) == 1L} and are combined with
36 | \code{vctrs::vec_c(!!!, .ptype = .ptype)}
37 | }
38 |
39 | In \code{rap()} if the formula is named, the result becomes a new column of the
40 | \code{tbl}, otherwise the formula is only used for side effects.}
41 | }
42 | \value{
43 | \itemize{
44 | \item \code{wap()} returns a vector of the type specified by the lhs of the formula.
45 | The vector validates \code{vec_size() == nrow(.tbl)}. This is similar
46 | to \code{\link[purrr:pmap]{purrr::pmap()}}
47 | \item \code{rap()} adds a column to \code{.tbl} per formula in \code{...}
48 | }
49 | }
50 | \description{
51 | Map over columns of a data frame simultaneously
52 | }
53 | \examples{
54 |
55 | library(purrr)
56 | library(dplyr)
57 | library(tibble)
58 |
59 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20))
60 |
61 | # ----- wap
62 | # returns a list of 3 elements
63 | tbl \%>\%
64 | wap( ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))
65 |
66 | # same, i.e. list() is equivalent to empty
67 | tbl \%>\%
68 | wap(list() ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))
69 |
70 | # can specify the output type with the formula lhs
71 | tbl \%>\%
72 | wap(integer() ~ nrow(filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)))
73 |
74 | # to make data frames
75 | starwars \%>\%
76 | wap(data.frame() ~ data.frame(species = length(species), films = length(films)))
77 |
78 | # ----- rap: add columns
79 | tbl \%>\%
80 | rap(
81 | x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold),
82 | n = integer() ~ nrow(x)
83 | )
84 |
85 | # rap is especially useful for iterating over multiple models
86 | starwars \%>\%
87 | group_nest(gender) \%>\%
88 | rap(
89 | model = ~ lm(height ~ mass + birth_year, data = data),
90 | perf = double() ~ summary(model)$adj.r.squared
91 | )
92 |
93 | }
94 |
--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/rap.R
3 | \docType{import}
4 | \name{reexports}
5 | \alias{reexports}
6 | \alias{\%>\%}
7 | \title{Objects exported from other packages}
8 | \keyword{internal}
9 | \description{
10 | These objects are imported from other packages. Follow the links
11 | below to see their documentation.
12 |
13 | \describe{
14 | \item{magrittr}{\code{\link[magrittr]{\%>\%}}}
15 | }}
16 |
17 |
--------------------------------------------------------------------------------
/man/slam.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/rap.R
3 | \name{slam}
4 | \alias{slam}
5 | \title{pmap adapter}
6 | \usage{
7 | slam(.tbl, formula)
8 | }
9 | \arguments{
10 | \item{.tbl}{a tibble}
11 |
12 | \item{formula}{a formula that uses columns from the tibble}
13 | }
14 | \description{
15 | \code{slam()} is typically used in \code{\link[purrr:pmap]{purrr::pmap()}} calls to transform a
16 | formula that uses the raw names into a function. \code{slam()} is not quite
17 | \code{\link[=rap]{rap()}}.
18 | }
19 | \examples{
20 | library(purrr)
21 | library(gapminder)
22 | library(dplyr)
23 |
24 | oceania <- gapminder::gapminder \%>\%
25 | filter(continent == "Oceania") \%>\%
26 | mutate(yr1952 = year - 1952) \%>\%
27 | select(-continent) \%>\%
28 | group_nest(country)
29 |
30 | # the idea of slam is to promote a formula expressed with the
31 | # column names into a pmap() ready function
32 | formula <- ~broom::tidy(stats::lm(lifeExp ~ yr1952, data))
33 | oceania \%>\%
34 | pmap(slam(oceania, formula))
35 |
36 | # this is similar to e.g.
37 | oceania \%>\%
38 | wap(~broom::tidy(stats::lm(lifeExp ~ yr1952, data)))
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/man/zest_join.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/zest_join.R
3 | \name{zest_join}
4 | \alias{zest_join}
5 | \title{zest join}
6 | \usage{
7 | zest_join(x, y, ...)
8 | }
9 | \arguments{
10 | \item{x}{a tibble}
11 |
12 | \item{y}{another tibble}
13 |
14 | \item{...}{named predicate formulas
15 |
16 | The rhs of the formulas is used y \code{\link[dplyr:filter]{dplyr::filter()}} on \code{y} for each row of \code{x}.
17 | \itemize{
18 | \item Literal column names refer to columns of \code{y}. Alternatively you can use \code{.data$}.
19 | \item To use the current value for a column of \code{x} you can use unquoting, e.g. \code{!!cyl}
20 | }}
21 | }
22 | \value{
23 | a tibble that contains all columns and rows of \code{x}, plus an additional list column per formula:
24 | \itemize{
25 | \item its name is given by the name of the formula
26 | \item each element of the column is a tibble
27 | \item each of the tibbles is a subset of \code{y} according to the rhs of the formula
28 | }
29 | }
30 | \description{
31 | a zest join is similar to a \code{\link[dplyr:nest_join]{dplyr::nest_join()}} but the rows of \code{y} that are
32 | included in the list column are controlled by a predicate.
33 | }
34 | \examples{
35 |
36 | tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20))
37 |
38 | # zest join of tbl and mtcars
39 | # - the created column is called `data`
40 | # - each element of the data column is the result of filter(mtcars, cyl == !!cyl & mpg < !!mpg)
41 | # - `cyl` and `mpg` refer to columns of `y`
42 | # - `!!cyl` and `!!mpg` refer to the current
43 | tbl \%>\%
44 | zest_join(mtcars, data = ~cyl == !!cyl & mpg < !!mpg)
45 |
46 | # similar to
47 | tbl \%>\%
48 | rap(data = ~filter(mtcars, cyl == !!cyl & mpg < !!mpg))
49 |
50 | # multiple zest
51 | tbl \%>\%
52 | zest_join(mtcars,
53 | one = ~cyl == !!cyl & mpg < !!mpg,
54 | two = ~cyl < !!cyl & mpg > !!mpg
55 | )
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/rap.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(rap)
3 |
4 | test_check("rap")
5 |
--------------------------------------------------------------------------------
/tests/testthat/test-rap.R:
--------------------------------------------------------------------------------
1 | context("rap()")
2 |
3 | test_that("rap() with no lhs a list column", {
4 | res <- rap(iris, x = ~ Sepal.Length *2)
5 | expect_is(res, "data.frame")
6 | expect_equal(names(res), c(names(iris), "x"))
7 | expect_is(res[["x"]], "list")
8 | expect_equal(length(res[["x"]]), 150L)
9 | expect_equal(unlist(res[["x"]]), iris$Sepal.Length * 2)
10 | })
11 |
12 | test_that("rap() respects lhs of formula for purrr variants", {
13 | res <- iris %>%
14 | rap(
15 | int = integer() ~ 1L,
16 | dbl = double() ~ 1L,
17 | chr = character() ~ "",
18 | raw = raw() ~ as.raw(1),
19 | lgl = logical() ~ Sepal.Length < Sepal.Width
20 | )
21 | expected <- iris %>%
22 | tibble::add_column(
23 | int = 1L,
24 | dbl = 1,
25 | chr = "",
26 | raw = as.raw(1),
27 | lgl = iris$Sepal.Length < iris$Sepal.Width
28 | )
29 | expect_equal(res, expected)
30 | })
31 |
32 | test_that("rap() can make data frame columns", {
33 | res <- rap(iris,
34 | x = data.frame() ~ data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width)
35 | )
36 | expected <- tibble::add_column(
37 | iris,
38 | x = with(iris, data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width))
39 | )
40 | expect_equal(res, expected)
41 | })
42 |
43 | test_that("rap() can make factors", {
44 | expect_equal(
45 | rap(iris, x = iris$Species ~ Species),
46 | tibble::add_column(iris, x = iris$Species)
47 | )
48 | })
49 |
50 | test_that("rap() only accepts results with 1 observation when type is not list()", {
51 | expect_error(rap(iris, x = integer() ~ 1:2))
52 | expect_error(rap(iris, x = double() ~ c(1,2)))
53 | expect_error(rap(iris, x = character() ~ letters))
54 | expect_error(rap(iris, x = logical() ~ c(TRUE, FALSE)))
55 | expect_error(rap(iris, x = raw() ~ as.raw(0:255)))
56 | expect_error(rap(iris, x = data.frame() ~mtcars))
57 | })
58 |
59 | test_that("unquoting is done locally, and is equivalent to using the `..data` pronoun", {
60 | tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20))
61 |
62 | res1 <- tbl %>%
63 | rap(x = ~dplyr::filter(mtcars, cyl == !!cyl, mpg < !!mpg) )
64 |
65 | res2 <- tbl %>%
66 | rap(x = ~dplyr::filter(mtcars, cyl == ..data$cyl, mpg < ..data$mpg) )
67 |
68 | expect_identical(res1, res2)
69 | })
70 |
71 | test_that("..env is the formula environment", {
72 | tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20))
73 |
74 | mpg <- 20
75 | res1 <- tbl %>%
76 | rap(x = ~dplyr::filter(mtcars, cyl == !!cyl, mpg < ..env$mpg) )
77 |
78 | res2 <- tbl %>%
79 | rap(x = ~dplyr::filter(mtcars, cyl == ..data$cyl, mpg < 20) )
80 |
81 | expect_identical(res1, res2)
82 | })
83 |
84 | test_that("rap can splice", {
85 | tbl <- tibble::tibble(
86 | params = list(list(from = 1, to = 2), list(from = 1, to = 10))
87 | ) %>%
88 | rap(y = ~ seq(!!!params))
89 |
90 | expect_equal(tbl$y, list(1:2, 1:10))
91 | })
92 |
--------------------------------------------------------------------------------
/tests/testthat/test-wap.R:
--------------------------------------------------------------------------------
1 | context("wap()")
2 |
3 | test_that("wap() creates a list", {
4 | res <- wap(iris, ~ Sepal.Length *2)
5 | expect_is(res, "list")
6 | expect_equal(length(res), 150L)
7 | expect_equal(unlist(res), iris$Sepal.Length * 2)
8 | })
9 |
10 | test_that("wap() respects basic types", {
11 | expect_equal(wap(iris, integer() ~ 1L), rep(1L, 150))
12 | expect_equal(wap(iris, double() ~ 1L), rep(1, 150))
13 | expect_equal(wap(iris, character() ~ ""), rep("", 150))
14 | expect_equal(wap(iris, raw() ~ as.raw(1L)), rep(as.raw(1L), 150))
15 | expect_equal(wap(iris, logical() ~ Sepal.Length < Sepal.Width), iris$Sepal.Length < iris$Sepal.Width)
16 | })
17 |
18 | test_that("wap() with data.frame() in lhs", {
19 | res <- wap(iris, data.frame() ~ data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width))
20 | expected <- with(iris, data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width))
21 |
22 | expect_equal(res, expected)
23 | })
24 |
25 | test_that("wap() can make factors", {
26 | f <- wap(iris, iris$Species ~ Species)
27 | expect_equal(f, iris$Species)
28 | })
29 |
30 | test_that("wap() only accepts results with 1 observation when .ptype is specified", {
31 | expect_error(wap(iris, integer() ~ 1:2))
32 | expect_error(wap(iris, double() ~ c(1,2)))
33 | expect_error(wap(iris, character() ~ letters))
34 | expect_error(wap(iris, logical() ~ c(TRUE, FALSE)))
35 | expect_error(wap(iris, raw() ~ as.raw(0:255)))
36 | expect_error(wap(iris, data.frame() ~ mtcars))
37 | })
38 |
39 |
--------------------------------------------------------------------------------