├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R ├── rap.R └── zest_join.R ├── README.Rmd ├── README.md ├── man ├── figures │ └── logo.png ├── rap.Rd ├── reexports.Rd ├── slam.Rd └── zest_join.Rd ├── rap.Rproj └── tests ├── testthat.R └── testthat ├── test-rap.R └── test-wap.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | ^README\.Rmd$ 3 | ^LICENSE\.md$ 4 | ^rap\.Rproj$ 5 | ^\.Rproj\.user$ 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rap 2 | Title: rowwise operations on a data frame 3 | Version: 0.0.0.9001 4 | Authors@R: c( 5 | person("Romain", "François", email = "romain@purrple.cat", role = c("aut", "cre")), 6 | person("Davis", "Vaughan", role = c("ctb", "rev")), 7 | person("Suthira", "Owlarn", role = c("ctb", "rev")), 8 | person("Thomas Lin", "Pedersen", role = c("art")) 9 | ) 10 | Description: rowwise operations on a data frame. 11 | Depends: R (>= 3.5.0) 12 | License: MIT + file LICENSE 13 | Encoding: UTF-8 14 | LazyData: true 15 | Imports: 16 | assertthat (>= 0.2.0), 17 | rlang (>= 0.3.1), 18 | purrr (>= 0.3.0), 19 | tibble (>= 2.0.1), 20 | dplyr (>= 0.8.0), 21 | utils, 22 | vctrs (>= 0.1.0), 23 | zeallot (>= 0.1.0), 24 | magrittr (>= 1.5), 25 | gapminder 26 | Roxygen: list(markdown = TRUE) 27 | RoxygenNote: 6.1.1 28 | Suggests: 29 | testthat (>= 2.0.1), 30 | broom 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Romain François 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2018 Romain François 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,rap_lambda) 4 | export("%>%") 5 | export(lap) 6 | export(rap) 7 | export(slam) 8 | export(wap) 9 | export(zest_join) 10 | importFrom(assertthat,assert_that) 11 | importFrom(dplyr,group_vars) 12 | importFrom(dplyr,is_grouped_df) 13 | importFrom(dplyr,tbl_vars) 14 | importFrom(magrittr,"%>%") 15 | importFrom(purrr,iwalk) 16 | importFrom(purrr,map) 17 | importFrom(purrr,map_chr) 18 | importFrom(purrr,map_dbl) 19 | importFrom(purrr,map_dfr) 20 | importFrom(purrr,map_int) 21 | importFrom(purrr,map_lgl) 22 | importFrom(purrr,map_raw) 23 | importFrom(rlang,abort) 24 | importFrom(rlang,caller_env) 25 | importFrom(rlang,dots_n) 26 | importFrom(rlang,enexpr) 27 | importFrom(rlang,env) 28 | importFrom(rlang,env_parent) 29 | importFrom(rlang,eval_bare) 30 | importFrom(rlang,eval_tidy) 31 | importFrom(rlang,expr) 32 | importFrom(rlang,expr_print) 33 | importFrom(rlang,f_env) 34 | importFrom(rlang,f_lhs) 35 | importFrom(rlang,f_rhs) 36 | importFrom(rlang,is_formula) 37 | importFrom(rlang,is_vector) 38 | importFrom(rlang,list2) 39 | importFrom(rlang,missing_arg) 40 | importFrom(rlang,new_function) 41 | importFrom(rlang,quos) 42 | importFrom(rlang,set_names) 43 | importFrom(rlang,sym) 44 | importFrom(tibble,add_column) 45 | importFrom(utils,globalVariables) 46 | importFrom(vctrs,vec_c) 47 | importFrom(vctrs,vec_cbind) 48 | importFrom(vctrs,vec_rbind) 49 | importFrom(vctrs,vec_size) 50 | importFrom(zeallot,"%<-%") 51 | -------------------------------------------------------------------------------- /R/rap.R: -------------------------------------------------------------------------------- 1 | #' @importFrom rlang set_names list2 quos expr new_function eval_tidy missing_arg caller_env 2 | #' @importFrom rlang is_formula f_rhs abort is_vector dots_n f_env env sym f_lhs env_parent expr_print 3 | #' @importFrom rlang eval_bare enexpr 4 | #' 5 | #' @importFrom assertthat assert_that 6 | #' @importFrom purrr map map_dbl map_lgl map_int map_chr map_dfr map_raw iwalk 7 | #' @importFrom tibble add_column 8 | #' @importFrom dplyr is_grouped_df tbl_vars group_vars 9 | #' @importFrom utils globalVariables 10 | #' @importFrom vctrs vec_size vec_c vec_rbind vec_cbind 11 | #' @importFrom zeallot %<-% 12 | #' @importFrom magrittr %>% 13 | is_bare_vector <- function(x) { 14 | is_vector(x) && !is.object(x) && is.null(attr(x, "class")) 15 | } 16 | 17 | #' @export 18 | magrittr::`%>%` 19 | 20 | globalVariables(c(".::index::.", ".::rhs::.", "lambda", "mapper", "name", ".")) 21 | 22 | observation_matrix <- function(.) { 23 | expr( (!!.)[`.::index::.`, , drop = FALSE]) 24 | } 25 | 26 | observation_data_frame <- function(.) { 27 | expr( (!!.)[`.::index::.`, , drop = FALSE]) 28 | } 29 | 30 | observation_object <- function(.) { 31 | expr( (!!.)[[`.::index::.`]]) 32 | } 33 | 34 | observation_bare_vector <- function(.) { 35 | expr( .subset2(!!., `.::index::.`)) 36 | } 37 | 38 | rapper_args <- function(.tbl, env) { 39 | args <- set_names( 40 | map(.tbl, ~ if (is.data.frame(.) ){ 41 | observation_data_frame(.) 42 | } else if(is.matrix(.)){ 43 | observation_matrix(.) 44 | } else if (is_bare_vector(.)) { 45 | observation_bare_vector(.) 46 | } else { 47 | observation_object(.) 48 | }), 49 | tbl_vars(.tbl) 50 | ) 51 | list2(`.::index::.` = missing_arg(), !!!args, ..data = quote(environment()), ..env = env ) 52 | } 53 | 54 | map_for_type <- function(.ptype, combine = vec_c) { 55 | function(.x, .f, ...) { 56 | out <- map(.x, function(x){ 57 | res <- .f(x, ...) 58 | stopifnot(vec_size(res) == 1L) 59 | res 60 | }) 61 | combine(!!!out, .ptype = .ptype) 62 | } 63 | } 64 | 65 | map_for <- function(.ptype) { 66 | if (identical(.ptype, list())) { 67 | map 68 | } else if(identical(.ptype, integer())) { 69 | map_int 70 | } else if(identical(.ptype, double())) { 71 | map_dbl 72 | } else if(identical(.ptype, raw())) { 73 | map_raw 74 | } else if(identical(.ptype, character())) { 75 | map_chr 76 | } else if(identical(.ptype, logical())) { 77 | map_lgl 78 | } else if(is.data.frame(.ptype)) { 79 | if (ncol(.ptype) == 0L){ 80 | map_for_type(NULL, vec_rbind) 81 | } else { 82 | map_for_type(.ptype, vec_rbind) 83 | } 84 | } else { 85 | map_for_type(.ptype, vec_c) 86 | } 87 | } 88 | 89 | # borrowed from https://github.com/r-lib/rlang/blob/148a166481ba19551afec649570efe2de53f0248/R/eval.R#L314 90 | value <- function(expr) { 91 | eval_bare(enexpr(expr), caller_env()) 92 | } 93 | 94 | prepare_wap <- function(.tbl, .f, check = TRUE) { 95 | if (check) { 96 | assert_that( 97 | is_formula(.f), 98 | msg = ".f should be formula" 99 | ) 100 | } 101 | lhs <- f_lhs(.f) 102 | 103 | # the type 104 | .ptype <- if (is.null(lhs)) list() else eval_tidy(lhs, env = f_env(.f)) 105 | 106 | # the lambda 107 | body <- expr({ 108 | value(!!(f_rhs(.f))) 109 | }) 110 | env <- f_env(.f) 111 | lambda <- new_function(rapper_args(.tbl, env = env_parent(env)), body, env = env(value = value, env)) 112 | attr(lambda, "class") <- "rap_lambda" 113 | 114 | # the mapper 115 | .map <- map_for(.ptype) 116 | 117 | list( 118 | lambda = lambda, 119 | mapper = .map 120 | ) 121 | } 122 | 123 | #' Map over columns of a data frame simultaneously 124 | #' 125 | #' @param .tbl A data frame 126 | #' @param .f a single formula 127 | #' @param ... formulas 128 | #' 129 | #' The *rhs* of each formula uses columns of `.tbl`, and each stands for a single 130 | #' observation. 131 | #' 132 | #' The *lhs* of each formula indicates the type, in the [vctrs::vec_c()] sense. 133 | #' 134 | #' - empty or `list()`: no check is performed on the results of 135 | #' the rhs expression and a list is returned. 136 | #' 137 | #' - `data.frame()`: to indicate that the rhs should evaluate 138 | #' to a data frame of 1 row. The data frames don't need to be of a specific types 139 | #' and are are combined with [vctrs::vec_rbind()]. 140 | #' 141 | #' - A data frame of a specific type, e.g. `data.frame(x = integer(), y = double())` 142 | #' The rhs should evaluate to a data frame of that type with 1 row. 143 | #' 144 | #' - Any other ptype that makes sense for [vctrs::vec_c()]. Each result must 145 | #' validate `vctrs::vec_size(.) == 1L` and are combined with 146 | #' `vctrs::vec_c(!!!, .ptype = .ptype)` 147 | #' 148 | #' In `rap()` if the formula is named, the result becomes a new column of the 149 | #' `tbl`, otherwise the formula is only used for side effects. 150 | #' 151 | #' @return 152 | #' - `wap()` returns a vector of the type specified by the lhs of the formula. 153 | #' The vector validates `vec_size() == nrow(.tbl)`. This is similar 154 | #' to [purrr::pmap()] 155 | #' 156 | #' - `rap()` adds a column to `.tbl` per formula in `...` 157 | #' 158 | #' @examples 159 | #' 160 | #' library(purrr) 161 | #' library(dplyr) 162 | #' library(tibble) 163 | #' 164 | #' tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20)) 165 | #' 166 | #' # ----- wap 167 | #' # returns a list of 3 elements 168 | #' tbl %>% 169 | #' wap( ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)) 170 | #' 171 | #' # same, i.e. list() is equivalent to empty 172 | #' tbl %>% 173 | #' wap(list() ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)) 174 | #' 175 | #' # can specify the output type with the formula lhs 176 | #' tbl %>% 177 | #' wap(integer() ~ nrow(filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))) 178 | #' 179 | #' # to make data frames 180 | #' starwars %>% 181 | #' wap(data.frame() ~ data.frame(species = length(species), films = length(films))) 182 | #' 183 | #' # ----- rap: add columns 184 | #' tbl %>% 185 | #' rap( 186 | #' x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold), 187 | #' n = integer() ~ nrow(x) 188 | #' ) 189 | #' 190 | #' # rap is especially useful for iterating over multiple models 191 | #' starwars %>% 192 | #' group_nest(gender) %>% 193 | #' rap( 194 | #' model = ~ lm(height ~ mass + birth_year, data = data), 195 | #' perf = double() ~ summary(model)$adj.r.squared 196 | #' ) 197 | #' 198 | #' @rdname rap 199 | #' @export 200 | wap <- function(.tbl, .f) { 201 | c(lambda, mapper) %<-% prepare_wap(.tbl, .f = .f, check = TRUE) 202 | mapper(seq_len(nrow(.tbl)), lambda) 203 | } 204 | 205 | #' @rdname rap 206 | #' @export 207 | lap <- function(.tbl, .f) { 208 | prepare_wap(.tbl, .f, check = TRUE)$lambda 209 | } 210 | 211 | #' @export 212 | print.rap_lambda <- function(x, ...) { 213 | expr_print(unclass(x)) 214 | invisible(x) 215 | } 216 | 217 | #' @rdname rap 218 | #' @export 219 | rap <- function(.tbl, ...) { 220 | formulas <- list2(...) 221 | if(is.null(names(formulas))) { 222 | names(formulas) <- rep("", length(formulas)) 223 | } 224 | assert_that( 225 | all(map_lgl(formulas, is_formula)), 226 | msg = "`...` should be a list of formulas" 227 | ) 228 | 229 | iwalk(formulas, ~{ 230 | c(lambda, mapper) %<-% prepare_wap(.tbl, .x, check = FALSE) 231 | 232 | if (is_grouped_df(.tbl) && .y %in% group_vars(.tbl)) { 233 | abort("cannot rap() a grouping variable") 234 | } 235 | 236 | res <- mapper(seq_len(nrow(.tbl)), lambda) 237 | if (.y != "") { 238 | # res is a column 239 | .tbl[[.y]] <<- res 240 | } else { 241 | # res is a list of one-row tibbles, so 242 | # assemble them together and auto splice 243 | .tbl <<- vec_cbind(.tbl, vec_rbind(!!!res)) 244 | } 245 | 246 | }) 247 | .tbl 248 | } 249 | 250 | #' pmap adapter 251 | #' 252 | #' `slam()` is typically used in [purrr::pmap()] calls to transform a 253 | #' formula that uses the raw names into a function. `slam()` is not quite 254 | #' [rap()]. 255 | #' 256 | #' @param .tbl a tibble 257 | #' @param formula a formula that uses columns from the tibble 258 | #' 259 | #' @examples 260 | #' library(purrr) 261 | #' library(gapminder) 262 | #' library(dplyr) 263 | #' 264 | #' oceania <- gapminder::gapminder %>% 265 | #' filter(continent == "Oceania") %>% 266 | #' mutate(yr1952 = year - 1952) %>% 267 | #' select(-continent) %>% 268 | #' group_nest(country) 269 | #' 270 | #' # the idea of slam is to promote a formula expressed with the 271 | #' # column names into a pmap() ready function 272 | #' formula <- ~broom::tidy(stats::lm(lifeExp ~ yr1952, data)) 273 | #' oceania %>% 274 | #' pmap(slam(oceania, formula)) 275 | #' 276 | #' # this is similar to e.g. 277 | #' oceania %>% 278 | #' wap(~broom::tidy(stats::lm(lifeExp ~ yr1952, data))) 279 | #' 280 | #' @export 281 | slam <- function(.tbl, formula) { 282 | args <- set_names(rep(list(missing_arg()), ncol(.tbl)), tbl_vars(.tbl)) 283 | body <- expr({ 284 | value(!!(f_rhs(formula))) 285 | }) 286 | env <- env(f_env(formula), value = value) 287 | 288 | new_function(args, body, env) 289 | } 290 | 291 | 292 | -------------------------------------------------------------------------------- /R/zest_join.R: -------------------------------------------------------------------------------- 1 | 2 | #' zest join 3 | #' 4 | #' a zest join is similar to a [dplyr::nest_join()] but the rows of `y` that are 5 | #' included in the list column are controlled by a predicate. 6 | #' 7 | #' @param x a tibble 8 | #' 9 | #' @param y another tibble 10 | #' 11 | #' @param ... named predicate formulas 12 | #' 13 | #' The rhs of the formulas is used y [dplyr::filter()] on `y` for each row of `x`. 14 | #' 15 | #' - Literal column names refer to columns of `y`. Alternatively you can use `.data$`. 16 | #' 17 | #' - To use the current value for a column of `x` you can use unquoting, e.g. `!!cyl` 18 | #' 19 | #' @return a tibble that contains all columns and rows of `x`, plus an additional list column per formula: 20 | #' - its name is given by the name of the formula 21 | #' - each element of the column is a tibble 22 | #' - each of the tibbles is a subset of `y` according to the rhs of the formula 23 | #' 24 | #' @examples 25 | #' 26 | #' tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20)) 27 | #' 28 | #' # zest join of tbl and mtcars 29 | #' # - the created column is called `data` 30 | #' # - each element of the data column is the result of filter(mtcars, cyl == !!cyl & mpg < !!mpg) 31 | #' # - `cyl` and `mpg` refer to columns of `y` 32 | #' # - `!!cyl` and `!!mpg` refer to the current 33 | #' tbl %>% 34 | #' zest_join(mtcars, data = ~cyl == !!cyl & mpg < !!mpg) 35 | #' 36 | #' # similar to 37 | #' tbl %>% 38 | #' rap(data = ~filter(mtcars, cyl == !!cyl & mpg < !!mpg)) 39 | #' 40 | #' # multiple zest 41 | #' tbl %>% 42 | #' zest_join(mtcars, 43 | #' one = ~cyl == !!cyl & mpg < !!mpg, 44 | #' two = ~cyl < !!cyl & mpg > !!mpg 45 | #' ) 46 | #' 47 | #' @export 48 | zest_join <- function(x, y, ...) { 49 | out <- x 50 | 51 | formulas <- list(...) 52 | assert_that( 53 | !is.null(names(formulas)), 54 | all(map_lgl(formulas, is_formula)), 55 | msg = "`...` should be a named list of formulas" 56 | ) 57 | 58 | iwalk(formulas, ~{ 59 | c(lambda, mapper) %<-% prepare_wap(x, .x, check = FALSE) 60 | 61 | if (.y %in% tbl_vars(x)) { 62 | abort("cannot zest_join() a column with the same name as a column of x") 63 | } 64 | 65 | predicate <- body(lambda)[[2]][[2]] 66 | body(lambda)[[2]][[2]] <- call('filter', sym(".::rhs::."), predicate) 67 | environment(lambda) <- env(`.::rhs::.` = y, filter = dplyr::filter, environment(lambda)) 68 | 69 | out[[.y]] <<- mapper(seq_len(nrow(x)), lambda) 70 | }) 71 | out 72 | } 73 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r setup, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # rap 17 | 18 | [![Lifecycle Status](https://img.shields.io/badge/lifecycle-experimental-blue.svg)](https://www.tidyverse.org/lifecycle/) 19 | [![Travis build status](https://travis-ci.org/romainfrancois/rap.svg?branch=master)](https://travis-ci.org/romainfrancois/rap) 20 | 21 | ![](https://media.giphy.com/media/l41Yy7rv1mVZNQCT6/giphy.gif) 22 | 23 | Experimenting with yet another way to do rowwise operations. 24 | 25 | ## Installation 26 | 27 | You can install `rap` from gitub 28 | 29 | ``` r 30 | # install.packages("devtools") 31 | devtools::install_github("romainfrancois/rap") 32 | ``` 33 | 34 | ## Why 35 | 36 | This offers `rap()` as an alternative to some versions of: 37 | 38 | - `rowwise()` + `do()` 39 | - `mutate()` + `pmap()` 40 | - maybe `purrrlyr` ? 41 | - probably other approaches 42 | 43 | `rap()` works with lambdas supplied as formulas, similar to `purrr::map()` 44 | but instead of `.x`, `.y`, `..1`, `..2`, ...the lambda can use the column names, 45 | which stand for a single element of the associated vector, in the `[[` sense. 46 | 47 | ## rap 48 | 49 | ```{r} 50 | library(tidyverse) 51 | library(rap) 52 | 53 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20)) 54 | tbl 55 | 56 | tbl %>% 57 | rap(x = ~filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)) 58 | ``` 59 | 60 | If the lhs of the formula is empty, `rap()` adds a list column. Otherwise the lhs 61 | can be used to specify the type: 62 | 63 | ```{r} 64 | tbl %>% 65 | rap( 66 | x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold), 67 | n = integer() ~ nrow(x) 68 | ) 69 | ``` 70 | 71 | this example is based on this [issue](https://github.com/tidyverse/purrr/issues/280), 72 | which has equivalent with `pmap`: 73 | 74 | ```{r} 75 | tbl %>% 76 | mutate( 77 | x = pmap( 78 | .l = list(cyl_threshold, mpg_threshold), 79 | function(cc, mm) filter(mtcars, cyl == cc, mpg < mm) 80 | ), 81 | n = map_int(x, nrow) 82 | ) 83 | ``` 84 | 85 | ## wap 86 | 87 | ```{r} 88 | library(dplyr) 89 | 90 | starwars <- head(starwars) 91 | 92 | # creates a list of length 1 integer vectors 93 | # because type not specified 94 | starwars %>% 95 | wap(~length(films)) 96 | 97 | # using the lhs to specify the type 98 | starwars %>% 99 | wap(integer() ~ length(films)) 100 | 101 | # list of data frames 102 | starwars %>% 103 | wap(~ data.frame(vehicles = length(vehicles), starships = length(starships))) 104 | 105 | # Specify type as data.frame() row binds them 106 | starwars %>% 107 | wap(data.frame() ~ data.frame(vehicles = length(vehicles), starships = length(starships))) 108 | ``` 109 | 110 | 111 | ## zest_join 112 | 113 | `r emo::ji("lemon")` `zest_join()` is similar to `dplyr::nest_join()` but 114 | you control what goes in the nested column. `Z` is `N` but `r emo::ji("arrow_heading_down")`. 115 | 116 | ```{r} 117 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20)) 118 | tbl %>% 119 | zest_join(mtcars, data = ~cyl == cyl_threshold & mpg < mpg_threshold) 120 | ``` 121 | 122 | In the rhs of the formula : 123 | 124 | - `cyl` and `mpg` refer to columns of `mtcars` 125 | - `cyl_threshold` and `mpg_threshold` refer to the current value from `tbl` because these columns don't exist in mtcars. If you wanted to refer to columns that are present both in mtcars and tbl you would have to unquote the columns in tbl with the unquoting operator, e.g. !!cyl -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | rap 4 | ==================================================== 5 | 6 | [![Lifecycle Status](https://img.shields.io/badge/lifecycle-experimental-blue.svg)](https://www.tidyverse.org/lifecycle/) [![Travis build status](https://travis-ci.org/romainfrancois/rap.svg?branch=master)](https://travis-ci.org/romainfrancois/rap) 7 | 8 | ![](https://media.giphy.com/media/l41Yy7rv1mVZNQCT6/giphy.gif) 9 | 10 | Experimenting with yet another way to do rowwise operations. 11 | 12 | Installation 13 | ------------ 14 | 15 | You can install `rap` from gitub 16 | 17 | ``` r 18 | # install.packages("devtools") 19 | devtools::install_github("romainfrancois/rap") 20 | ``` 21 | 22 | Why 23 | --- 24 | 25 | This offers `rap()` as an alternative to some versions of: 26 | 27 | - `rowwise()` + `do()` 28 | - `mutate()` + `pmap()` 29 | - maybe `purrrlyr` ? 30 | - probably other approaches 31 | 32 | `rap()` works with lambdas supplied as formulas, similar to `purrr::map()` but instead of `.x`, `.y`, `..1`, `..2`, ...the lambda can use the column names, which stand for a single element of the associated vector, in the `[[` sense. 33 | 34 | rap 35 | --- 36 | 37 | ``` r 38 | library(tidyverse) 39 | #> ── Attaching packages ──────────────────── tidyverse 1.2.1 ── 40 | #> ✔ ggplot2 3.1.0 ✔ purrr 0.2.5.9000 41 | #> ✔ tibble 1.4.99.9006 ✔ dplyr 0.7.8 42 | #> ✔ tidyr 0.8.1 ✔ stringr 1.3.1 43 | #> ✔ readr 1.1.1 ✔ forcats 0.3.0 44 | #> ── Conflicts ─────────────────────── tidyverse_conflicts() ── 45 | #> ✖ dplyr::filter() masks stats::filter() 46 | #> ✖ dplyr::lag() masks stats::lag() 47 | library(rap) 48 | 49 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20)) 50 | tbl 51 | #> # A tibble: 3 x 2 52 | #> cyl_threshold mpg_threshold 53 | #> 54 | #> 1 4 30 55 | #> 2 6 25 56 | #> 3 8 20 57 | 58 | tbl %>% 59 | rap(x = ~filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)) 60 | #> # A tibble: 3 x 3 61 | #> cyl_threshold mpg_threshold x 62 | #> 63 | #> 1 4 30 64 | #> 2 6 25 65 | #> 3 8 20 66 | ``` 67 | 68 | If the lhs of the formula is empty, `rap()` adds a list column. Otherwise the lhs can be used to specify the type: 69 | 70 | ``` r 71 | tbl %>% 72 | rap( 73 | x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold), 74 | n = integer() ~ nrow(x) 75 | ) 76 | #> # A tibble: 3 x 4 77 | #> cyl_threshold mpg_threshold x n 78 | #> 79 | #> 1 4 30 7 80 | #> 2 6 25 7 81 | #> 3 8 20 14 82 | ``` 83 | 84 | this example is based on this [issue](https://github.com/tidyverse/purrr/issues/280), which has equivalent with `pmap`: 85 | 86 | ``` r 87 | tbl %>% 88 | mutate( 89 | x = pmap( 90 | .l = list(cyl_threshold, mpg_threshold), 91 | function(cc, mm) filter(mtcars, cyl == cc, mpg < mm) 92 | ), 93 | n = map_int(x, nrow) 94 | ) 95 | #> # A tibble: 3 x 4 96 | #> cyl_threshold mpg_threshold x n 97 | #> 98 | #> 1 4 30 7 99 | #> 2 6 25 7 100 | #> 3 8 20 14 101 | ``` 102 | 103 | wap 104 | --- 105 | 106 | ``` r 107 | library(dplyr) 108 | 109 | starwars <- head(starwars) 110 | 111 | # creates a list of length 1 integer vectors 112 | # because type not specified 113 | starwars %>% 114 | wap(~length(films)) 115 | #> [[1]] 116 | #> [1] 5 117 | #> 118 | #> [[2]] 119 | #> [1] 6 120 | #> 121 | #> [[3]] 122 | #> [1] 7 123 | #> 124 | #> [[4]] 125 | #> [1] 4 126 | #> 127 | #> [[5]] 128 | #> [1] 5 129 | #> 130 | #> [[6]] 131 | #> [1] 3 132 | 133 | # using the lhs to specify the type 134 | starwars %>% 135 | wap(integer() ~ length(films)) 136 | #> [1] 5 6 7 4 5 3 137 | 138 | # list of data frames 139 | starwars %>% 140 | wap(~ data.frame(vehicles = length(vehicles), starships = length(starships))) 141 | #> [[1]] 142 | #> vehicles starships 143 | #> 1 2 2 144 | #> 145 | #> [[2]] 146 | #> vehicles starships 147 | #> 1 0 0 148 | #> 149 | #> [[3]] 150 | #> vehicles starships 151 | #> 1 0 0 152 | #> 153 | #> [[4]] 154 | #> vehicles starships 155 | #> 1 0 1 156 | #> 157 | #> [[5]] 158 | #> vehicles starships 159 | #> 1 1 0 160 | #> 161 | #> [[6]] 162 | #> vehicles starships 163 | #> 1 0 0 164 | 165 | # Specify type as data.frame() row binds them 166 | starwars %>% 167 | wap(data.frame() ~ data.frame(vehicles = length(vehicles), starships = length(starships))) 168 | #> vehicles starships 169 | #> 1 2 2 170 | #> 2 0 0 171 | #> 3 0 0 172 | #> 4 0 1 173 | #> 5 1 0 174 | #> 6 0 0 175 | ``` 176 | 177 | zest\_join 178 | ---------- 179 | 180 | 🍋 `zest_join()` is similar to `dplyr::nest_join()` but you control what goes in the nested column. `Z` is `N` but ⤵️. 181 | 182 | ``` r 183 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20)) 184 | tbl %>% 185 | zest_join(mtcars, data = ~cyl == !!cyl_threshold & mpg < !!mpg_threshold) 186 | #> # A tibble: 3 x 3 187 | #> cyl_threshold mpg_threshold data 188 | #> 189 | #> 1 4 30 190 | #> 2 6 25 191 | #> 3 8 20 192 | ``` 193 | 194 | In the rhs of the formula : 195 | 196 | - `cyl` and `mpg` refer to columns of `mtcars` 197 | - `cyl_threshold` and `mpg_threshold` refer to the current value from `tbl` because these columns don't exist in mtcars. If you wanted to refer to columns that are present both in mtcars and tbl you would have to unquote the columns in tbl with the unquoting operator, e.g. !!cyl 198 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainfrancois/rap/ff8d735a7ad93fad021e7aca45f3abe4d0cb80cc/man/figures/logo.png -------------------------------------------------------------------------------- /man/rap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rap.R 3 | \name{wap} 4 | \alias{wap} 5 | \alias{lap} 6 | \alias{rap} 7 | \title{Map over columns of a data frame simultaneously} 8 | \usage{ 9 | wap(.tbl, .f) 10 | 11 | lap(.tbl, .f) 12 | 13 | rap(.tbl, ...) 14 | } 15 | \arguments{ 16 | \item{.tbl}{A data frame} 17 | 18 | \item{.f}{a single formula} 19 | 20 | \item{...}{formulas 21 | 22 | The \emph{rhs} of each formula uses columns of \code{.tbl}, and each stands for a single 23 | observation. 24 | 25 | The \emph{lhs} of each formula indicates the type, in the \code{\link[vctrs:vec_c]{vctrs::vec_c()}} sense. 26 | \itemize{ 27 | \item empty or \code{list()}: no check is performed on the results of 28 | the rhs expression and a list is returned. 29 | \item \code{data.frame()}: to indicate that the rhs should evaluate 30 | to a data frame of 1 row. The data frames don't need to be of a specific types 31 | and are are combined with \code{\link[vctrs:vec_rbind]{vctrs::vec_rbind()}}. 32 | \item A data frame of a specific type, e.g. \code{data.frame(x = integer(), y = double())} 33 | The rhs should evaluate to a data frame of that type with 1 row. 34 | \item Any other ptype that makes sense for \code{\link[vctrs:vec_c]{vctrs::vec_c()}}. Each result must 35 | validate \code{vctrs::vec_size(.) == 1L} and are combined with 36 | \code{vctrs::vec_c(!!!, .ptype = .ptype)} 37 | } 38 | 39 | In \code{rap()} if the formula is named, the result becomes a new column of the 40 | \code{tbl}, otherwise the formula is only used for side effects.} 41 | } 42 | \value{ 43 | \itemize{ 44 | \item \code{wap()} returns a vector of the type specified by the lhs of the formula. 45 | The vector validates \code{vec_size() == nrow(.tbl)}. This is similar 46 | to \code{\link[purrr:pmap]{purrr::pmap()}} 47 | \item \code{rap()} adds a column to \code{.tbl} per formula in \code{...} 48 | } 49 | } 50 | \description{ 51 | Map over columns of a data frame simultaneously 52 | } 53 | \examples{ 54 | 55 | library(purrr) 56 | library(dplyr) 57 | library(tibble) 58 | 59 | tbl <- tibble(cyl_threshold = c(4, 6, 8), mpg_threshold = c(30, 25, 20)) 60 | 61 | # ----- wap 62 | # returns a list of 3 elements 63 | tbl \%>\% 64 | wap( ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)) 65 | 66 | # same, i.e. list() is equivalent to empty 67 | tbl \%>\% 68 | wap(list() ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold)) 69 | 70 | # can specify the output type with the formula lhs 71 | tbl \%>\% 72 | wap(integer() ~ nrow(filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold))) 73 | 74 | # to make data frames 75 | starwars \%>\% 76 | wap(data.frame() ~ data.frame(species = length(species), films = length(films))) 77 | 78 | # ----- rap: add columns 79 | tbl \%>\% 80 | rap( 81 | x = ~ filter(mtcars, cyl == cyl_threshold, mpg < mpg_threshold), 82 | n = integer() ~ nrow(x) 83 | ) 84 | 85 | # rap is especially useful for iterating over multiple models 86 | starwars \%>\% 87 | group_nest(gender) \%>\% 88 | rap( 89 | model = ~ lm(height ~ mass + birth_year, data = data), 90 | perf = double() ~ summary(model)$adj.r.squared 91 | ) 92 | 93 | } 94 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rap.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{\%>\%} 7 | \title{Objects exported from other packages} 8 | \keyword{internal} 9 | \description{ 10 | These objects are imported from other packages. Follow the links 11 | below to see their documentation. 12 | 13 | \describe{ 14 | \item{magrittr}{\code{\link[magrittr]{\%>\%}}} 15 | }} 16 | 17 | -------------------------------------------------------------------------------- /man/slam.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rap.R 3 | \name{slam} 4 | \alias{slam} 5 | \title{pmap adapter} 6 | \usage{ 7 | slam(.tbl, formula) 8 | } 9 | \arguments{ 10 | \item{.tbl}{a tibble} 11 | 12 | \item{formula}{a formula that uses columns from the tibble} 13 | } 14 | \description{ 15 | \code{slam()} is typically used in \code{\link[purrr:pmap]{purrr::pmap()}} calls to transform a 16 | formula that uses the raw names into a function. \code{slam()} is not quite 17 | \code{\link[=rap]{rap()}}. 18 | } 19 | \examples{ 20 | library(purrr) 21 | library(gapminder) 22 | library(dplyr) 23 | 24 | oceania <- gapminder::gapminder \%>\% 25 | filter(continent == "Oceania") \%>\% 26 | mutate(yr1952 = year - 1952) \%>\% 27 | select(-continent) \%>\% 28 | group_nest(country) 29 | 30 | # the idea of slam is to promote a formula expressed with the 31 | # column names into a pmap() ready function 32 | formula <- ~broom::tidy(stats::lm(lifeExp ~ yr1952, data)) 33 | oceania \%>\% 34 | pmap(slam(oceania, formula)) 35 | 36 | # this is similar to e.g. 37 | oceania \%>\% 38 | wap(~broom::tidy(stats::lm(lifeExp ~ yr1952, data))) 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/zest_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zest_join.R 3 | \name{zest_join} 4 | \alias{zest_join} 5 | \title{zest join} 6 | \usage{ 7 | zest_join(x, y, ...) 8 | } 9 | \arguments{ 10 | \item{x}{a tibble} 11 | 12 | \item{y}{another tibble} 13 | 14 | \item{...}{named predicate formulas 15 | 16 | The rhs of the formulas is used y \code{\link[dplyr:filter]{dplyr::filter()}} on \code{y} for each row of \code{x}. 17 | \itemize{ 18 | \item Literal column names refer to columns of \code{y}. Alternatively you can use \code{.data$}. 19 | \item To use the current value for a column of \code{x} you can use unquoting, e.g. \code{!!cyl} 20 | }} 21 | } 22 | \value{ 23 | a tibble that contains all columns and rows of \code{x}, plus an additional list column per formula: 24 | \itemize{ 25 | \item its name is given by the name of the formula 26 | \item each element of the column is a tibble 27 | \item each of the tibbles is a subset of \code{y} according to the rhs of the formula 28 | } 29 | } 30 | \description{ 31 | a zest join is similar to a \code{\link[dplyr:nest_join]{dplyr::nest_join()}} but the rows of \code{y} that are 32 | included in the list column are controlled by a predicate. 33 | } 34 | \examples{ 35 | 36 | tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20)) 37 | 38 | # zest join of tbl and mtcars 39 | # - the created column is called `data` 40 | # - each element of the data column is the result of filter(mtcars, cyl == !!cyl & mpg < !!mpg) 41 | # - `cyl` and `mpg` refer to columns of `y` 42 | # - `!!cyl` and `!!mpg` refer to the current 43 | tbl \%>\% 44 | zest_join(mtcars, data = ~cyl == !!cyl & mpg < !!mpg) 45 | 46 | # similar to 47 | tbl \%>\% 48 | rap(data = ~filter(mtcars, cyl == !!cyl & mpg < !!mpg)) 49 | 50 | # multiple zest 51 | tbl \%>\% 52 | zest_join(mtcars, 53 | one = ~cyl == !!cyl & mpg < !!mpg, 54 | two = ~cyl < !!cyl & mpg > !!mpg 55 | ) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /rap.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rap) 3 | 4 | test_check("rap") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-rap.R: -------------------------------------------------------------------------------- 1 | context("rap()") 2 | 3 | test_that("rap() with no lhs a list column", { 4 | res <- rap(iris, x = ~ Sepal.Length *2) 5 | expect_is(res, "data.frame") 6 | expect_equal(names(res), c(names(iris), "x")) 7 | expect_is(res[["x"]], "list") 8 | expect_equal(length(res[["x"]]), 150L) 9 | expect_equal(unlist(res[["x"]]), iris$Sepal.Length * 2) 10 | }) 11 | 12 | test_that("rap() respects lhs of formula for purrr variants", { 13 | res <- iris %>% 14 | rap( 15 | int = integer() ~ 1L, 16 | dbl = double() ~ 1L, 17 | chr = character() ~ "", 18 | raw = raw() ~ as.raw(1), 19 | lgl = logical() ~ Sepal.Length < Sepal.Width 20 | ) 21 | expected <- iris %>% 22 | tibble::add_column( 23 | int = 1L, 24 | dbl = 1, 25 | chr = "", 26 | raw = as.raw(1), 27 | lgl = iris$Sepal.Length < iris$Sepal.Width 28 | ) 29 | expect_equal(res, expected) 30 | }) 31 | 32 | test_that("rap() can make data frame columns", { 33 | res <- rap(iris, 34 | x = data.frame() ~ data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width) 35 | ) 36 | expected <- tibble::add_column( 37 | iris, 38 | x = with(iris, data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width)) 39 | ) 40 | expect_equal(res, expected) 41 | }) 42 | 43 | test_that("rap() can make factors", { 44 | expect_equal( 45 | rap(iris, x = iris$Species ~ Species), 46 | tibble::add_column(iris, x = iris$Species) 47 | ) 48 | }) 49 | 50 | test_that("rap() only accepts results with 1 observation when type is not list()", { 51 | expect_error(rap(iris, x = integer() ~ 1:2)) 52 | expect_error(rap(iris, x = double() ~ c(1,2))) 53 | expect_error(rap(iris, x = character() ~ letters)) 54 | expect_error(rap(iris, x = logical() ~ c(TRUE, FALSE))) 55 | expect_error(rap(iris, x = raw() ~ as.raw(0:255))) 56 | expect_error(rap(iris, x = data.frame() ~mtcars)) 57 | }) 58 | 59 | test_that("unquoting is done locally, and is equivalent to using the `..data` pronoun", { 60 | tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20)) 61 | 62 | res1 <- tbl %>% 63 | rap(x = ~dplyr::filter(mtcars, cyl == !!cyl, mpg < !!mpg) ) 64 | 65 | res2 <- tbl %>% 66 | rap(x = ~dplyr::filter(mtcars, cyl == ..data$cyl, mpg < ..data$mpg) ) 67 | 68 | expect_identical(res1, res2) 69 | }) 70 | 71 | test_that("..env is the formula environment", { 72 | tbl <- tibble::tibble(cyl = c(4, 6, 8), mpg = c(30, 25, 20)) 73 | 74 | mpg <- 20 75 | res1 <- tbl %>% 76 | rap(x = ~dplyr::filter(mtcars, cyl == !!cyl, mpg < ..env$mpg) ) 77 | 78 | res2 <- tbl %>% 79 | rap(x = ~dplyr::filter(mtcars, cyl == ..data$cyl, mpg < 20) ) 80 | 81 | expect_identical(res1, res2) 82 | }) 83 | 84 | test_that("rap can splice", { 85 | tbl <- tibble::tibble( 86 | params = list(list(from = 1, to = 2), list(from = 1, to = 10)) 87 | ) %>% 88 | rap(y = ~ seq(!!!params)) 89 | 90 | expect_equal(tbl$y, list(1:2, 1:10)) 91 | }) 92 | -------------------------------------------------------------------------------- /tests/testthat/test-wap.R: -------------------------------------------------------------------------------- 1 | context("wap()") 2 | 3 | test_that("wap() creates a list", { 4 | res <- wap(iris, ~ Sepal.Length *2) 5 | expect_is(res, "list") 6 | expect_equal(length(res), 150L) 7 | expect_equal(unlist(res), iris$Sepal.Length * 2) 8 | }) 9 | 10 | test_that("wap() respects basic types", { 11 | expect_equal(wap(iris, integer() ~ 1L), rep(1L, 150)) 12 | expect_equal(wap(iris, double() ~ 1L), rep(1, 150)) 13 | expect_equal(wap(iris, character() ~ ""), rep("", 150)) 14 | expect_equal(wap(iris, raw() ~ as.raw(1L)), rep(as.raw(1L), 150)) 15 | expect_equal(wap(iris, logical() ~ Sepal.Length < Sepal.Width), iris$Sepal.Length < iris$Sepal.Width) 16 | }) 17 | 18 | test_that("wap() with data.frame() in lhs", { 19 | res <- wap(iris, data.frame() ~ data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width)) 20 | expected <- with(iris, data.frame(Sepal = Sepal.Length * Sepal.Width, Petal = Petal.Length * Petal.Width)) 21 | 22 | expect_equal(res, expected) 23 | }) 24 | 25 | test_that("wap() can make factors", { 26 | f <- wap(iris, iris$Species ~ Species) 27 | expect_equal(f, iris$Species) 28 | }) 29 | 30 | test_that("wap() only accepts results with 1 observation when .ptype is specified", { 31 | expect_error(wap(iris, integer() ~ 1:2)) 32 | expect_error(wap(iris, double() ~ c(1,2))) 33 | expect_error(wap(iris, character() ~ letters)) 34 | expect_error(wap(iris, logical() ~ c(TRUE, FALSE))) 35 | expect_error(wap(iris, raw() ~ as.raw(0:255))) 36 | expect_error(wap(iris, data.frame() ~ mtcars)) 37 | }) 38 | 39 | --------------------------------------------------------------------------------