├── _pkgdown.yml ├── src ├── .gitignore ├── salsa.cpp ├── dance.h ├── RcppExports.cpp └── bolero.cpp ├── .gitignore ├── LICENSE ├── tests ├── testthat.R └── testthat │ └── test-tango.R ├── man ├── figures │ └── logo.png ├── bolero.Rd ├── mambo.Rd ├── reexports.Rd ├── choreography.Rd ├── ballet.Rd ├── rumba.Rd ├── tango.Rd ├── samba.Rd ├── jive.Rd └── swing.Rd ├── .Rbuildignore ├── dance.Rproj ├── R ├── RcppExports.R ├── zzz.R ├── filter.R ├── ballet.R ├── choreography.R ├── rumba.R ├── summarise.R ├── tools.R ├── swing.R ├── foxtrot.R └── mutate.R ├── DESCRIPTION ├── .travis.yml ├── LICENSE.md ├── NAMESPACE ├── README.Rmd └── README.md /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/ 2 | .Rproj.user 3 | .Rhistory 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Romain François 3 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(dance) 3 | 4 | test_check("dance") 5 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/romainfrancois/dance/HEAD/man/figures/logo.png 
-------------------------------------------------------------------------------- /tests/testthat/test-tango.R: -------------------------------------------------------------------------------- 1 | context("test-tango") 2 | 3 | test_that("multiplication works", { 4 | expect_equal(2 * 2, 4) 5 | }) 6 | -------------------------------------------------------------------------------- /src/salsa.cpp: -------------------------------------------------------------------------------- 1 | #include "dance.h" 2 | 3 | // [[Rcpp::export]] 4 | void salsa_check_sizes(SEXP steps, SEXP rows, SEXP n_formulas_) { 5 | 6 | } 7 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^pkgdown$ 2 | ^_pkgdown\.yml$ 3 | ^docs$ 4 | ^\.travis\.yml$ 5 | ^LICENSE\.md$ 6 | ^README\.Rmd$ 7 | ^dance\.Rproj$ 8 | ^\.Rproj\.user$ 9 | -------------------------------------------------------------------------------- /src/dance.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace dance { 5 | 6 | inline R_xlen_t convert_R_xlen(SEXP x) { 7 | switch(TYPEOF(x)){ 8 | case INTSXP: return (R_xlen_t) INTEGER_ELT(x, 0); 9 | case REALSXP: return (R_xlen_t) REAL_ELT(x, 0); 10 | default: 11 | return 0; 12 | } 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /dance.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch 
--with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | bolero_check_results <- function(steps, rows, nsteps_) { 5 | invisible(.Call(`_dance_bolero_check_results`, steps, rows, nsteps_)) 6 | } 7 | 8 | bolero_lgl_steps_to_indices <- function(steps, n_steps_, original_rows) { 9 | .Call(`_dance_bolero_lgl_steps_to_indices`, steps, n_steps_, original_rows) 10 | } 11 | 12 | salsa_check_sizes <- function(steps, rows, n_formulas_) { 13 | invisible(.Call(`_dance_salsa_check_sizes`, steps, rows, n_formulas_)) 14 | } 15 | 16 | -------------------------------------------------------------------------------- /man/bolero.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter.R 3 | \name{bolero} 4 | \alias{bolero} 5 | \title{Filtering rows} 6 | \usage{ 7 | bolero(.tbl, ..., .op = and, .env = caller_env()) 8 | } 9 | \arguments{ 10 | \item{.tbl}{data frame, most likely grouped} 11 | 12 | \item{..., .env}{formulas and caller environment} 13 | 14 | \item{.op}{binary operator to \code{\link[=reduce]{reduce()}} results when there are multiple \code{...}} 15 | } 16 | \value{ 17 | A tibble with matching rows 18 | } 19 | \description{ 20 | Filtering rows 21 | } 22 | \examples{ 23 | iris \%>\% 24 | bolero(~ Sepal.Length > 5.5, ~Sepal.Width >= 4) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dance 2 | Title: Tibble Dancing 3 | Version: 0.0.0.9000 4 | Authors@R: c( 5 | 
person("Romain", "François", email = "romain@purrple.cat", role = c("aut", "cre")), 6 | person("Alix", "François", role = "art") 7 | ) 8 | Description: Tibble dancing. 9 | Depends: R (>= 3.5.0) 10 | License: MIT + file LICENSE 11 | Encoding: UTF-8 12 | LazyData: true 13 | Imports: 14 | vctrs (>= 0.1.0), 15 | rlang (>= 0.3.1), 16 | magrittr (>= 1.5), 17 | dplyr (>= 0.8.0.1), 18 | purrr (>= 0.3.1), 19 | assertthat (>= 0.2.0), 20 | tidyselect (>= 0.2.5), 21 | glue (>= 1.3.0), 22 | tibble (>= 2.0.1), 23 | zeallot (>= 0.1.0), 24 | utils, 25 | crayon, 26 | Rcpp 27 | RoxygenNote: 6.1.1 28 | Suggests: 29 | testthat, 30 | broom 31 | Roxygen: list(markdown = TRUE) 32 | SystemRequirements: C++11 33 | LinkingTo: 34 | Rcpp 35 | -------------------------------------------------------------------------------- /man/mambo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter.R 3 | \name{mambo} 4 | \alias{mambo} 5 | \title{Apply same predicate to multiple variables, and reduce} 6 | \usage{ 7 | mambo(.fun, ..., .tbl = get_tbl(), .op = and, .env = caller_env()) 8 | } 9 | \arguments{ 10 | \item{.fun}{A function or a formula that uses \code{.}} 11 | 12 | \item{...}{Tidy selection, see \code{\link[tidyselect:vars_select]{tidyselect::vars_select()}}} 13 | 14 | \item{.tbl, .env}{Data frame to select columns from, and parent environment, 15 | you most likely don't need to supply those arguments} 16 | 17 | \item{.op}{binary operator to \code{\link[purrr:reduce]{purrr::reduce()}}} 18 | } 19 | \value{ 20 | a single formula 21 | } 22 | \description{ 23 | Apply same predicate to multiple variables, and reduce 24 | } 25 | \examples{ 26 | iris \%>\% 27 | bolero(mambo(~. 
> 4, starts_with("Sepal"))) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | cache: packages 5 | latex: false 6 | fortran: false 7 | 8 | jobs: 9 | include: 10 | - r: devel 11 | - r: release 12 | before_cache: 13 | - Rscript -e 'remotes::install_cran("pkgdown")' 14 | - Rscript -e 'remotes::install_github("hadley/emo")' 15 | deploy: 16 | provider: script 17 | script: Rscript -e 'pkgdown::deploy_site_github()' 18 | skip_cleanup: true 19 | - r: release 20 | env: 21 | - DEVEL_PACKAGES=true 22 | r_github_packages: 23 | - r-lib/vctrs 24 | - r-lib/rlang 25 | - tidyverse/magrittr 26 | - tidyverse/dplyr 27 | - tidyverse/purrr 28 | - hadley/assertthat 29 | - tidyverse/tidyselect 30 | - tidyverse/glue 31 | - tidyverse/tibble 32 | - r-lib/zeallot 33 | env: 34 | global: 35 | - R_REMOTES_NO_ERRORS_FROM_WARNINGS=true 36 | - _R_CHECK_SYSTEM_CLOCK_=FALSE 37 | 38 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 Romain François 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import vctrs 2 | #' @import rlang 3 | #' @import dplyr 4 | #' @import tidyselect 5 | #' @importFrom purrr map map2 map_int map_dbl map_raw map_dfr map_chr map_lgl walk walk2 reduce as_mapper map_if map2_int transpose 6 | #' @importFrom assertthat assert_that 7 | #' @importFrom glue glue glue_collapse 8 | #' @importFrom tibble tibble as_tibble 9 | #' @importFrom magrittr and or 10 | #' @importFrom utils head 11 | #' @importFrom crayon red green silver 12 | #' @importFrom Rcpp sourceCpp 13 | #' @useDynLib dance, .registration = TRUE 14 | NULL 15 | 16 | #' @export 17 | tibble::tibble 18 | 19 | #' @export 20 | magrittr::`%>%` 21 | 22 | #' @export 23 | magrittr::and 24 | 25 | #' @export 26 | magrittr::or 27 | 28 | #' @export 29 | dplyr::group_by 30 | 31 | #' @export 32 | zeallot::`%<-%` 33 | 34 | #' @export 35 | tidyselect::starts_with 36 | 37 | #' @export 38 | tidyselect::ends_with 39 | 40 | #' @export 41 | tidyselect::contains 42 | 43 | #' @export 44 | tidyselect::matches 45 | 46 | #' @export 47 | tidyselect::num_range 48 | 49 | #' @export 50 | tidyselect::one_of 51 | 52 | #' @export 53 | tidyselect::everything 54 | 55 | #' @export 56 | tidyselect::last_col 57 | 58 | #' @export 59 | dplyr::group_cols 60 | 61 | 62 | -------------------------------------------------------------------------------- 
/man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zzz.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{tibble} 7 | \alias{\%>\%} 8 | \alias{and} 9 | \alias{or} 10 | \alias{group_by} 11 | \alias{\%<-\%} 12 | \alias{starts_with} 13 | \alias{ends_with} 14 | \alias{contains} 15 | \alias{matches} 16 | \alias{num_range} 17 | \alias{one_of} 18 | \alias{everything} 19 | \alias{last_col} 20 | \alias{group_cols} 21 | \title{Objects exported from other packages} 22 | \keyword{internal} 23 | \description{ 24 | These objects are imported from other packages. Follow the links 25 | below to see their documentation. 26 | 27 | \describe{ 28 | \item{dplyr}{\code{\link[dplyr]{group_by}}, \code{\link[dplyr]{group_cols}}} 29 | 30 | \item{magrittr}{\code{\link[magrittr]{\%>\%}}, \code{\link[magrittr]{and}}, \code{\link[magrittr]{or}}} 31 | 32 | \item{tibble}{\code{\link[tibble]{tibble}}} 33 | 34 | \item{tidyselect}{\code{\link[tidyselect]{starts_with}}, \code{\link[tidyselect]{ends_with}}, \code{\link[tidyselect]{contains}}, \code{\link[tidyselect]{matches}}, \code{\link[tidyselect]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect]{last_col}}} 35 | 36 | \item{zeallot}{\code{\link[zeallot]{\%<-\%}}} 37 | }} 38 | 39 | -------------------------------------------------------------------------------- /man/choreography.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/choreography.R 3 | \name{choreography} 4 | \alias{choreography} 5 | \title{choreography} 6 | \usage{ 7 | choreography(.tbl, ..., .env = caller_env()) 8 | } 9 | \arguments{ 10 | \item{.tbl}{A data frame} 11 | 12 | \item{...}{A variable number of formulas. 
\code{choreography()} only 13 | uses the rhs of each of the formulas.} 14 | 15 | \item{.env}{parent environment of the created function, see \code{\link[rlang:new_function]{rlang::new_function()}}} 16 | } 17 | \value{ 18 | a function that can be called with a single argument that 19 | represents indices. 20 | 21 | When called with an integer vector \code{idx}, the function returns a list 22 | of each of the expressions given on the rhs evaluated on the subset 23 | of the columns, i.e. in the formula \code{~mean(Sepal.Length)} the column 24 | \code{Sepal.Length} stands for \code{Sepal.Length[idx]}. 25 | } 26 | \description{ 27 | The choreography is a central concept of the dance 28 | package, most of the time you don't need to use it directly, but it is 29 | used by many other functions like \code{\link[=tango]{tango()}}, \code{\link[=samba]{samba()}}, ... 30 | } 31 | \examples{ 32 | 33 | moves <- choreography(iris, 34 | Sepal.Length = ~mean(Sepal.Length), 35 | Sepal.Width = ~mean(Sepal.Width) 36 | ) 37 | 38 | moves(1:10) 39 | # this returns the same as 40 | list( 41 | Sepal.Length = mean(iris$Sepal.Length[1:10]), 42 | Sepal.Width = mean(iris$Sepal.Width[1:10]) 43 | ) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/ballet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ballet.R 3 | \name{ballet} 4 | \alias{ballet} 5 | \title{ballet} 6 | \usage{ 7 | ballet(.tbl, ..., .env = caller_env()) 8 | } 9 | \arguments{ 10 | \item{.tbl}{A data frame} 11 | 12 | \item{...}{Description of the columns to create, see \code{\link[=choreography]{choreography()}} for details.} 13 | 14 | \item{.env}{parent environment of the function generated by \code{\link[=choreography]{choreography()}}} 15 | } 16 | \value{ 17 | a list of 2 elements 18 | \itemize{ 19 | \item \code{ptypes} : a list of \code{n_moves} types, in 
the sense of the \code{.ptype} argument to \code{\link[vctrs:vec_c]{vctrs::vec_c()}} 20 | \item \code{steps} : a list of \code{n_groups} results, each of size \code{n_moves} 21 | } 22 | } 23 | \description{ 24 | \code{ballet()} iterates on the groups, as identified by \code{\link[dplyr:group_rows]{dplyr::group_rows()}}), 25 | and applies the \code{\link[=choreography]{choreography()}} to each group. 26 | } 27 | \details{ 28 | The grouping structure of \code{.tbl} defines the number of groups \code{n_groups} and the 29 | formulas in \code{...} gives the number of dance moves \code{n_moves} 30 | } 31 | \examples{ 32 | data <- iris \%>\% 33 | group_by(Species) 34 | 35 | # most dance functions get the result of ballet() 36 | # and then process it 37 | c(ptypes, steps) \%<-\% 38 | ballet(data, 39 | Sepal.Length = ~mean(Sepal.Length), 40 | Sepal.Width = ~mean(Sepal.Width) 41 | ) 42 | ptypes 43 | steps 44 | 45 | purrr::transpose(steps) 46 | purrr::map_dbl(steps, 1) 47 | purrr::map_dbl(steps, 2) 48 | 49 | } 50 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(chacha,dance_grouped_df) 4 | S3method(chacha,data.frame) 5 | S3method(chacha,grouped_df) 6 | S3method(print,choreography) 7 | export("%<-%") 8 | export("%>%") 9 | export(and) 10 | export(bachata) 11 | export(ballet) 12 | export(bolero) 13 | export(chacha) 14 | export(charleston) 15 | export(choreography) 16 | export(contains) 17 | export(ends_with) 18 | export(everything) 19 | export(foxtrot) 20 | export(group_by) 21 | export(group_cols) 22 | export(jive) 23 | export(last_col) 24 | export(madison) 25 | export(mambo) 26 | export(matches) 27 | export(num_range) 28 | export(one_of) 29 | export(or) 30 | export(polka) 31 | export(rumba) 32 | export(salsa) 33 | export(samba) 34 | export(starts_with) 35 | export(swing) 36 | 
export(tango) 37 | export(tibble) 38 | export(twist) 39 | export(waltz) 40 | export(zumba) 41 | import(dplyr) 42 | import(rlang) 43 | import(tidyselect) 44 | import(vctrs) 45 | importFrom(Rcpp,sourceCpp) 46 | importFrom(assertthat,assert_that) 47 | importFrom(crayon,green) 48 | importFrom(crayon,red) 49 | importFrom(crayon,silver) 50 | importFrom(glue,glue) 51 | importFrom(glue,glue_collapse) 52 | importFrom(magrittr,and) 53 | importFrom(magrittr,or) 54 | importFrom(purrr,as_mapper) 55 | importFrom(purrr,map) 56 | importFrom(purrr,map2) 57 | importFrom(purrr,map2_int) 58 | importFrom(purrr,map_chr) 59 | importFrom(purrr,map_dbl) 60 | importFrom(purrr,map_dfr) 61 | importFrom(purrr,map_if) 62 | importFrom(purrr,map_int) 63 | importFrom(purrr,map_lgl) 64 | importFrom(purrr,map_raw) 65 | importFrom(purrr,reduce) 66 | importFrom(purrr,transpose) 67 | importFrom(purrr,walk) 68 | importFrom(purrr,walk2) 69 | importFrom(tibble,as_tibble) 70 | importFrom(tibble,tibble) 71 | importFrom(utils,head) 72 | useDynLib(dance, .registration = TRUE) 73 | -------------------------------------------------------------------------------- /R/filter.R: -------------------------------------------------------------------------------- 1 | #' Apply same predicate to multiple variables, and reduce 2 | #' 3 | #' @param .fun A function or a formula that uses `.` 4 | #' @param ... Tidy selection, see [tidyselect::vars_select()] 5 | #' @param .op binary operator to [purrr::reduce()] 6 | #' @param .tbl,.env Data frame to select columns from, and parent environment, 7 | #' you most likely don't need to supply those arguments 8 | #' 9 | #' @return a single formula 10 | #' 11 | #' @examples 12 | #' iris %>% 13 | #' bolero(mambo(~. 
> 4, starts_with("Sepal"))) 14 | #' 15 | #' @export 16 | mambo <- function(.fun, ..., .tbl = get_tbl(), .op = and, .env = caller_env()) { 17 | predicate <- swing(.fun, ..., .tbl = .tbl, .env = .env) %>% 18 | map(f_rhs) %>% 19 | reduce(~expr((!!.op)(!!.x, !!.y))) 20 | 21 | new_formula(NULL, predicate, env = .env) 22 | } 23 | 24 | #' Filtering rows 25 | #' 26 | #' @param .tbl data frame, most likely grouped 27 | #' @param ...,.env formulas and caller environment 28 | #' @param .op binary operator to [reduce()] results when there are multiple `...` 29 | #' 30 | #' @return A tibble with matching rows 31 | #' 32 | #' @examples 33 | #' iris %>% 34 | #' bolero(~ Sepal.Length > 5.5, ~Sepal.Width >= 4) 35 | #' 36 | #' @export 37 | bolero <- function(.tbl, ..., .op = and, .env = caller_env()) { 38 | c(ptypes, steps) %<-% ballet(.tbl, ..., .env = .env) 39 | rows <- group_rows(.tbl) 40 | 41 | bolero_check_results(steps, rows, length(ptypes)) 42 | 43 | # the indices for each group 44 | c(indices, new_rows) %<-% bolero_lgl_steps_to_indices(steps, length(ptypes), rows) 45 | tbl_slice <- vec_slice(.tbl, flatten_int(indices)) 46 | 47 | if (is_grouped_df(.tbl)) { 48 | tbl_slice <- new_grouped_df( 49 | tbl_slice, 50 | vec_cbind(group_keys(.tbl), tibble(.rows := new_rows)), 51 | class = "dance_grouped_df" 52 | ) 53 | } 54 | 55 | tbl_slice 56 | } 57 | -------------------------------------------------------------------------------- /R/ballet.R: -------------------------------------------------------------------------------- 1 | #' ballet 2 | #' 3 | #' `ballet()` iterates on the groups, as identified by [dplyr::group_rows()]), 4 | #' and applies the [choreography()] to each group. 5 | #' 6 | #' @param .tbl A data frame 7 | #' @param ... Description of the columns to create, see [choreography()] for details. 
8 | #' @param .env parent environment of the function generated by [choreography()] 9 | #' 10 | #' @details 11 | #' The grouping structure of `.tbl` defines the number of groups `n_groups` and the 12 | #' formulas in `...` give the number of dance moves `n_moves` 13 | #' 14 | #' @return a list of 2 elements 15 | #' 16 | #' - `ptypes` : a list of `n_moves` types, in the sense of the `.ptype` argument to [vctrs::vec_c()] 17 | #' - `steps` : a list of `n_groups` results, each of size `n_moves` 18 | #' 19 | #' @examples 20 | #' data <- iris %>% 21 | #' group_by(Species) 22 | #' 23 | #' # most dance functions get the result of ballet() 24 | #' # and then process it 25 | #' c(ptypes, steps) %<-% 26 | #' ballet(data, 27 | #' Sepal.Length = ~mean(Sepal.Length), 28 | #' Sepal.Width = ~mean(Sepal.Width) 29 | #' ) 30 | #' ptypes 31 | #' steps 32 | #' 33 | #' purrr::transpose(steps) 34 | #' purrr::map_dbl(steps, 1) 35 | #' purrr::map_dbl(steps, 2) 36 | #' 37 | #' @export 38 | ballet <- function(.tbl, ..., .env = caller_env()) { 39 | set_tbl(.tbl) 40 | formulas <- list2(...) 
41 | 42 | assert_that( 43 | all(map_lgl(formulas, is_formula)), 44 | msg = "`...` should be a list of formulas" 45 | ) 46 | 47 | rows <- group_rows(.tbl) 48 | n_groups <- length(rows) 49 | 50 | # the left hand side of the formula gives the type 51 | # empty gives NULL which means guessing the type 52 | ptypes <- map(formulas, ~eval_bare(f_lhs(.x), f_env(.x))) 53 | 54 | # for each group, apply the choreography derived from the formulas 55 | moves <- choreography(.tbl, ..., .env = .env) 56 | steps <- map(rows, moves) 57 | 58 | list(ptypes = ptypes, steps = steps) 59 | } 60 | -------------------------------------------------------------------------------- /man/rumba.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rumba.R 3 | \name{rumba} 4 | \alias{rumba} 5 | \alias{zumba} 6 | \title{Apply several functions to the same column} 7 | \usage{ 8 | rumba(.var, ..., .tbl = get_tbl(), .name = NULL, .env = caller_env()) 9 | 10 | zumba(.var, ..., .tbl = get_tbl(), .name = NULL, .env = caller_env()) 11 | } 12 | \arguments{ 13 | \item{.var}{A variable specified as in \code{\link[dplyr:pull]{dplyr::pull()}}} 14 | 15 | \item{...}{list of functions or formulas using \code{.} to refer to the column} 16 | 17 | \item{.tbl, .env}{data frame to get columns from and caller environment. Most of the 18 | time, you don't need to set these} 19 | 20 | \item{.name}{\code{\link[glue:glue]{glue::glue()}} model to name the created columns. 
The model can use : 21 | \itemize{ 22 | \item \code{{fun}} to refer to the function name 23 | \item \code{{idx}} to refer to the index of the function with the given list 24 | \item \code{{var}} to refer to the selected name 25 | The default uses \code{"{fun}"} is the \code{...} list is named, and \code{"fn{idx}"} otherwise 26 | }} 27 | } 28 | \value{ 29 | \itemize{ 30 | \item \code{rumba()} returns a spliced list of formulas suitable for the \code{...} of a 31 | \code{\link[=choreography]{choreography()}} based dance, e.g. \code{\link[=tango]{tango()}}, \code{\link[=samba]{samba()}}, \code{\link[=jive]{jive()}} 32 | \item \code{zumba()} returns a single formula that packs the results 33 | } 34 | } 35 | \description{ 36 | Apply several functions to the same column 37 | } 38 | \examples{ 39 | g <- group_by(iris, Species) 40 | 41 | # ---- tango() 42 | g \%>\% 43 | tango(rumba(Sepal.Length, mean = mean, median = median)) 44 | 45 | # select the first column, control the result names 46 | # with the glue() model 47 | g \%>\% 48 | tango(rumba(1, mean = mean, median = median, .name = "{var}_{fun}")) 49 | 50 | g \%>\% 51 | tango(Sepal.Width = zumba(Sepal.Width, mean = mean, median = median)) 52 | 53 | # ---- jive() 54 | g \%>\% 55 | jive( 56 | rumba(Sepal.Width, five = fivenum, quantile = quantile) 57 | ) 58 | 59 | } 60 | -------------------------------------------------------------------------------- /man/tango.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarise.R 3 | \name{tango} 4 | \alias{tango} 5 | \alias{waltz} 6 | \alias{polka} 7 | \alias{charleston} 8 | \title{Summarise one row per group} 9 | \usage{ 10 | tango(.tbl, ..., .env = caller_env()) 11 | 12 | waltz(.tbl, ..., .env = caller_env()) 13 | 14 | polka(.tbl) 15 | 16 | charleston(.tbl, ..., .name = "data", .env = caller_env()) 17 | } 18 | \arguments{ 19 | \item{.tbl}{A data frame, most 
likely a grouped data frame} 20 | 21 | \item{..., .env}{formulas for each column to create, and parent environment, see \code{\link[=ballet]{ballet()}}} 22 | 23 | \item{.name}{Name of the packed column made by \code{charleston()} 24 | 25 | The four functions play a separate role around the idea of 26 | \code{\link[dplyr:summarise]{dplyr::summarise()}}: 27 | \itemize{ 28 | \item \code{polka()} peels off one level of grouping from the grouping variable, 29 | i.e. if \code{.tbl} was grouped by \code{x} and \code{y} the result of \code{polka()} 30 | contains columns \code{x} and \code{y} and is only grouped by \code{x} 31 | \item \code{waltz()} runs the \code{\link[=ballet]{ballet()}} defined by \code{...} and makes 32 | sure each result is of \code{\link[vctrs:vec_size]{vctrs::vec_size()}} 1. The result tibble of \code{waltz()} 33 | does not contain the grouping variables. 34 | \item \code{tango()} is the closest to \code{\link[dplyr:summarise]{dplyr::summarise()}}, it column binds 35 | the result of \code{polka()} and \code{waltz()} with \code{\link[vctrs:vec_cbind]{vctrs::vec_cbind()}}. 36 | \item \code{charleston()} is similar to \code{tango()} but the results are packed 37 | instead of being bound. The name of the created packed column is 38 | controlled by the \code{.name} argument. 
39 | }} 40 | } 41 | \description{ 42 | Applies the \code{\link[=ballet]{ballet()}} and makes sure each result is of size 1, 43 | according to \code{\link[vctrs:vec_size]{vctrs::vec_size()}} 44 | } 45 | \examples{ 46 | g <- group_by(iris, Species) 47 | 48 | polka(g) 49 | 50 | g \%>\% 51 | waltz(Sepal = ~mean(Sepal.Length * Sepal.Width)) 52 | 53 | g \%>\% 54 | tango(Sepal = ~mean(Sepal.Length * Sepal.Width)) 55 | 56 | g \%>\% 57 | charleston(Sepal = ~mean(Sepal.Length * Sepal.Width)) 58 | 59 | } 60 | -------------------------------------------------------------------------------- /R/choreography.R: -------------------------------------------------------------------------------- 1 | #' choreography 2 | #' 3 | #' The choreography is a central concept of the dance 4 | #' package, most of the time you don't need to use it directly, but it is 5 | #' used by many other functions like [tango()], [samba()], ... 6 | #' 7 | #' @param .tbl A data frame 8 | #' @param ... A variable number of formulas. `choreography()` only 9 | #' uses the rhs of each of the formulas. 10 | #' 11 | #' @param .env parent environment of the created function, see [rlang::new_function()] 12 | #' 13 | #' @return a function that can be called with a single argument that 14 | #' represents indices. 15 | #' 16 | #' When called with an integer vector `idx`, the function returns a list 17 | #' of each of the expressions given on the rhs evaluated on the subset 18 | #' of the columns, i.e. in the formula `~mean(Sepal.Length)` the column 19 | #' `Sepal.Length` stands for `Sepal.Length[idx]`. 
20 | #' 21 | #' @examples 22 | #' 23 | #' moves <- choreography(iris, 24 | #' Sepal.Length = ~mean(Sepal.Length), 25 | #' Sepal.Width = ~mean(Sepal.Width) 26 | #' ) 27 | #' 28 | #' moves(1:10) 29 | #' # this returns the same as 30 | #' list( 31 | #' Sepal.Length = mean(iris$Sepal.Length[1:10]), 32 | #' Sepal.Width = mean(iris$Sepal.Width[1:10]) 33 | #' ) 34 | #' 35 | #' @export 36 | choreography <- function(.tbl, ..., .env = caller_env()) { 37 | args <- tbl_slicer_args(.tbl) 38 | body <- expr(list(!!!map(list2(...), f_rhs))) 39 | structure(rlang::new_function(args, body, env = .env), class = "choreography") 40 | } 41 | 42 | #' @export 43 | print.choreography <- function(x, ...) { 44 | body <- body(x) 45 | 46 | fs <- formals(x)[-1] 47 | funs <- map_chr(fs, ~{ 48 | # now it's either .subset or vctrs::vec_slice 49 | res <- if(identical(.x[[1L]], .subset)) { 50 | "<.subset>" 51 | } else { 52 | "" 53 | } 54 | green(res) 55 | }) 56 | index <- red("`.::index::.`") 57 | txt <- glue( 58 | "function({index},\n ", 59 | glue_collapse(glue("{var} = {fun}(<{data}{var}>, {index})", fun = funs, var = silver(names(funs)), data = silver(".tbl$")), sep = ",\n "), 60 | "\n){{\n ", 61 | glue_collapse(expr_deparse(body(x)), sep = " \n"), 62 | "\n}}\n" 63 | ) 64 | writeLines(txt) 65 | invisible(x) 66 | } 67 | -------------------------------------------------------------------------------- /R/rumba.R: -------------------------------------------------------------------------------- 1 | 2 | #' Apply several functions to the same column 3 | #' 4 | #' @param .var A variable specified as in [dplyr::pull()] 5 | #' @param ... list of functions or formulas using `.` to refer to the column 6 | #' @param .tbl,.env data frame to get columns from and caller environment. Most of the 7 | #' time, you don't need to set these 8 | #' @param .name [glue::glue()] model to name the created columns. 
The model can use : 9 | #' - `{fun}` to refer to the function name 10 | #' - `{idx}` to refer to the index of the function within the given list 11 | #' - `{var}` to refer to the selected name 12 | #' The default uses `"{fun}"` if the `...` list is named, and `"fn{idx}"` otherwise 13 | #' 14 | #' @return 15 | #' 16 | #' - `rumba()` returns a spliced list of formulas suitable for the `...` of a 17 | #' [choreography()] based dance, e.g. [tango()], [samba()], [jive()] 18 | #' 19 | #' - `zumba()` returns a single formula that packs the results 20 | #' 21 | #' @examples 22 | #' g <- group_by(iris, Species) 23 | #' 24 | #' # ---- tango() 25 | #' g %>% 26 | #' tango(rumba(Sepal.Length, mean = mean, median = median)) 27 | #' 28 | #' # select the first column, control the result names 29 | #' # with the glue() model 30 | #' g %>% 31 | #' tango(rumba(1, mean = mean, median = median, .name = "{var}_{fun}")) 32 | #' 33 | #' g %>% 34 | #' tango(Sepal.Width = zumba(Sepal.Width, mean = mean, median = median)) 35 | #' 36 | #' # ---- jive() 37 | #' g %>% 38 | #' jive( 39 | #' rumba(Sepal.Width, five = fivenum, quantile = quantile) 40 | #' ) 41 | #' 42 | #' @export 43 | rumba <- function(.var, ..., .tbl = get_tbl(), .name = NULL, .env = caller_env()) { 44 | .var <- vars_pull(names(.tbl), !!enquo(.var)) 45 | .funs <- list2(...) 
46 | names(.funs) <- glue( 47 | .name %||% if(is.null(names(.funs))) "fn{idx}" else "{fun}", 48 | fun = names(.funs) %||% rep("", length(.funs)), 49 | idx = seq_along(.funs), 50 | var = .var 51 | ) 52 | 53 | splice(map(.funs, ~{ 54 | c(.ptype, .fun) %<-% promote_formula(.x, .env) 55 | new_formula(.ptype, expr((!!.fun)(!!sym(.var)))) 56 | })) 57 | } 58 | 59 | #' @rdname rumba 60 | #' @export 61 | zumba <- function(.var, ..., .tbl = get_tbl(), .name = NULL, .env = caller_env()) { 62 | formulas <- rumba(!!enquo(.var), ..., .tbl = .tbl, .name = .name, .env = .env) 63 | rhs <- expr( 64 | tibble(!!!map(formulas, f_rhs)) 65 | ) 66 | new_formula(NULL, rhs, env = .env) 67 | } 68 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // bolero_check_results 9 | void bolero_check_results(SEXP steps, SEXP rows, SEXP nsteps_); 10 | RcppExport SEXP _dance_bolero_check_results(SEXP stepsSEXP, SEXP rowsSEXP, SEXP nsteps_SEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RNGScope rcpp_rngScope_gen; 13 | Rcpp::traits::input_parameter< SEXP >::type steps(stepsSEXP); 14 | Rcpp::traits::input_parameter< SEXP >::type rows(rowsSEXP); 15 | Rcpp::traits::input_parameter< SEXP >::type nsteps_(nsteps_SEXP); 16 | bolero_check_results(steps, rows, nsteps_); 17 | return R_NilValue; 18 | END_RCPP 19 | } 20 | // bolero_lgl_steps_to_indices 21 | SEXP bolero_lgl_steps_to_indices(SEXP steps, SEXP n_steps_, SEXP original_rows); 22 | RcppExport SEXP _dance_bolero_lgl_steps_to_indices(SEXP stepsSEXP, SEXP n_steps_SEXP, SEXP original_rowsSEXP) { 23 | BEGIN_RCPP 24 | Rcpp::RObject rcpp_result_gen; 25 | Rcpp::RNGScope rcpp_rngScope_gen; 26 | Rcpp::traits::input_parameter< SEXP >::type steps(stepsSEXP); 27 
| Rcpp::traits::input_parameter< SEXP >::type n_steps_(n_steps_SEXP); 28 | Rcpp::traits::input_parameter< SEXP >::type original_rows(original_rowsSEXP); 29 | rcpp_result_gen = Rcpp::wrap(bolero_lgl_steps_to_indices(steps, n_steps_, original_rows)); 30 | return rcpp_result_gen; 31 | END_RCPP 32 | } 33 | // salsa_check_sizes 34 | void salsa_check_sizes(SEXP steps, SEXP rows, SEXP n_formulas_); 35 | RcppExport SEXP _dance_salsa_check_sizes(SEXP stepsSEXP, SEXP rowsSEXP, SEXP n_formulas_SEXP) { 36 | BEGIN_RCPP 37 | Rcpp::RNGScope rcpp_rngScope_gen; 38 | Rcpp::traits::input_parameter< SEXP >::type steps(stepsSEXP); 39 | Rcpp::traits::input_parameter< SEXP >::type rows(rowsSEXP); 40 | Rcpp::traits::input_parameter< SEXP >::type n_formulas_(n_formulas_SEXP); 41 | salsa_check_sizes(steps, rows, n_formulas_); 42 | return R_NilValue; 43 | END_RCPP 44 | } 45 | 46 | static const R_CallMethodDef CallEntries[] = { 47 | {"_dance_bolero_check_results", (DL_FUNC) &_dance_bolero_check_results, 3}, 48 | {"_dance_bolero_lgl_steps_to_indices", (DL_FUNC) &_dance_bolero_lgl_steps_to_indices, 3}, 49 | {"_dance_salsa_check_sizes", (DL_FUNC) &_dance_salsa_check_sizes, 3}, 50 | {NULL, NULL, 0} 51 | }; 52 | 53 | RcppExport void R_init_dance(DllInfo *dll) { 54 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 55 | R_useDynamicSymbols(dll, FALSE); 56 | } 57 | -------------------------------------------------------------------------------- /src/bolero.cpp: -------------------------------------------------------------------------------- 1 | #include "dance.h" 2 | 3 | // [[Rcpp::export]] 4 | void bolero_check_results(SEXP steps, SEXP rows, SEXP nsteps_) { 5 | R_xlen_t n = XLENGTH(steps); 6 | R_xlen_t n_steps = dance::convert_R_xlen(nsteps_); 7 | 8 | for (R_xlen_t i=0; i starts(n_steps); 43 | for (R_xlen_t k=0; k\% 56 | salsa( 57 | Sepal = ~Sepal.Length * Sepal.Width, 58 | Petal = ~Petal.Length * Petal.Width 59 | ) 60 | 61 | # returns a dance_grouped_df with the two 62 | # additional 
columns `Sepal` and `Petal` 63 | g \%>\% 64 | samba( 65 | Sepal = ~Sepal.Length * Sepal.Width, 66 | Petal = ~Petal.Length * Petal.Width 67 | ) 68 | 69 | # returns a dance_grouped_df with the one 70 | # additional data frame column 71 | g \%>\% 72 | madison( 73 | Sepal = ~Sepal.Length * Sepal.Width, 74 | Petal = ~Petal.Length * Petal.Width 75 | ) 76 | 77 | } 78 | -------------------------------------------------------------------------------- /R/summarise.R: -------------------------------------------------------------------------------- 1 | 2 | #' Summarise one row per group 3 | #' 4 | #' Applies the [ballet()] and makes sure each result is of size 1, 5 | #' according to [vctrs::vec_size()] 6 | #' 7 | #' @param .tbl A data frame, most likely a grouped data frame 8 | #' @param ...,.env formulas for each column to create, and parent environment, see [ballet()] 9 | #' @param .name Name of the packed column made by `charleston()` 10 | #' 11 | #' The four functions play a separate role around the idea of 12 | #' [dplyr::summarise()]: 13 | #' 14 | #' - `polka()` peels off one level of grouping from the grouping variable, 15 | #' i.e. if `.tbl` was grouped by `x` and `y` the result of `polka()` 16 | #' contains columns `x` and `y` and is only grouped by `x` 17 | #' 18 | #' - `waltz()` runs the [ballet()] defined by `...` and makes 19 | #' sure each result is of [vctrs::vec_size()] 1. The result tibble of `waltz()` 20 | #' does not contain the grouping variables. 21 | #' 22 | #' - `tango()` is the closest to [dplyr::summarise()], it column binds 23 | #' the result of `polka()` and `waltz()` with [vctrs::vec_cbind()]. 24 | #' 25 | #' - `charleston()` is similar to `tango()` but the results are packed 26 | #' instead of being bound. The name of the created packed column is 27 | #' controlled by the `.name` argument. 
28 | #' 29 | #' @examples 30 | #' g <- group_by(iris, Species) 31 | #' 32 | #' polka(g) 33 | #' 34 | #' g %>% 35 | #' waltz(Sepal = ~mean(Sepal.Length * Sepal.Width)) 36 | #' 37 | #' g %>% 38 | #' tango(Sepal = ~mean(Sepal.Length * Sepal.Width)) 39 | #' 40 | #' g %>% 41 | #' charleston(Sepal = ~mean(Sepal.Length * Sepal.Width)) 42 | #' 43 | #' @export 44 | tango <- function(.tbl, ..., .env = caller_env()) { 45 | grps <- polka(.tbl) 46 | out <- vec_cbind(grps, waltz(.tbl, ..., .env = .env)) 47 | if (is_grouped_df(grps)) { 48 | out <- new_grouped_df(out, attr(grps, "groups")) 49 | } 50 | out 51 | } 52 | 53 | #' @rdname tango 54 | #' @export 55 | waltz <- function(.tbl, ..., .env = caller_env()) { 56 | # evaluate all the formulas in each group 57 | c(ptypes, steps) %<-% ballet(.tbl, ..., .env = .env) 58 | 59 | # check all results are length 1 60 | walk(steps, ~walk(.x, ~assert_that(vec_size(.x) == 1L))) 61 | 62 | # transpose, combine 63 | results <- map2(transpose(steps), ptypes, ~vec_c(!!!.x, .ptype = .y)) 64 | 65 | as_tibble_splice(results) 66 | } 67 | 68 | #' @rdname tango 69 | #' @export 70 | polka <- function(.tbl) { 71 | groups <- head(groups(.tbl), -1L) 72 | 73 | .tbl <- .tbl %>% 74 | group_keys() %>% 75 | group_by(!!!groups) 76 | 77 | if (is_grouped_df(.tbl) && !inherits(.tbl, "dance_grouped_df")) { 78 | class(.tbl) <- c("dance_grouped_df", class(.tbl)) 79 | } 80 | .tbl 81 | } 82 | 83 | #' @rdname tango 84 | #' @export 85 | charleston <- function(.tbl, ..., .name = "data", .env = caller_env()) { 86 | vec_cbind(polka(.tbl), tibble(!!.name := waltz(.tbl, ..., .env = .env))) 87 | } 88 | -------------------------------------------------------------------------------- /R/tools.R: -------------------------------------------------------------------------------- 1 | dance_env <- new.env() 2 | 3 | set_tbl <- function(.tbl) { 4 | old <- dance_env[["context"]] 5 | dance_env[["context"]] <- .tbl 6 | old 7 | } 8 | 9 | get_tbl <- function() { 10 | dance_env[["context"]] 11 
| } 12 | 13 | map_for_type <- function(.ptype, combine = vec_c) { 14 | function(.x, .f, ...) { 15 | .f <- as_mapper(.f) 16 | out <- map(.x, function(x){ 17 | res <- .f(x, ...) 18 | stopifnot(vec_size(res) == 1L) 19 | res 20 | }) 21 | combine(!!!out, .ptype = .ptype) 22 | } 23 | } 24 | 25 | map_for <- function(.ptype) { 26 | if (identical(.ptype, list())) { 27 | map 28 | } else if(identical(.ptype, integer())) { 29 | map_int 30 | } else if(identical(.ptype, double())) { 31 | map_dbl 32 | } else if(identical(.ptype, raw())) { 33 | map_raw 34 | } else if(identical(.ptype, character())) { 35 | map_chr 36 | } else if(identical(.ptype, logical())) { 37 | map_lgl 38 | } else if(is.data.frame(.ptype)) { 39 | if (ncol(.ptype) == 0L){ 40 | map_for_type(NULL, vec_rbind) 41 | } else { 42 | map_for_type(.ptype, vec_rbind) 43 | } 44 | } else { 45 | map_for_type(.ptype, vec_c) 46 | } 47 | } 48 | 49 | globalVariables(c(".::index::.", "mapper", "name", ".", ".ptype", ".rows", "ptypes", "rows", "steps")) 50 | 51 | is_bare_vector <- function(x) { 52 | is_vector(x) && !is.object(x) && is.null(attr(x, "class")) 53 | } 54 | 55 | slicer <- function(.) { 56 | if (is_bare_vector(.)) { 57 | .subset 58 | } else { 59 | vec_slice 60 | } 61 | } 62 | 63 | tbl_slicer_args <- function(.tbl) { 64 | args <- map(.tbl, ~expr((!!slicer(.x))((!!.x), `.::index::.`))) 65 | list2(`.::index::.` = missing_arg(), !!!args) 66 | } 67 | # Normalise `.fun` into list(.ptype, .fun): a function gets a NULL ptype; a formula has its lhs evaluated in `.env` as the ptype and its rhs converted to a function; anything else is rejected with an explicit error 68 | promote_formula <- function(.fun, .env) { 69 | if (is_function(.fun)) { 70 | .ptype <- NULL 71 | } else if(is_formula(.fun)){ 72 | .ptype <- eval_bare(f_lhs(.fun), .env) 73 | .fun <- as_function(new_formula(NULL, f_rhs(.fun), env = .env), env = .env) 74 | } else { 75 | stop("`.fun` must be a function or a formula", call. = FALSE) 76 | } 77 | list(.ptype, .fun) 78 | } 79 | as_tibble_splice <- function(x, ...) 
{ 80 | if (is.null(names(x))) { 81 | names(x) <- rep("", length(x)) 82 | } 83 | needs_splice <- names(x) == "" & map_lgl(x, is.data.frame) 84 | 85 | n <- sum(map2_int(x, needs_splice, ~ { 86 | if(.y) length(.x) else 1L 87 | })) 88 | 89 | out_names <- flatten_chr(map2(x, names(x), ~{ 90 | if(.y == "") names(.x) else .y 91 | })) 92 | 93 | out <- rep(list(NULL), n) 94 | k <- 1L 95 | for(i in seq_along(x)) { 96 | if (needs_splice[i]) { 97 | tbl <- x[[i]] 98 | for(j in seq_len(ncol(tbl))) { 99 | out[[k]] <- tbl[[j]] 100 | k <- k + 1 101 | } 102 | } else { 103 | out[[k]] <- x[[i]] 104 | k <- k + 1 105 | } 106 | } 107 | as_tibble(set_names(out, out_names), ...) 108 | } 109 | -------------------------------------------------------------------------------- /man/jive.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/foxtrot.R 3 | \name{foxtrot} 4 | \alias{foxtrot} 5 | \alias{bachata} 6 | \alias{jive} 7 | \title{Modify} 8 | \usage{ 9 | foxtrot(.tbl, ..., .env = caller_env()) 10 | 11 | bachata(.tbl, ..., .name = "data", .env = caller_env()) 12 | 13 | jive(.tbl, ..., .env = caller_env()) 14 | } 15 | \arguments{ 16 | \item{.tbl}{A data frame, most likely a grouped data frame} 17 | 18 | \item{..., .env}{formulas for each column to create, and parent environment, see \code{\link[=ballet]{ballet()}}} 19 | 20 | \item{.name}{Name of the packed column made by \code{bachata()}} 21 | } 22 | \value{ 23 | \itemize{ 24 | \item \code{foxtrot()} returns a list of tibbles, one per group of \code{.tbl}. This is 25 | close in functionality to \code{\link[dplyr:group_map]{dplyr::group_map()}}. 
26 | \item \code{bachata()} returns a tibble that \code{\link[vctrs:vec_cbind]{vctrs::vec_cbind()}} the results of \code{\link[=polka]{polka()}} and 27 | the result from \code{foxtrot()} as a list column 28 | \item \code{jive()} row binds the results from \code{foxtrot()} and reconstructs the grouping structure. 29 | This is similar in functionality to \code{dplyr::group_modify()} 30 | } 31 | } 32 | \description{ 33 | Applies the \code{\link[=ballet]{ballet()}} with no restriction on the \code{\link[=vec_size]{vec_size()}} of the outputs. 34 | } 35 | \examples{ 36 | g <- group_by(iris, Species) 37 | 38 | \dontrun{ 39 | if (requireNamespace("broom", quietly = TRUE)) { 40 | ### foxtrot() to return a list of tibbles 41 | 42 | # A list of tibbles with the `quantiles` column derived from the 43 | # formula on each group 44 | g \%>\% 45 | foxtrot( 46 | quantiles = ~quantile(Petal.Length, probs = c(0.25, 0.5, 0.75)) 47 | ) 48 | 49 | g \%>\% 50 | foxtrot( 51 | ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 52 | ) 53 | 54 | ### bachata() to nest the results of foxtrot() 55 | 56 | # bachata() = polka() + a list column of 2 columns tibbles 57 | g \%>\% 58 | bachata( 59 | Sepal.Length = ~quantile(Sepal.Length, probs = c(0.25, 0.5, 0.75)), 60 | Sepal.Width = ~quantile(Sepal.Width, probs = c(0.25, 0.5, 0.75)), 61 | ) 62 | 63 | # because the formula is unnamed, the columns of the tibbles 64 | # generated by broom::tidy() are auto-spliced 65 | g \%>\% 66 | bachata( 67 | ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 68 | ) 69 | 70 | ### jive() 71 | g \%>\% 72 | jive( 73 | Sepal.Length = ~quantile(Sepal.Length, probs = c(0.25, 0.5, 0.75)), 74 | Sepal.Width = ~quantile(Sepal.Width, probs = c(0.25, 0.5, 0.75)), 75 | ) 76 | 77 | # unnamed -> columns are spliced 78 | g \%>\% 79 | jive( 80 | ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 81 | ) 82 | 83 | # named -> packed column 84 | g \%>\% 85 | jive( 86 | model = ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 87 | ) 88 | } 89 | } 90 | 
} 91 | -------------------------------------------------------------------------------- /man/swing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/swing.R 3 | \name{swing} 4 | \alias{swing} 5 | \alias{twist} 6 | \title{Apply a single function to multiple columns} 7 | \usage{ 8 | swing(.fun, ..., .tbl = get_tbl(), .name = "{var}", 9 | .env = caller_env()) 10 | 11 | twist(.fun, ..., .tbl = get_tbl(), .name = "{var}", 12 | .env = caller_env()) 13 | } 14 | \arguments{ 15 | \item{.fun}{A function or a formula that uses \code{.} or 16 | \code{.x} to refer to each of the selected column} 17 | 18 | \item{...}{tidy selection of columns, see \code{\link[tidyselect:vars_select]{tidyselect::vars_select()}} for details} 19 | 20 | \item{.tbl, .env}{data frame \code{...} selects columns from, this is 21 | automatically set by the \code{\link[=choreography]{choreography()}}, you should rarely need to use these arguments} 22 | 23 | \item{.name}{\code{\link[glue:glue]{glue::glue()}} model to name the outputs. 
The model may use : 24 | \itemize{ 25 | \item \code{{var}} to refer to the name of the current selected variable 26 | \item \code{{idx}} to refer to the index of the current variable 27 | The default value "{var}" for \code{.name} simply uses the name of the selected variable 28 | }} 29 | } 30 | \description{ 31 | \itemize{ 32 | \item \code{swing()} : returns a spliced list of formulas, suitable for the 33 | \code{...} argument of \code{\link[=choreography]{choreography()}}, as well as all the dances that 34 | use a \code{\link[=choreography]{choreography()}} 35 | } 36 | } 37 | \details{ 38 | \itemize{ 39 | \item \code{twist()} : returns a single formula that makes a tibble column 40 | } 41 | 42 | These functions are generally used within other dances, such as 43 | \code{\link[=tango]{tango()}}, \code{\link[=samba]{samba()}} or \code{\link[=jive]{jive()}} 44 | } 45 | \examples{ 46 | g <- iris \%>\% group_by(Species) 47 | 48 | ##------- tango() 49 | 50 | # Apply mean to all columns that start with Sepal 51 | # and choose how the result columns are called 52 | g \%>\% 53 | tango( 54 | swing(mean, starts_with("Sepal"), .name = "mean_{var}") 55 | ) 56 | 57 | # if you want to use extra arguments of `.fun` you can embed 58 | # them with the lambda syntax 59 | g \%>\% 60 | tango( 61 | swing(~mean(., trim = .2), starts_with("Sepal"), .name = "mean_{var}") 62 | ) 63 | 64 | # use twist() to instead create a single packed column 65 | g \%>\% 66 | tango( 67 | mean = twist(mean, starts_with("Sepal")) 68 | ) 69 | # but in fact, if you don't name the formula made by twist() 70 | # the columns are auto spliced 71 | g \%>\% 72 | tango( 73 | twist(mean, starts_with("Sepal")) 74 | ) 75 | 76 | ##------- samba() 77 | 78 | g \%>\% 79 | samba( 80 | swing(~. - mean(.), starts_with("Sepal"), .name = "centered_{var}") 81 | ) 82 | 83 | g \%>\% 84 | samba( 85 | centered = twist(~. 
- mean(.), starts_with("Sepal"), .name = "centered_{var}") 86 | ) 87 | 88 | ##------- jive() 89 | 90 | g \%>\% 91 | jive( 92 | q = ~ c("25\%", "50\%", "75\%"), 93 | swing(~quantile(., c(0.25, 0.5, 0.75)), contains(".")) 94 | ) 95 | 96 | } 97 | \seealso{ 98 | \code{\link[=rumba]{rumba()}} and \code{\link[=zumba]{zumba()}} to apply several functions to the same column 99 | } 100 | -------------------------------------------------------------------------------- /R/swing.R: -------------------------------------------------------------------------------- 1 | 2 | #' Apply a single function to multiple columns 3 | #' 4 | #' - `swing()` : returns a spliced list of formulas, suitable for the 5 | #' `...` argument of [choreography()], as well as all the dances that 6 | #' use a [choreography()] 7 | #' 8 | #' - `twist()` : returns a single formula that makes a tibble column 9 | #' 10 | #' These functions are generally used within other dances, such as 11 | #' [tango()], [samba()] or [jive()] 12 | #' 13 | #' @param .fun A function or a formula that uses `.` or 14 | #' `.x` to refer to each of the selected column 15 | #' @param ... tidy selection of columns, see [tidyselect::vars_select()] for details 16 | #' @param .tbl,.env data frame `...` selects columns from, this is 17 | #' automatically set by the [choreography()], you should rarely need to use these arguments 18 | #' @param .name [glue::glue()] model to name the outputs. 
The model may use : 19 | #' - `{var}` to refer to the name of the current selected variable 20 | #' - `{idx}` to refer to the index of the current variable 21 | #' The default value "{var}" for `.name` simply uses the name of the selected variable 22 | #' 23 | #' @seealso [rumba()] and [zumba()] to apply several functions to the same column 24 | #' 25 | #' @examples 26 | #' g <- iris %>% group_by(Species) 27 | #' 28 | #' ##------- tango() 29 | #' 30 | #' # Apply mean to all columns that start with Sepal 31 | #' # and choose how the result columns are called 32 | #' g %>% 33 | #' tango( 34 | #' swing(mean, starts_with("Sepal"), .name = "mean_{var}") 35 | #' ) 36 | #' 37 | #' # if you want to use extra arguments of `.fun` you can embed 38 | #' # them with the lambda syntax 39 | #' g %>% 40 | #' tango( 41 | #' swing(~mean(., trim = .2), starts_with("Sepal"), .name = "mean_{var}") 42 | #' ) 43 | #' 44 | #' # use twist() to instead create a single packed column 45 | #' g %>% 46 | #' tango( 47 | #' mean = twist(mean, starts_with("Sepal")) 48 | #' ) 49 | #' # but in fact, if you don't name the formula made by twist() 50 | #' # the columns are auto spliced 51 | #' g %>% 52 | #' tango( 53 | #' twist(mean, starts_with("Sepal")) 54 | #' ) 55 | #' 56 | #' ##------- samba() 57 | #' 58 | #' g %>% 59 | #' samba( 60 | #' swing(~. - mean(.), starts_with("Sepal"), .name = "centered_{var}") 61 | #' ) 62 | #' 63 | #' g %>% 64 | #' samba( 65 | #' centered = twist(~. - mean(.), starts_with("Sepal"), .name = "centered_{var}") 66 | #' ) 67 | #' 68 | #' ##------- jive() 69 | #' 70 | #' g %>% 71 | #' jive( 72 | #' q = ~ c("25%", "50%", "75%"), 73 | #' swing(~quantile(., c(0.25, 0.5, 0.75)), contains(".")) 74 | #' ) 75 | #' 76 | #' @export 77 | swing <- function(.fun, ..., .tbl = get_tbl(), .name = "{var}", .env = caller_env()) { 78 | vars <- vars_select(tbl_vars(.tbl), ...) 
79 | names(vars) <- glue(.name, var = names(vars), idx = seq_along(vars)) 80 | c(.ptype, .fun) %<-% promote_formula(.fun, .env) 81 | 82 | splice( 83 | map(vars, ~new_formula(.ptype, expr((!!.fun)(!!sym(.))))) 84 | ) 85 | } 86 | 87 | #' @rdname swing 88 | #' @export 89 | twist <- function(.fun, ..., .tbl = get_tbl(), .name = "{var}", .env = caller_env()) { 90 | expressions <- swing(.fun, ..., .tbl = .tbl, .name = .name, .env = .env) 91 | rhs <- expr(tibble(!!!map(expressions, f_rhs))) 92 | new_formula(NULL, rhs, env = .env) 93 | } 94 | -------------------------------------------------------------------------------- /R/foxtrot.R: -------------------------------------------------------------------------------- 1 | 2 | #' @rdname jive 3 | #' @export 4 | foxtrot <- function(.tbl, ..., .env = caller_env()) { 5 | # evaluate all the formulas in each group 6 | c(., steps) %<-% ballet(.tbl, ..., .env = .env) 7 | 8 | map(steps, ~as_tibble_splice(.)) 9 | } 10 | 11 | #' @rdname jive 12 | #' @export 13 | bachata <- function(.tbl, ..., .name = "data", .env = caller_env()) { 14 | vec_cbind(polka(.tbl), !!.name := foxtrot(.tbl, ..., .env = .env)) 15 | } 16 | 17 | #' Modify 18 | #' 19 | #' Applies the [ballet()] with no restriction on the [vec_size()] of the outputs. 20 | #' 21 | #' @param .tbl A data frame, most likely a grouped data frame 22 | #' @param ...,.env formulas for each column to create, and parent environment, see [ballet()] 23 | #' @param .name Name of the packed column made by `bachata()` 24 | #' 25 | #' @return 26 | #' 27 | #' - `foxtrot()` returns a list of tibbles, one per group of `.tbl`. This is 28 | #' close in functionality to [dplyr::group_map()]. 29 | #' 30 | #' - `bachata()` returns a tibble that [vctrs::vec_cbind()] the results of [polka()] and 31 | #' the result from `foxtrot()` as a list column 32 | #' 33 | #' - `jive()` row binds the results from `foxtrot()` and reconstructs the grouping structure. 
34 | #' This is similar in functionality to `dplyr::group_modify()` 35 | #' 36 | #' @examples 37 | #' g <- group_by(iris, Species) 38 | #' 39 | #' \dontrun{ 40 | #' if (requireNamespace("broom", quietly = TRUE)) { 41 | #' ### foxtrot() to return a list of tibbles 42 | #' 43 | #' # A list of tibbles with the `quantiles` column derived from the 44 | #' # formula on each group 45 | #' g %>% 46 | #' foxtrot( 47 | #' quantiles = ~quantile(Petal.Length, probs = c(0.25, 0.5, 0.75)) 48 | #' ) 49 | #' 50 | #' g %>% 51 | #' foxtrot( 52 | #' ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 53 | #' ) 54 | #' 55 | #' ### bachata() to nest the results of foxtrot() 56 | #' 57 | #' # bachata() = polka() + a list column of 2 columns tibbles 58 | #' g %>% 59 | #' bachata( 60 | #' Sepal.Length = ~quantile(Sepal.Length, probs = c(0.25, 0.5, 0.75)), 61 | #' Sepal.Width = ~quantile(Sepal.Width, probs = c(0.25, 0.5, 0.75)), 62 | #' ) 63 | #' 64 | #' # because the formula is unnamed, the columns of the tibbles 65 | #' # generated by broom::tidy() are auto-spliced 66 | #' g %>% 67 | #' bachata( 68 | #' ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 69 | #' ) 70 | #' 71 | #' ### jive() 72 | #' g %>% 73 | #' jive( 74 | #' Sepal.Length = ~quantile(Sepal.Length, probs = c(0.25, 0.5, 0.75)), 75 | #' Sepal.Width = ~quantile(Sepal.Width, probs = c(0.25, 0.5, 0.75)), 76 | #' ) 77 | #' 78 | #' # unnamed -> columns are spliced 79 | #' g %>% 80 | #' jive( 81 | #' ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 82 | #' ) 83 | #' 84 | #' # named -> packed column 85 | #' g %>% 86 | #' jive( 87 | #' model = ~ broom::tidy(lm(Petal.Length ~ Sepal.Length)) 88 | #' ) 89 | #' } 90 | #' } 91 | #' @export 92 | jive <- function(.tbl, ..., .env = caller_env()) { 93 | chunks <- foxtrot(.tbl, ..., .env = .env) 94 | sizes <- map_int(chunks, nrow) 95 | keys <- group_keys(.tbl) 96 | gps <- groups(.tbl) 97 | 98 | out <- vec_cbind( 99 | keys[rep(seq_len(nrow(keys)), sizes), ], 100 | vec_rbind(!!!chunks) 101 | ) 102 | 
group_by(out, !!!gps) 103 | } 104 | -------------------------------------------------------------------------------- /R/mutate.R: -------------------------------------------------------------------------------- 1 | #' Mutate new columns 2 | #' 3 | #' Applies the [ballet()] and makes sure each result has the same [vctrs::vec_size()] 4 | #' as the number of elements in each group. 5 | #' 6 | #' @param .tbl A data frame, most likely a grouped data frame 7 | #' @param ...,.env formulas for each column to create, and parent environment, see [ballet()] 8 | #' @param .name Name of the packed column made by `madison()` 9 | #' 10 | #' The four functions play a separate role around the idea of 11 | #' [dplyr::mutate()]: 12 | #' 13 | #' - `chacha()` reorganizes the rows of a grouped data frame so that data for each 14 | #' group is contiguous in each column. 15 | #' 16 | #' - `salsa()` runs the [ballet()] defined by `...` and makes 17 | #' sure the [vctrs::vec_size()] of each result is equal to the number 18 | #' of elements in that group. The result tibble of `salsa()` 19 | #' does not contain the grouping variables, just those columns specified 20 | #' by the `...`. 21 | #' 22 | #' - `samba()` is the closest to [dplyr::mutate()], it column binds 23 | #' the result of `chacha()` and `salsa()` with [vctrs::vec_cbind()]. 24 | #' 25 | #' - `madison()` is similar to `samba()` but the results are packed 26 | #' instead of being bound with [vctrs::vec_cbind()]. The name of the created packed column is 27 | #' controlled by the `.name` argument. 
28 | #' 29 | #' @examples 30 | #' g <- group_by(iris, Species) 31 | #' 32 | #' # Creates a `dance_grouped_df` tibble, 33 | #' # which is the same as `g` but guarantees that the data for each column 34 | #' # is contiguous within groups 35 | #' chacha(g) 36 | #' 37 | #' # returns a tibble of two columns 38 | #' g %>% 39 | #' salsa( 40 | #' Sepal = ~Sepal.Length * Sepal.Width, 41 | #' Petal = ~Petal.Length * Petal.Width 42 | #' ) 43 | #' 44 | #' # returns a dance_grouped_df with the two 45 | #' # additional columns `Sepal` and `Petal` 46 | #' g %>% 47 | #' samba( 48 | #' Sepal = ~Sepal.Length * Sepal.Width, 49 | #' Petal = ~Petal.Length * Petal.Width 50 | #' ) 51 | #' 52 | #' # returns a dance_grouped_df with the one 53 | #' # additional data frame column 54 | #' g %>% 55 | #' madison( 56 | #' Sepal = ~Sepal.Length * Sepal.Width, 57 | #' Petal = ~Petal.Length * Petal.Width 58 | #' ) 59 | #' 60 | #' @rdname samba 61 | #' @export 62 | salsa <- function(.tbl, ..., .env = caller_env()) { 63 | # evaluate all the formulas in each group 64 | c(ptypes, steps) %<-% ballet(.tbl, ..., .env = .env) 65 | 66 | # check each result has the same vec_size() as the number of rows in its group 67 | check_size <- function(result, group_size) { 68 | assert_that(vec_size(result) == group_size) 69 | } 70 | rows <- group_rows(.tbl) 71 | walk2(steps, rows, ~walk(.x, check_size, group_size = length(.y))) 72 | 73 | # transpose and combine 74 | results <- map2(ptypes, seq_along(ptypes), ~vec_c(!!!map(steps, .y), .ptype = .x)) 75 | 76 | # structure results as a tibble 77 | as_tibble_splice(results) 78 | } 79 | 80 | #' @rdname samba 81 | #' @export 82 | chacha <- function(.tbl) { 83 | UseMethod("chacha") 84 | } 85 | 86 | #' @export 87 | chacha.data.frame <- function(.tbl) { 88 | .tbl 89 | } 90 | # Reorders the rows of `.tbl` so each group is contiguous and rebuilds the `.rows` index as consecutive ranges 91 | .chacha_grouped_df <- function(.tbl, .rows) { 92 | sizes <- lengths(.rows) 93 | starts <- 1L + c(0L, cumsum(head(sizes, -1L))) 94 | ends <- cumsum(sizes) 95 | 96 | new_grouped_df( 97 | vec_slice(.tbl, flatten_int(.rows)), 98 | 
vec_cbind(group_keys(.tbl), tibble(.rows := map2(starts, ends, seq2))), 99 | class = "dance_grouped_df" 100 | ) 101 | } 102 | # grouped_df method: delegates to .chacha_grouped_df() with the current group row indices 103 | #' @export 104 | chacha.grouped_df <- function(.tbl) { 105 | .chacha_grouped_df(.tbl, group_rows(.tbl)) 106 | } 107 | 108 | #' @export 109 | chacha.dance_grouped_df <- function(.tbl) { 110 | .tbl 111 | } 112 | 113 | #' @rdname samba 114 | #' @export 115 | samba <- function(.tbl, ..., .env = caller_env()) { 116 | .tbl <- chacha(.tbl) 117 | vec_cbind(.tbl, salsa(.tbl, ..., .env = .env)) 118 | } 119 | 120 | #' @rdname samba 121 | #' @export 122 | madison <- function(.tbl, ..., .name = "data", .env = caller_env()) { 123 | .tbl <- chacha(.tbl) 124 | vec_cbind(.tbl, tibble(!!.name := salsa(.tbl, ..., .env = .env))) 125 | } 126 | 127 | 128 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # dance 17 | 18 | 19 | [![Lifecycle Status](https://img.shields.io/badge/lifecycle-experimental-blue.svg)](https://www.tidyverse.org/lifecycle/) 20 | [![Travis build status](https://travis-ci.org/romainfrancois/dance.svg?branch=master)](https://travis-ci.org/romainfrancois/dance) 21 | 22 | 23 | ![](https://media.giphy.com/media/mnLGTXoWVzAfm/giphy.gif) 24 | 25 | Dancing `r emo::ji("woman_dancing")` with the stats, aka `tibble()` dancing `r emo::ji("man_dancing")`. 26 | `dance` is a sort of reinvention of `dplyr` classic verbs, with a more modern stack 27 | underneath, i.e. it leverages a lot from `vctrs` and `rlang`. 28 | 29 | # Installation 30 | 31 | You can install the development version from GitHub. 
32 | 33 | ```{r, eval=FALSE} 34 | # install.packages("pak") 35 | pak::pkg_install("romainfrancois/dance") 36 | ``` 37 | 38 | # Usage 39 | 40 | We'll illustrate tibble dancing with `iris` grouped by `Species`. 41 | 42 | ```{r example} 43 | library(dance) 44 | g <- iris %>% group_by(Species) 45 | 46 | ``` 47 | 48 | ### waltz(), polka(), tango(), charleston() 49 | 50 | These are in the neighborhood of `dplyr::summarise()`. 51 | 52 | `waltz()` takes a grouped tibble and a list of formulas and returns a tibble with: 53 | as many columns as supplied formulas, one row per group. It does not prepend the grouping 54 | variables (see `tango` for that). 55 | 56 | ```{r} 57 | g %>% 58 | waltz( 59 | Sepal.Length = ~mean(Sepal.Length), 60 | Sepal.Width = ~mean(Sepal.Width) 61 | ) 62 | ``` 63 | 64 | `polka()` deals with peeling off one layer of grouping: 65 | 66 | ```{r} 67 | g %>% 68 | polka() 69 | ``` 70 | 71 | `tango()` binds the results of `polka()` and `waltz()` so is the closest to 72 | `dplyr::summarise()` 73 | 74 | ```{r} 75 | g %>% 76 | tango( 77 | Sepal.Length = ~mean(Sepal.Length), 78 | Sepal.Width = ~mean(Sepal.Width) 79 | ) 80 | ``` 81 | 82 | `charleston()` is like `tango` but it packs the new columns in a tibble: 83 | 84 | ```{r} 85 | g %>% 86 | charleston( 87 | Sepal.Length = ~mean(Sepal.Length), 88 | Sepal.Width = ~mean(Sepal.Width) 89 | ) 90 | ``` 91 | 92 | 93 | ### swing, twist 94 | 95 | There is no `waltz_at()`, `tango_at()`, etc ... but instead we can use 96 | either the same function on a set of columns or a set of functions on the same column. 
97 | 98 | For this, we need to learn new dance moves: 99 | 100 | `swing()` and `twist()` are for applying the same function to a set 101 | of columns: 102 | 103 | ```{r} 104 | library(tidyselect) 105 | 106 | g %>% 107 | tango(swing(mean, starts_with("Petal"))) 108 | 109 | g %>% 110 | tango(data = twist(mean, starts_with("Petal"))) 111 | ``` 112 | 113 | They differ in the type of column that is created and in how the columns are named: 114 | 115 | - `swing()` makes as many new columns as are selected by the tidy selection, and 116 | the columns are named using a `.name` glue pattern, this way we might `swing()` 117 | several times. 118 | 119 | ```{r} 120 | g %>% 121 | tango( 122 | swing(mean, starts_with("Petal"), .name = "mean_{var}"), 123 | swing(median, starts_with("Petal"), .name = "median_{var}"), 124 | ) 125 | ``` 126 | 127 | - `twist()` instead creates a single data frame column. 128 | 129 | ```{r} 130 | g %>% 131 | tango( 132 | mean = twist(mean, starts_with("Petal")), 133 | median = twist(median, starts_with("Petal")), 134 | ) 135 | ``` 136 | 137 | The first argument of `swing()` and `twist()` is either a function or a 138 | formula that uses `.` as a placeholder. Subsequent arguments are 139 | tidyselect selections. 140 | 141 | You can combine `swing()` and `twist()` in the same `tango()` or `waltz()`: 142 | 143 | ```{r} 144 | g %>% 145 | tango( 146 | swing(mean, starts_with("Petal"), .name = "mean_{var}"), 147 | median = twist(median, contains(".")) 148 | ) 149 | ``` 150 | 151 | ### rumba, zumba 152 | 153 | Similarly `rumba()` can be used to apply several functions to a single column. 154 | `rumba()` creates single columns and `zumba()` packs them into a data frame column. 
155 | 156 | ```{r} 157 | g %>% 158 | tango( 159 | rumba(Sepal.Width, mean = mean, median = median, .name = "Sepal_{fun}"), 160 | Petal = zumba(Petal.Width, mean = mean, median = median) 161 | ) 162 | ``` 163 | 164 | ### salsa, chacha, samba, madison 165 | 166 | Now we enter the realms of `dplyr::mutate()` with: 167 | 168 | - `salsa()` : to create new columns 169 | - `chacha()`: to reorganize a grouped tibble so that data for each group is contiguous 170 | - `samba()` : `chacha()` + `salsa()` 171 | 172 | ```{r} 173 | g %>% 174 | salsa( 175 | Sepal = ~Sepal.Length * Sepal.Width, 176 | Petal = ~Petal.Length * Petal.Width 177 | ) 178 | ``` 179 | 180 | You can `swing()`, `twist()`, `rumba()` and `zumba()` here too, and if you 181 | want the original data, you can use `samba()` instead of `salsa()`: 182 | 183 | ```{r} 184 | g %>% 185 | samba(centered = twist(~ . - mean(.), everything(), -Species)) 186 | ``` 187 | 188 | `madison()` packs the columns `salsa()` would have created 189 | 190 | ```{r} 191 | g %>% 192 | madison(swing(~ . - mean(.), starts_with("Sepal"))) 193 | ``` 194 | 195 | 196 | ### bolero and mambo 197 | 198 | `bolero()` is similar to `dplyr::filter()`. 199 | The formulas may be made by `mambo()` if you want to apply the same 200 | predicate to a tidyselection of columns: 201 | 202 | ```{r} 203 | g %>% 204 | bolero(~Sepal.Width > 4) 205 | 206 | g %>% 207 | bolero(mambo(~. > 4, starts_with("Sepal"))) 208 | 209 | g %>% 210 | bolero(mambo(~. 
> 4, starts_with("Sepal"), .op = or)) 211 | ``` 212 | 213 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # dance 5 | 6 | 7 | 8 | [![Lifecycle 9 | Status](https://img.shields.io/badge/lifecycle-experimental-blue.svg)](https://www.tidyverse.org/lifecycle/) 10 | [![Travis build 11 | status](https://travis-ci.org/romainfrancois/dance.svg?branch=master)](https://travis-ci.org/romainfrancois/dance) 12 | 13 | 14 | ![](https://media.giphy.com/media/mnLGTXoWVzAfm/giphy.gif) 15 | 16 | Dancing 💃 with the stats, aka `tibble()` dancing 🕺. `dance` is a sort of 17 | reinvention of `dplyr` classic verbs, with a more modern stack 18 | underneath, i.e. it leverages a lot from `vctrs` and `rlang`. 19 | 20 | # Installation 21 | 22 | You can install the development version from GitHub. 23 | 24 | ``` r 25 | # install.packages("pak") 26 | pak::pkg_install("romainfrancois/dance") 27 | ``` 28 | 29 | # Usage 30 | 31 | We’ll illustrate tibble dancing with `iris` grouped by `Species`. 32 | 33 | ``` r 34 | library(dance) 35 | g <- iris %>% group_by(Species) 36 | ``` 37 | 38 | ### waltz(), polka(), tango(), charleston() 39 | 40 | These are in the neighborhood of `dplyr::summarise()`. 41 | 42 | `waltz()` takes a grouped tibble and a list of formulas and returns a 43 | tibble with: as many columns as supplied formulas, one row per group. It 44 | does not prepend the grouping variables (see `tango` for that). 
45 | 46 | ``` r 47 | g %>% 48 | waltz( 49 | Sepal.Length = ~mean(Sepal.Length), 50 | Sepal.Width = ~mean(Sepal.Width) 51 | ) 52 | #> # A tibble: 3 x 2 53 | #> Sepal.Length Sepal.Width 54 | #> 55 | #> 1 5.01 3.43 56 | #> 2 5.94 2.77 57 | #> 3 6.59 2.97 58 | ``` 59 | 60 | `polka()` deals with peeling off one layer of grouping: 61 | 62 | ``` r 63 | g %>% 64 | polka() 65 | #> # A tibble: 3 x 1 66 | #> Species 67 | #> 68 | #> 1 setosa 69 | #> 2 versicolor 70 | #> 3 virginica 71 | ``` 72 | 73 | `tango()` binds the results of `polka()` and `waltz()`, so it is the closest 74 | to `dplyr::summarise()`: 75 | 76 | ``` r 77 | g %>% 78 | tango( 79 | Sepal.Length = ~mean(Sepal.Length), 80 | Sepal.Width = ~mean(Sepal.Width) 81 | ) 82 | #> # A tibble: 3 x 3 83 | #> Species Sepal.Length Sepal.Width 84 | #> 85 | #> 1 setosa 5.01 3.43 86 | #> 2 versicolor 5.94 2.77 87 | #> 3 virginica 6.59 2.97 88 | ``` 89 | 90 | `charleston()` is like `tango()`, but it packs the new columns into a single tibble column: 91 | 92 | ``` r 93 | g %>% 94 | charleston( 95 | Sepal.Length = ~mean(Sepal.Length), 96 | Sepal.Width = ~mean(Sepal.Width) 97 | ) 98 | #> # A tibble: 3 x 2 99 | #> Species data$Sepal.Length $Sepal.Width 100 | #> 101 | #> 1 setosa 5.01 3.43 102 | #> 2 versicolor 5.94 2.77 103 | #> 3 virginica 6.59 2.97 104 | ``` 105 | 106 | ### swing, twist 107 | 108 | There is no `waltz_at()`, `tango_at()`, etc., but instead we can use 109 | either the same function on a set of columns or a set of functions on 110 | the same column. 
111 | 112 | For this, we need to learn new dance moves: 113 | 114 | `swing()` and `twist()` are for applying the same function to a set of 115 | columns: 116 | 117 | ``` r 118 | library(tidyselect) 119 | 120 | g %>% 121 | tango(swing(mean, starts_with("Petal"))) 122 | #> # A tibble: 3 x 3 123 | #> Species Petal.Length Petal.Width 124 | #> 125 | #> 1 setosa 1.46 0.246 126 | #> 2 versicolor 4.26 1.33 127 | #> 3 virginica 5.55 2.03 128 | 129 | g %>% 130 | tango(data = twist(mean, starts_with("Petal"))) 131 | #> # A tibble: 3 x 2 132 | #> Species data$Petal.Length $Petal.Width 133 | #> 134 | #> 1 setosa 1.46 0.246 135 | #> 2 versicolor 4.26 1.33 136 | #> 3 virginica 5.55 2.03 137 | ``` 138 | 139 | They differ in the type of column that is created and in how the columns are named: 140 | 141 | - `swing()` makes as many new columns as are selected by the tidy 142 | selection, and the columns are named using a `.name` glue pattern, 143 | so that we can `swing()` several times. 144 | 145 | 146 | 147 | ``` r 148 | g %>% 149 | tango( 150 | swing(mean, starts_with("Petal"), .name = "mean_{var}"), 151 | swing(median, starts_with("Petal"), .name = "median_{var}"), 152 | ) 153 | #> # A tibble: 3 x 5 154 | #> Species mean_Petal.Leng… mean_Petal.Width median_Petal.Le… 155 | #> 156 | #> 1 setosa 1.46 0.246 1.5 157 | #> 2 versic… 4.26 1.33 4.35 158 | #> 3 virgin… 5.55 2.03 5.55 159 | #> # … with 1 more variable: median_Petal.Width 160 | ``` 161 | 162 | - `twist()` instead creates a single data frame column. 
163 | 164 | 165 | 166 | ``` r 167 | g %>% 168 | tango( 169 | mean = twist(mean, starts_with("Petal")), 170 | median = twist(median, starts_with("Petal")), 171 | ) 172 | #> # A tibble: 3 x 3 173 | #> Species mean$Petal.Length $Petal.Width median$Petal.Leng… $Petal.Width 174 | #> 175 | #> 1 setosa 1.46 0.246 1.5 0.2 176 | #> 2 versicolor 4.26 1.33 4.35 1.3 177 | #> 3 virginica 5.55 2.03 5.55 2 178 | ``` 179 | 180 | The first arguments of `swing()` and `twist()` are either a function or 181 | a formula that uses `.` as a placeholder. Subsequent arguments are 182 | tidyselect selections. 183 | 184 | You can combine `swing()` and `twist()` in the same `tango()` or 185 | `waltz()`: 186 | 187 | ``` r 188 | g %>% 189 | tango( 190 | swing(mean, starts_with("Petal"), .name = "mean_{var}"), 191 | median = twist(median, contains(".")) 192 | ) 193 | #> # A tibble: 3 x 4 194 | #> Species mean_Petal.Leng… mean_Petal.Width median$Sepal.Le… $Sepal.Width 195 | #> 196 | #> 1 setosa 1.46 0.246 5 3.4 197 | #> 2 versic… 4.26 1.33 5.9 2.8 198 | #> 3 virgin… 5.55 2.03 6.5 3 199 | #> # … with 2 more variables: $Petal.Length , $Petal.Width 200 | ``` 201 | 202 | ### rumba, zumba 203 | 204 | Similarly `rumba()` can be used to apply several functions to a single 205 | column. `rumba()` creates single columns and `zumba()` packs them into a 206 | data frame column. 
207 | 208 | ``` r 209 | g %>% 210 | tango( 211 | rumba(Sepal.Width, mean = mean, median = median, .name = "Sepal_{fun}"), 212 | Petal = zumba(Petal.Width, mean = mean, median = median) 213 | ) 214 | #> # A tibble: 3 x 4 215 | #> Species Sepal_mean Sepal_median Petal$mean $median 216 | #> 217 | #> 1 setosa 3.43 3.4 0.246 0.2 218 | #> 2 versicolor 2.77 2.8 1.33 1.3 219 | #> 3 virginica 2.97 3 2.03 2 220 | ``` 221 | 222 | ### salsa, chacha, samba, madison 223 | 224 | Now we enter the realms of `dplyr::mutate()` with: 225 | 226 | - `salsa()` : to create new columns 227 | - `chacha()`: to reorganize a grouped tibble so that data for each 228 | group is contiguous 229 | - `samba()` : `chacha()` + `salsa()` 230 | 231 | 232 | 233 | ``` r 234 | g %>% 235 | salsa( 236 | Sepal = ~Sepal.Length * Sepal.Width, 237 | Petal = ~Petal.Length * Petal.Width 238 | ) 239 | #> # A tibble: 150 x 2 240 | #> Sepal Petal 241 | #> 242 | #> 1 17.8 0.280 243 | #> 2 14.7 0.280 244 | #> 3 15.0 0.26 245 | #> 4 14.3 0.3 246 | #> 5 18 0.280 247 | #> 6 21.1 0.68 248 | #> 7 15.6 0.42 249 | #> 8 17 0.3 250 | #> 9 12.8 0.280 251 | #> 10 15.2 0.15 252 | #> # … with 140 more rows 253 | ``` 254 | 255 | You can `swing()`, `twist()`, `rumba()` and `zumba()` here too, and if 256 | you want the original data, you can use `samba()` instead of `salsa()`: 257 | 258 | ``` r 259 | g %>% 260 | samba(centered = twist(~ . 
- mean(.), everything(), -Species)) 261 | #> # A tibble: 150 x 6 262 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species 263 | #> 264 | #> 1 5.1 3.5 1.4 0.2 setosa 265 | #> 2 4.9 3 1.4 0.2 setosa 266 | #> 3 4.7 3.2 1.3 0.2 setosa 267 | #> 4 4.6 3.1 1.5 0.2 setosa 268 | #> 5 5 3.6 1.4 0.2 setosa 269 | #> 6 5.4 3.9 1.7 0.4 setosa 270 | #> 7 4.6 3.4 1.4 0.3 setosa 271 | #> 8 5 3.4 1.5 0.2 setosa 272 | #> 9 4.4 2.9 1.4 0.2 setosa 273 | #> 10 4.9 3.1 1.5 0.1 setosa 274 | #> # … with 140 more rows, and 4 more variables: centered$Sepal.Length , 275 | #> # $Sepal.Width , $Petal.Length , $Petal.Width 276 | ``` 277 | 278 | `madison()` packs the columns `salsa()` would have created 279 | 280 | ``` r 281 | g %>% 282 | madison(swing(~ . - mean(.), starts_with("Sepal"))) 283 | #> # A tibble: 150 x 6 284 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species 285 | #> 286 | #> 1 5.1 3.5 1.4 0.2 setosa 287 | #> 2 4.9 3 1.4 0.2 setosa 288 | #> 3 4.7 3.2 1.3 0.2 setosa 289 | #> 4 4.6 3.1 1.5 0.2 setosa 290 | #> 5 5 3.6 1.4 0.2 setosa 291 | #> 6 5.4 3.9 1.7 0.4 setosa 292 | #> 7 4.6 3.4 1.4 0.3 setosa 293 | #> 8 5 3.4 1.5 0.2 setosa 294 | #> 9 4.4 2.9 1.4 0.2 setosa 295 | #> 10 4.9 3.1 1.5 0.1 setosa 296 | #> # … with 140 more rows, and 2 more variables: data$Sepal.Length , 297 | #> # $Sepal.Width 298 | ``` 299 | 300 | ### bolero and mambo 301 | 302 | `bolero()` is similar to `dplyr::filter()`. The formulas may be made by 303 | `mambo()` if you want to apply the same predicate to a tidyselection of 304 | columns: 305 | 306 | ``` r 307 | g %>% 308 | bolero(~Sepal.Width > 4) 309 | #> # A tibble: 3 x 5 310 | #> # Groups: Species [3] 311 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species 312 | #> 313 | #> 1 5.7 4.4 1.5 0.4 setosa 314 | #> 2 5.2 4.1 1.5 0.1 setosa 315 | #> 3 5.5 4.2 1.4 0.2 setosa 316 | 317 | g %>% 318 | bolero(mambo(~. 
> 4, starts_with("Sepal"))) 319 | #> # A tibble: 3 x 5 320 | #> # Groups: Species [3] 321 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species 322 | #> 323 | #> 1 5.7 4.4 1.5 0.4 setosa 324 | #> 2 5.2 4.1 1.5 0.1 setosa 325 | #> 3 5.5 4.2 1.4 0.2 setosa 326 | 327 | g %>% 328 | bolero(mambo(~. > 4, starts_with("Sepal"), .op = or)) 329 | #> # A tibble: 150 x 5 330 | #> # Groups: Species [3] 331 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species 332 | #> 333 | #> 1 5.1 3.5 1.4 0.2 setosa 334 | #> 2 4.9 3 1.4 0.2 setosa 335 | #> 3 4.7 3.2 1.3 0.2 setosa 336 | #> 4 4.6 3.1 1.5 0.2 setosa 337 | #> 5 5 3.6 1.4 0.2 setosa 338 | #> 6 5.4 3.9 1.7 0.4 setosa 339 | #> 7 4.6 3.4 1.4 0.3 setosa 340 | #> 8 5 3.4 1.5 0.2 setosa 341 | #> 9 4.4 2.9 1.4 0.2 setosa 342 | #> 10 4.9 3.1 1.5 0.1 setosa 343 | #> # … with 140 more rows 344 | ``` 345 | --------------------------------------------------------------------------------