├── codecov.yml ├── .gitignore ├── .travis.yml ├── .Rbuildignore ├── tests └── tinytest.R ├── inst └── tinytest │ ├── test_coalesce.R │ ├── test_cube.R │ ├── test_text_expand.R │ ├── test_columns.R │ ├── test_utils.R │ ├── test_dcast_melt.R │ ├── test_dplyr_verbs.R │ ├── test_take_all.R │ ├── test_vlookup.R │ ├── test_to_list.R │ ├── test_let_all.R │ ├── test_join.R │ ├── test_to_wide.R │ └── test_verbs.R ├── R ├── cube.R ├── coalesce.R ├── on_attach.R ├── text_expand.R ├── dt_utils.R ├── utils.R ├── verbs.R ├── dcast.R ├── join.R ├── vlookup.R ├── to_list.R ├── maditr-package.R ├── let_all.R └── to_wide.R ├── maditr.Rproj ├── man ├── copy.Rd ├── coalesce.Rd ├── text_expand.Rd ├── dt_count.Rd ├── dt_left_join.Rd ├── columns.Rd ├── reexports.Rd ├── to_list.Rd ├── dcast.Rd ├── dt_mutate.Rd ├── vlookup.Rd ├── to_long.Rd └── maditr-package.Rd ├── DESCRIPTION ├── NEWS ├── NAMESPACE └── vignettes ├── Introduction.R └── Introduction.Rmd /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | language: R 3 | sudo: false 4 | cache: packages 5 | after_success: 6 | - Rscript -e 'covr::codecov()' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | .Rproj.user 4 | .Rhistory 5 | .RData 6 | .Ruserdata 7 | SupplementaryMaterials 8 | 9 | /doc/ 10 | /Meta/ 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | SupplementaryMaterials 6 | \.todo 7 | ^codecov\.yml$ 8 | ^\.travis\.yml$ 9 | README\.MD 10 | 11 | -------------------------------------------------------------------------------- /tests/tinytest.R: -------------------------------------------------------------------------------- 1 | if(capabilities('long.double') && requireNamespace("tinytest", quietly=TRUE)){ 2 | library(tinytest) 3 | options(covr = FALSE) 4 | data.table::setDTthreads(2) 5 | test_package("maditr", remove_side_effects=FALSE) 6 | data.table::setDTthreads(NULL) 7 | } 8 | 9 | -------------------------------------------------------------------------------- /inst/tinytest/test_coalesce.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","coalesce", "\n") 2 | suppressPackageStartupMessages(library(maditr)) 3 | x = c(1, NA, 2, NA) 4 | res = coalesce(x, 0) 5 | expect_equal( res, c(1, 0, 2, 0)) 6 | expect_error(coalesce(x, 1:2)) 7 | 8 | y = c(1, 2, NA, NA, 5) 9 | z = c(NA, NA, 3, 4, 5) 10 | expect_equal(coalesce(y, z), 1:5) 11 | 12 | 13 | -------------------------------------------------------------------------------- /R/cube.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | rollup.data.frame = function(x, ...){ 3 | eval.parent(substitute(data.table::rollup(as.data.table(x), ...))) 4 | } 5 | 6 | #' @export 7 | cube.data.frame = function(x, ...){ 8 | eval.parent(substitute(data.table::cube(as.data.table(x), ...))) 9 | } 10 | 11 | #' @export 12 | groupingsets.data.frame = function(x, ...){ 13 | eval.parent(substitute(data.table::groupingsets(as.data.table(x), ...))) 14 | } 15 | 16 | -------------------------------------------------------------------------------- /maditr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace,vignette 22 | -------------------------------------------------------------------------------- /R/coalesce.R: -------------------------------------------------------------------------------- 1 | #' Return first non-missing element 2 | #' 3 | #' It is an alias for data.table `fcoalesce`. For details see [fcoalesce][data.table::fcoalesce] 4 | #' @param ... vectors 5 | #' 6 | #' @return A vector the same length as the first ... argument with NA 7 | #' values replaced by the first non-missing value. 8 | #' 9 | #' @examples 10 | #' # examples from dplyr 11 | #' x = sample(c(1:5, NA, NA, NA)) 12 | #' coalesce(x, 0L) 13 | #' 14 | #' y = c(1, 2, NA, NA, 5) 15 | #' z = c(NA, NA, 3, 4, 5) 16 | #' coalesce(y, z) 17 | #' @export 18 | coalesce = function(...)data.table::fcoalesce(...) 19 | -------------------------------------------------------------------------------- /man/copy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_expand.R 3 | \name{copy} 4 | \alias{copy} 5 | \title{Copy an entire object} 6 | \usage{ 7 | copy(x) 8 | } 9 | \arguments{ 10 | \item{x}{object} 11 | } 12 | \value{ 13 | copy of the object 'x' 14 | } 15 | \description{ 16 | Mainly intended to copy data.table objects because by default they are modified by reference. See example. 17 | } 18 | \examples{ 19 | data(mtcars) 20 | dt_mtcars = as.data.table(mtcars) 21 | dt_mtcars2 = dt_mtcars 22 | dt_mtcars3 = copy(dt_mtcars) 23 | let(dt_mtcars, new = 1) 24 | 25 | head(dt_mtcars2) # we see 'new' column 26 | head(dt_mtcars3) # no 'new' column 27 | } 28 | -------------------------------------------------------------------------------- /man/coalesce.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coalesce.R 3 | \name{coalesce} 4 | \alias{coalesce} 5 | \title{Return first non-missing element} 6 | \usage{ 7 | coalesce(...) 8 | } 9 | \arguments{ 10 | \item{...}{vectors} 11 | } 12 | \value{ 13 | A vector the same length as the first ... argument with NA 14 | values replaced by the first non-missing value. 15 | } 16 | \description{ 17 | It is an alias for data.table \code{fcoalesce}. For details see \link[data.table:coalesce]{fcoalesce} 18 | } 19 | \examples{ 20 | # examples from dplyr 21 | x = sample(c(1:5, NA, NA, NA)) 22 | coalesce(x, 0L) 23 | 24 | y = c(1, 2, NA, NA, 5) 25 | z = c(NA, NA, 3, 4, 5) 26 | coalesce(y, z) 27 | } 28 | -------------------------------------------------------------------------------- /man/text_expand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/text_expand.R 3 | \name{text_expand} 4 | \alias{text_expand} 5 | \title{Evaluate expressions in curly brackets inside strings} 6 | \usage{ 7 | text_expand(..., delim = c("\\\\{", "\\\\}")) 8 | } 9 | \arguments{ 10 | \item{...}{character vectors} 11 | 12 | \item{delim}{character vector of length 2 - pair of opening and closing 13 | delimiters for the templating tags. By default it is curly brackets. Note 14 | that \code{delim} will be used in the perl-style regular expression so you 15 | need to escape special characters, e. g. use "\\\\\{" instead of 16 | "\{".} 17 | } 18 | \value{ 19 | Vector of characters 20 | } 21 | \description{ 22 | \code{text_expand} is simple string interpolation function. It searches in its 23 | arguments expressions in curly brackets \code{{expr}}, evaluate them and substitute with 24 | the result of evaluation. See examples. 25 | } 26 | \examples{ 27 | i = 1:5 28 | text_expand("q{i}") 29 | 30 | i = 1:3 31 | j = 1:3 32 | text_expand("q1_{i}_{j}") 33 | 34 | data(iris) 35 | text_expand("'iris' has {nrow(iris)} rows.") 36 | } 37 | -------------------------------------------------------------------------------- /R/on_attach.R: -------------------------------------------------------------------------------- 1 | .onAttach = function(...) { 2 | hints = c( 3 | 'To select rows from data: rows(mtcars, am==0)', 4 | 'To select columns from data: columns(mtcars, mpg, vs:carb)', 5 | 'To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)', 6 | 'To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)', 7 | 'To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)', 8 | "To get total summary skip 'by' argument: take_all(mtcars, mean)", 9 | "Use magrittr pipe '%>%' to chain several operations: 10 | mtcars %>% 11 | let(mpg_hp = mpg/hp) %>% 12 | take(mean(mpg_hp), by = am) 13 | ", 14 | 'To modify variables or add new variables: 15 | let(mtcars, new_var = 42, new_var2 = new_var*hp) %>% head()', 16 | 'To drop variable use NULL: let(mtcars, am = NULL) %>% head()' 17 | ) 18 | # we don't touch random number generator 19 | curr_hint = hints[round(as.numeric(Sys.time())) %% length(hints) + 1] 20 | packageStartupMessage(paste0("\n", curr_hint, "\n")) 21 | } 22 | -------------------------------------------------------------------------------- /inst/tinytest/test_cube.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","cube, rollup and etc", "\n") 2 | n = 24L 3 | set.seed(25) 4 | DT = data.table( 5 | color = sample(c("green","yellow","red"), n, TRUE), 6 | year = as.Date(sample(paste0(2011:2015,"-01-01"), n, TRUE)), 7 | status = as.factor(sample(c("removed","active","inactive","archived"), n, TRUE)), 8 | amount = sample(1:5, n, TRUE), 9 | value = sample(c(3, 3.5, 2.5, 2), n, TRUE) 10 | ) 11 | 12 | DF = as.data.frame(DT) 13 | # rollup 14 | expect_equal( 15 | rollup(DT, j = sum(value), by = c("color","year","status")), 16 | rollup(DF, j = sum(value), by = c("color","year","status")) 17 | ) 18 | 19 | 20 | # cube 21 | expect_equal( 22 | cube(DT, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols="value"), 23 | cube(DF, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols="value") 24 | ) 25 | # groupingsets 26 | expect_equal( 27 | groupingsets(DT, j = c(list(count=.N), lapply(.SD, sum)), by = c("color","year","status"), 28 | sets = list("color", c("year","status"), character()), id=TRUE), 29 | groupingsets(DF, j = c(list(count=.N), lapply(.SD, sum)), by = c("color","year","status"), 30 | sets = list("color", c("year","status"), character()), id=TRUE) 31 | ) 32 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: maditr 2 | Type: Package 3 | Title: Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table' 4 | Version: 0.8.6 5 | Maintainer: Gregory Demin 6 | Authors@R: person("Gregory", "Demin", email = "gdemin@gmail.com", role = c("aut", "cre")) 7 | Depends: R (>= 3.4.0) 8 | Imports: data.table (>= 1.12.6), magrittr (>= 1.5) 9 | Suggests: knitr, tinytest, utils, rmarkdown, stats 10 | Description: Provides pipe-style interface for 'data.table'. Package preserves all 'data.table' features without 11 | significant impact on performance. 'let' and 'take' functions are simplified interfaces for most common data 12 | manipulation tasks. For example, you can write 'take(mtcars, mean(mpg), by = am)' for aggregation or 13 | 'let(mtcars, hp_wt = hp/wt, hp_wt_mpg = hp_wt/mpg)' for modification. Use 'take_if/let_if' for conditional 14 | aggregation/modification. Additionally there are some conveniences such as automatic 'data.frame' 15 | conversion to 'data.table'. 16 | License: GPL-2 17 | URL: https://github.com/gdemin/maditr 18 | BugReports: https://github.com/gdemin/maditr/issues 19 | VignetteBuilder: knitr 20 | Encoding: UTF-8 21 | RoxygenNote: 7.3.2 22 | Roxygen: list(markdown = TRUE) 23 | -------------------------------------------------------------------------------- /inst/tinytest/test_text_expand.R: -------------------------------------------------------------------------------- 1 | cat("\nContext: text_expand\n") 2 | 3 | i = 1:2 4 | # expect_error(text_expand(i)) 5 | expect_identical(text_expand("{i}% of sum"), c("1% of sum", "2% of sum")) 6 | expect_identical(text_expand("q"), "q") 7 | expect_identical(text_expand("q{i}"), c("q1", "q2")) 8 | expect_identical(text_expand("q{1:2}"), c("q1", "q2")) 9 | 10 | expect_identical(text_expand("q{i}_{i}"), c("q1_1", "q2_2")) 11 | expect_identical(text_expand("q`i`_`i`", delim = c("`", "`")), c("q1_1", "q2_2")) 12 | expect_identical(text_expand("q/i}_/i}", delim = c("/", "}")), c("q1_1", "q2_2")) 13 | expect_identical(text_expand("q{i}_{i}", "q{i}"), c("q1_1", "q2_2", "q1", "q2")) 14 | expect_identical(text_expand("q{sum(i)}"), c("q3")) 15 | j = 1:2 16 | expect_identical(text_expand("q{i}_{j}"), c("q1_1", "q1_2", "q2_1", "q2_2")) 17 | k = 1:2 18 | expect_identical(text_expand("q{i}_{j}_{k}"), 19 | c("q1_1_1", "q1_1_2", "q1_2_1", "q1_2_2", "q2_1_1", "q2_1_2", "q2_2_1", "q2_2_2")) 20 | 21 | expect_error(text_expand("{ffgfg}")) 22 | 23 | 24 | test_text_expand = function(x){ 25 | i = 45 26 | text_expand(x) 27 | } 28 | 29 | expect_identical(test_text_expand("{i}"),'45') 30 | k = 42 31 | expect_identical(test_text_expand("{k}"),'42') 32 | 33 | dfs = data.frame( zzz = 67:68) 34 | 35 | expect_identical(with(dfs, text_expand("{zzz}")), as.character(67:68)) 36 | expect_identical(with(dfs, text_expand("{k}")), '42') 37 | 38 | 39 | -------------------------------------------------------------------------------- /man/dt_count.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dt_utils.R 3 | \name{dt_count} 4 | \alias{dt_count} 5 | \alias{dt_add_count} 6 | \alias{dt_top_n} 7 | \title{Additional useful functions} 8 | \usage{ 9 | dt_count(data, ..., weight = NULL, sort = FALSE, name = "n") 10 | 11 | dt_add_count(data, ..., weight = NULL, sort = FALSE, name = "n") 12 | 13 | dt_top_n(data, n, by, order_by = NULL) 14 | } 15 | \arguments{ 16 | \item{data}{data.table/data.frame data.frame will be automatically converted 17 | to data.table.} 18 | 19 | \item{...}{variables to group by.} 20 | 21 | \item{weight}{optional. Unquoted variable name. If provided result will be the sum of this variable by groups.} 22 | 23 | \item{sort}{logical. If TRUE result will be sorted in desending order by resulting variable.} 24 | 25 | \item{name}{character. Name of resulting variable.} 26 | 27 | \item{n}{numeric. number of top cases. If n is negative then bottom values will be returned.} 28 | 29 | \item{by}{list or vector of grouping variables} 30 | 31 | \item{order_by}{unquoted variable name by which result will be sorted. If not 32 | specified, defaults to the last variable in the dataset.} 33 | } 34 | \value{ 35 | data.table 36 | } 37 | \description{ 38 | \itemize{ 39 | \item \code{dt_count} calculates number of cases by groups, possibly 40 | weighted. \code{dt_add_count} adds number of cases to existing dataset. 41 | \item \code{dt_top_n} returns top n rows from each group. 42 | } 43 | } 44 | \examples{ 45 | 46 | data(mtcars) 47 | 48 | # dt_count 49 | dt_count(mtcars, am, vs) 50 | dt_add_count(mtcars, am, vs, name = "am_vs")[] # [] for autoprinting 51 | 52 | # dt_top_n 53 | dt_top_n(mtcars, 2, by = list(am, vs)) 54 | dt_top_n(mtcars, 2, order_by = mpg, by = list(am, vs)) 55 | } 56 | -------------------------------------------------------------------------------- /man/dt_left_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/join.R 3 | \name{dt_left_join} 4 | \alias{dt_left_join} 5 | \alias{dt_right_join} 6 | \alias{dt_inner_join} 7 | \alias{dt_full_join} 8 | \alias{dt_semi_join} 9 | \alias{dt_anti_join} 10 | \title{Join two data.frames by common columns.} 11 | \usage{ 12 | dt_left_join(x, y, by = NULL, suffix = c(".x", ".y")) 13 | 14 | dt_right_join(x, y, by = NULL, suffix = c(".x", ".y")) 15 | 16 | dt_inner_join(x, y, by = NULL, suffix = c(".x", ".y")) 17 | 18 | dt_full_join(x, y, by = NULL, suffix = c(".x", ".y")) 19 | 20 | dt_semi_join(x, y, by = NULL) 21 | 22 | dt_anti_join(x, y, by = NULL) 23 | } 24 | \arguments{ 25 | \item{x}{data.frame or data.table} 26 | 27 | \item{y}{data.frame or data.table} 28 | 29 | \item{by}{a character vector of variables to join by. If NULL, the default, 30 | *_join() will do a natural join, using all variables with common names 31 | across the two tables. A message lists the variables so that you can check 32 | they're right (to suppress the message, simply explicitly list the 33 | variables that you want to join). To join by different variables on x and y 34 | use a named vector. For example, \code{by = c("a" = "b")} will match x.a to y.b.} 35 | 36 | \item{suffix}{If there are non-joined duplicate variables in x and y, these 37 | suffixes will be added to the output to disambiguate them. Should be a 38 | character vector of length 2.} 39 | } 40 | \value{ 41 | data.table 42 | } 43 | \description{ 44 | Do different versions of SQL join operations. See examples. 45 | } 46 | \examples{ 47 | workers = fread(" 48 | name company 49 | Nick Acme 50 | John Ajax 51 | Daniela Ajax 52 | ") 53 | 54 | positions = fread(" 55 | name position 56 | John designer 57 | Daniela engineer 58 | Cathie manager 59 | ") 60 | 61 | workers \%>\% dt_inner_join(positions) 62 | workers \%>\% dt_left_join(positions) 63 | workers \%>\% dt_right_join(positions) 64 | workers \%>\% dt_full_join(positions) 65 | 66 | # filtering joins 67 | workers \%>\% dt_anti_join(positions) 68 | workers \%>\% dt_semi_join(positions) 69 | 70 | # To suppress the message, supply 'by' argument 71 | workers \%>\% dt_left_join(positions, by = "name") 72 | 73 | # Use a named 'by' if the join variables have different names 74 | positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions' 75 | workers \%>\% dt_inner_join(positions2, by = c("name" = "worker")) 76 | } 77 | -------------------------------------------------------------------------------- /man/columns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/columns.R 3 | \name{columns} 4 | \alias{columns} 5 | \alias{cols} 6 | \alias{rows} 7 | \title{Selects columns or rows from the data set} 8 | \usage{ 9 | columns(data, ...) 10 | 11 | cols(data, ...) 12 | 13 | rows(data, ...) 14 | } 15 | \arguments{ 16 | \item{data}{data.table/data.frame} 17 | 18 | \item{...}{unquoted or quoted column names, regex selectors or variable 19 | ranges for 'columns' and logical conditions for 'rows'.} 20 | } 21 | \value{ 22 | data.frame/data.table 23 | } 24 | \description{ 25 | \itemize{ 26 | \item \code{columns}: select columns from dataset. There are four ways of column selection: 27 | } 28 | \enumerate{ 29 | \item Simply by column names 30 | \item By variable ranges, e. g. vs:carb. Alternatively, you can use '\%to\%' 31 | instead of colon: 'vs \%to\% carb'. 32 | \item With regular expressions. Characters which start with '^' or end with '$' 33 | considered as Perl-style regular expression patterns. For example, '^Petal' 34 | returns all variables started with 'Petal'. 'Width$' returns all variables 35 | which end with 'Width'. Pattern '^.' matches all variables and pattern 36 | '^.*my_str' is equivalent to contains "my_str"'. 37 | \item By character variables with interpolated parts. Expression in the curly 38 | brackets inside characters will be evaluated in the parent frame with 39 | \link{text_expand}. For example, \verb{a\{1:3\}} will be transformed to the names 'a1', 40 | 'a2', 'a3'. 'cols' is just a shortcut for 'columns'. See examples. 41 | } 42 | 43 | \if{html}{\out{
}}\preformatted{}\if{html}{\out{
}} 44 | \itemize{ 45 | \item \code{rows}: select rows from dataset by logical conditions. 46 | } 47 | } 48 | \examples{ 49 | 50 | ## columns 51 | mtcars \%>\% 52 | columns(vs:carb, cyl) 53 | mtcars \%>\% 54 | columns(-am, -cyl) 55 | 56 | # regular expression pattern 57 | columns(iris, "^Petal") \%>\% head() # variables which start from 'Petal' 58 | columns(iris, "Width$") \%>\% head() # variables which end with 'Width' 59 | # move Species variable to the front. 60 | # pattern "^." matches all variables 61 | columns(iris, Species, "^.") \%>\% head() 62 | # pattern "^.*i" means "contains 'i'" 63 | columns(iris, "^.*i") \%>\% head() 64 | # numeric indexing - all variables except Species 65 | columns(iris, 1:4) \%>\% head() 66 | 67 | # variable expansion 68 | dims = c("Width", "Length") 69 | columns(iris, "Petal.{dims}") \%>\% head() 70 | 71 | # rows 72 | 73 | mtcars \%>\% 74 | rows(am==0) \%>\% 75 | head() 76 | 77 | # select rows with compound condition 78 | mtcars \%>\% 79 | rows(am==0 & mpg>mean(mpg)) 80 | 81 | } 82 | -------------------------------------------------------------------------------- /R/text_expand.R: -------------------------------------------------------------------------------- 1 | #' Evaluate expressions in curly brackets inside strings 2 | #' 3 | #' `text_expand` is simple string interpolation function. It searches in its 4 | #' arguments expressions in curly brackets `{expr}`, evaluate them and substitute with 5 | #' the result of evaluation. See examples. 6 | #' 7 | #' @param ... character vectors 8 | #' @param delim character vector of length 2 - pair of opening and closing 9 | #' delimiters for the templating tags. By default it is curly brackets. Note 10 | #' that `delim` will be used in the perl-style regular expression so you 11 | #' need to escape special characters, e. g. use "\\\\\{" instead of 12 | #' "\{". 13 | #' @return Vector of characters 14 | #' @examples 15 | #' i = 1:5 16 | #' text_expand("q{i}") 17 | #' 18 | #' i = 1:3 19 | #' j = 1:3 20 | #' text_expand("q1_{i}_{j}") 21 | #' 22 | #' data(iris) 23 | #' text_expand("'iris' has {nrow(iris)} rows.") 24 | #' @export 25 | text_expand = function(..., delim = c("\\{", "\\}")){ 26 | length(delim)!=2 && stop("'text_expand': 'delim' should be vector of length two.") 27 | left = delim[[1]] 28 | right = delim[[2]] 29 | pattern = paste0(left, "(.+?)", right) 30 | all_vars= c(list(...), recursive = TRUE) 31 | res = vector(mode = "list", length = length(all_vars)) 32 | for(each_var in seq_along(all_vars)){ 33 | x = all_vars[each_var] 34 | if(any(grepl(pattern, x, perl = TRUE))){ 35 | positions = gregexpr(pattern, x, perl = TRUE) 36 | matches = rev(unique(unlist(regmatches(x, positions)))) 37 | var_names = gsub(right, "", gsub(left, "", matches, perl = TRUE), perl = TRUE) 38 | 39 | for(i in seq_along(var_names)){ 40 | evaluated_item = eval(parse(text = var_names[i]), 41 | envir = parent.frame(), 42 | enclos = baseenv()) 43 | x = unlist(lapply(evaluated_item, function(item){ 44 | gsub(matches[i], item, x, fixed = TRUE) 45 | 46 | })) 47 | 48 | } 49 | 50 | } 51 | res[[each_var]] = x 52 | } 53 | c(res, recursive = TRUE) 54 | } 55 | 56 | 57 | 58 | #' Copy an entire object 59 | #' 60 | #' Mainly intended to copy data.table objects because by default they are modified by reference. See example. 61 | #' 62 | #' @param x object 63 | #' 64 | #' @return copy of the object 'x' 65 | #' @export 66 | #' 67 | #' @examples 68 | #' data(mtcars) 69 | #' dt_mtcars = as.data.table(mtcars) 70 | #' dt_mtcars2 = dt_mtcars 71 | #' dt_mtcars3 = copy(dt_mtcars) 72 | #' let(dt_mtcars, new = 1) 73 | #' 74 | #' head(dt_mtcars2) # we see 'new' column 75 | #' head(dt_mtcars3) # no 'new' column 76 | copy = function(x) { 77 | if(missing(x)) return(maditr::copy) 78 | if(is.list(x)){ 79 | data.table::copy(x) 80 | } else { 81 | x 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 0.8.6 (2025-09-01) 2 | ================ 3 | * remove `sort_by` method in favor to `data.table::sort_by` 4 | * due to removing `sort_by` we can decrease minimum R version to 3.6 in the requirements 5 | 6 | 0.8.5 (2024-11-10) 7 | ================ 8 | * fixes for compatibility with the new versions of R and data.table 9 | * remove generic `sort_by` in favour of base R `sort_by`. Now the syntax for sorting data.frame and data.table is different 10 | * remove `dt_arrange` 11 | * minor fixes 12 | 13 | 0.8.4 (2024-01-11) 14 | ================ 15 | * very minor fixes 16 | 17 | 0.8.3 (2022-04-02) 18 | ================ 19 | * now 'rows' respects 'etable' class 20 | 21 | 0.8.2 (2021-05-25) 22 | ================ 23 | * fix serious bug with '%to%' in the multiassignment expression in the 'let' 24 | 25 | 0.8.1 (2021-05-18) 26 | ================ 27 | * new function 'rows' for selecting rows/filtering dataset 28 | * new function 'columns' for selecting columns 29 | * 'columns' can be used inside expressions to select range of variables 30 | * new functions 'to_long' and 'to_wide' for converting dataset to long form and vice versa 31 | * remove `.res` variable from progress bar indicator in 'to_list' 32 | * bugfixes 33 | 34 | 0.7.4 (2020-07-29) 35 | ================ 36 | * fix tests for R before 4.0 37 | * convert documentation to markdown 38 | 39 | 0.7.3 (2020-07-22) 40 | ================ 41 | * fixes for data.table 1.13 42 | * add new functions: vlookup and xlookup 43 | 44 | 0.7.1 (2020-04-27) 45 | ================ 46 | * 'dt_select' now can match variables by Perl-style regular expression, e. g. '^Petal' returns all variables which start with 'Petal'. 47 | * new functions 'to_list', 'to_vec', 'to_df', 'to_dfc' for convenient lists processing. They have a little pythonic style interface: 'to_list(1:10, if(.x %% 2 == 0) .x)' will return only even numbers. For details see '?to_list'. 48 | * new functions 'take_all' and 'let_all' for summarizing and aggregating multiple variables. 49 | * bugfixes 50 | 51 | 0.6.3 (2019-12-03) 52 | ================ 53 | * 'let' and 'take' now generics (issue #8) 54 | * add functions 'dt_count', 'dt_add_count', 'dt_top_n' (issue #5) 55 | * bumped requried data.table version 56 | * move tests from testthat to tinytest 57 | * bugfixes 58 | 59 | 60 | 0.6.2 (2019-06-09) 61 | ================ 62 | * add 'sort_by' 63 | * minor bugfixes 64 | 65 | 0.6.1 (2019-01-03) 66 | ================ 67 | * fix tests 68 | * add 'coalesce' function 69 | * add 'data.frame' method for 'rollup', 'cube' and 'groupingsets' 70 | * minor fixes 71 | 72 | 0.6.0 (2018-08-12) 73 | ================ 74 | * add joins: 'dt_left_join', 'dt_right_join', 'dt_inner_join', 'dt_full_join', 'dt_anti_join', 'dt_semi_join' 75 | 76 | 0.5.2 (2018-07-18) 77 | ================ 78 | * add 'dcast' and 'melt' methods for data.frame 79 | * add ':=' operator for 'take'/'take_if'/'summarize' for parametric evaluation 80 | 81 | 0.5.1 (2018-05-09) 82 | ================ 83 | * fix stupid bug with incorrect minimal version of data.table in DESCRIPTION 84 | * add 'dplyr'-like interface. 85 | 86 | 0.5.0 (2018-05-05) 87 | ================ 88 | * initial release 89 | 90 | -------------------------------------------------------------------------------- /inst/tinytest/test_columns.R: -------------------------------------------------------------------------------- 1 | cat("\nContext: columns\n") 2 | 3 | mt_dt = as.data.table(mtcars) 4 | res1 = columns(mt_dt, -cyl, -am) 5 | res3 = mt_dt[, -c(2,9), with = FALSE] 6 | expect_identical(res1, res3) 7 | 8 | res1 = columns(mtcars, cyl:wt, am) 9 | res3 = mtcars[, c(2:6, 9)] 10 | expect_identical(res1, res3) 11 | 12 | mt_dt = as.data.table(mtcars) 13 | res1 = columns(mt_dt, -(cyl:wt), -am) 14 | res3 = mt_dt[, -c(2:6, 9), with = FALSE] 15 | expect_identical(res1, res3) 16 | 17 | data(iris) 18 | dt_iris = as.data.table(iris) 19 | res1 = columns(dt_iris, Species, "^Sepal") 20 | expect_identical(res1, dt_iris[,.(Species, Sepal.Length, Sepal.Width)]) 21 | my_sepal = "^Sepal" 22 | res1 = columns(dt_iris, Species, my_sepal) 23 | expect_identical(res1, dt_iris[,.(Species, Sepal.Length, Sepal.Width)]) 24 | 25 | res1 = columns(dt_iris, Species, "^.") 26 | expect_identical(res1, dt_iris[,c(5, 1:4)]) 27 | 28 | res1 = columns(dt_iris, Species, "^Sepal", "Width$") 29 | expect_identical(res1, dt_iris[,c(5, 1:2, 4)]) 30 | 31 | res1 = columns(dt_iris, -"^Sepal", -"Width$") 32 | expect_identical(res1, dt_iris[,c(3,5)]) 33 | 34 | res1 = columns(dt_iris, "^.+\\.") 35 | expect_identical(res1, dt_iris[,c(1:4)]) 36 | 37 | res1 = columns(dt_iris, Species, Species) 38 | expect_identical(res1, dt_iris[,c(5)]) 39 | 40 | res1 = columns(dt_iris, "Species") 41 | expect_identical(res1, dt_iris[,c(5)]) 42 | 43 | etab = data.frame(a = 1:2, b = 3:4) 44 | class(etab) = c("etable", class(etab)) 45 | res = etab[,2, drop = FALSE] 46 | expect_identical(res, columns(etab, "b")) 47 | 48 | mt_dt = as.data.table(mtcars) 49 | 50 | expect_identical( 51 | columns(mt_dt, mpg:hp, carb), 52 | columns(mt_dt, mpg %to% hp, carb) 53 | ) 54 | 55 | 56 | expect_identical( 57 | columns(mtcars, mpg:hp, carb), 58 | columns(mtcars, mpg %to% hp, carb) 59 | ) 60 | ### 61 | dims = c("Width", "Length") 62 | expect_identical( 63 | columns(iris, "Petal.{dims}"), 64 | iris[, paste0("Petal.",dims)] 65 | ) 66 | 67 | 68 | expect_identical( 69 | columns(iris, -"Petal.{dims}"), 70 | iris[, -(3:4)] 71 | ) 72 | ### 73 | my_columns = function(my_dims){ 74 | columns(iris, "Petal.{my_dims}") 75 | } 76 | 77 | expect_identical( 78 | my_columns(dims), 79 | iris[, paste0("Petal.",dims)] 80 | ) 81 | 82 | ### 83 | my_columns = function(my_dims){ 84 | columns(iris, my_dims) 85 | } 86 | 87 | expect_identical( 88 | my_columns("Petal.{dims}"), 89 | iris[, (4:3)] 90 | ) 91 | 92 | ### 93 | 94 | my_columns = function(my_dims){ 95 | columns(iris, -my_dims) 96 | } 97 | 98 | expect_identical( 99 | my_columns("Petal.{dims}"), 100 | iris[, -(4:3)] 101 | ) 102 | 103 | ### 104 | 105 | my_columns = function(my_dims){ 106 | columns(iris, -"Petal.{my_dims}") 107 | } 108 | 109 | expect_identical( 110 | my_columns(dims), 111 | iris[, -(3:4)] 112 | ) 113 | 114 | 115 | my_columns = function(my_dims){ 116 | columns(iris, "Petal.{dims}") 117 | } 118 | 119 | expect_identical( 120 | my_columns(NULL), 121 | iris[, paste0("Petal.",dims)] 122 | ) 123 | 124 | 125 | my_vars = c("Petal.Length", "Petal.Width") 126 | expect_identical( 127 | columns(iris, my_vars, Species), 128 | iris[,3:5] 129 | ) 130 | 131 | expect_identical( 132 | columns(iris, -my_vars, -Species), 133 | iris[,-(3:5)] 134 | ) 135 | 136 | expect_identical( 137 | columns(iris, 5-1), 138 | iris[,4, drop = FALSE] 139 | ) 140 | 141 | 142 | expect_error( 143 | columns(iris, -my_vars, Species) 144 | ) 145 | -------------------------------------------------------------------------------- /R/dt_utils.R: -------------------------------------------------------------------------------- 1 | #' Additional useful functions 2 | #' 3 | #' 4 | #' - `dt_count` calculates number of cases by groups, possibly 5 | #' weighted. `dt_add_count` adds number of cases to existing dataset. 6 | #' - `dt_top_n` returns top n rows from each group. 7 | #' 8 | #' @param data data.table/data.frame data.frame will be automatically converted 9 | #' to data.table. 10 | #' @param ... variables to group by. 11 | #' @param weight optional. Unquoted variable name. If provided result will be the sum of this variable by groups. 12 | #' @param sort logical. If TRUE result will be sorted in desending order by resulting variable. 13 | #' @param name character. Name of resulting variable. 14 | #' @param n numeric. number of top cases. If n is negative then bottom values will be returned. 15 | #' @param by list or vector of grouping variables 16 | #' @param order_by unquoted variable name by which result will be sorted. If not 17 | #' specified, defaults to the last variable in the dataset. 18 | #' @return data.table 19 | #' @export 20 | #' 21 | #' @examples 22 | #' 23 | #' data(mtcars) 24 | #' 25 | #' # dt_count 26 | #' dt_count(mtcars, am, vs) 27 | #' dt_add_count(mtcars, am, vs, name = "am_vs")[] # [] for autoprinting 28 | #' 29 | #' # dt_top_n 30 | #' dt_top_n(mtcars, 2, by = list(am, vs)) 31 | #' dt_top_n(mtcars, 2, order_by = mpg, by = list(am, vs)) 32 | dt_count = function(data, ..., weight = NULL, sort = FALSE, name = "n"){ 33 | name = as.symbol(name) 34 | weight_expr = substitute(weight) 35 | if(is.null(weight_expr)){ 36 | res = eval.parent(substitute(maditr::take(data, name := .N, by = cols(...)))) 37 | } else { 38 | res = eval.parent(substitute(maditr::take(data, name := sum(weight, na.rm = TRUE), by = cols(...)))) 39 | } 40 | if(sort) { 41 | res = eval(substitute(data.table::setorder(res, -name), list(name = name))) 42 | 43 | } 44 | res 45 | } 46 | 47 | #' @export 48 | #' @rdname dt_count 49 | dt_add_count = function(data, ..., weight = NULL, sort = FALSE, name = "n"){ 50 | name = as.symbol(name) 51 | weight_expr = substitute(weight) 52 | if(is.null(weight_expr)){ 53 | res = eval.parent(substitute(maditr::let(data, name := .N, by = cols(...)))) 54 | } else { 55 | res = eval.parent(substitute(maditr::let(data, name := sum(weight, na.rm = TRUE), by = cols(...)))) 56 | } 57 | if(sort) { 58 | res = eval(substitute(data.table::setorder(res, -name), list(name = name))) 59 | 60 | } 61 | res 62 | } 63 | 64 | #' @export 65 | #' @rdname dt_count 66 | dt_top_n = function(data, n, by, order_by = NULL){ 67 | order_by_expr = substitute(order_by) 68 | if(is.null(order_by_expr)){ 69 | order_by = as.symbol(names(data)[ncol(data)]) 70 | } 71 | if(n>0){ 72 | eval.parent(substitute({ 73 | maditr::query(data.table::setorder(data, -order_by), head(.SD, n), by = by) 74 | })) 75 | } else { 76 | eval.parent(substitute({ 77 | maditr::query(data.table::setorder(data, -order_by), tail(.SD, -n), by = by) 78 | })) 79 | } 80 | 81 | } 82 | 83 | # @export 84 | # @rdname dt_count 85 | # dt_sample_n = function(data, n, by, replace = FALSE, weight = NULL){ 86 | # sort_by_expr = substitute(sort_by) 87 | # if(is.null(sort_by_expr)){ 88 | # sort_by = as.symbol(names(data)[ncol(data)]) 89 | # } 90 | # if(n>0){ 91 | # eval.parent(substitute({ 92 | # query(sort_by(data, -sort_by), head(.SD, n), by = by) 93 | # })) 94 | # } else { 95 | # eval.parent(substitute({ 96 | # query(sort_by(data, -sort_by), tail(.SD, -n), by = by) 97 | # })) 98 | # } 99 | # 100 | # } 101 | 102 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # generate missing argument 2 | missing_arg = function(){ 3 | quote(expr=) 4 | } 5 | 6 | safe_deparse = function(expr){ 7 | res = deparse(expr, width.cutoff = 500) 8 | do.call(paste0, as.list(res)) 9 | } 10 | 11 | add_names_to_quoted_list = function(expr_list){ 12 | all_names = names(expr_list) 13 | if(is.null(all_names)){ 14 | names(expr_list) = c("", vapply(expr_list[-1], safe_deparse, FUN.VALUE = character(1))) 15 | return(expr_list) 16 | } 17 | for(i in seq_along(expr_list)){ 18 | if(all_names[i]==""){ 19 | names(expr_list)[i] = safe_deparse(expr_list[[i]]) 20 | } 21 | } 22 | 23 | expr_list 24 | } 25 | 26 | 27 | # expr - expression as after 'substitute' 28 | # symbols - named list - names will be substituted with values 29 | substitute_symbols = function(substitute_result, symbols) { 30 | eval(bquote(substitute(.(substitute_result), symbols))) 31 | } 32 | 33 | 34 | 35 | 36 | # 'expr' is result of substitute with assumption 37 | # that first argument is data.frame. 38 | # When we have huge constant instead of 39 | # variable in the expression, then, in the case of the error 40 | # we have long lag to print this error in the console. 41 | # This function is workaround for this issue. 42 | eval_in_parent_frame = function(data, expr, frame, need_expansion = TRUE){ 43 | `._***data***` = NULL # to pass CRAN check 44 | if(!is.data.table(data)){ 45 | data = as.data.table(data) 46 | } 47 | if(need_expansion){ 48 | data_names = names(data) 49 | # i 50 | if(length(expr)>2) { 51 | expr[[3]] = replace_column_expr(expr[[3]], data_names = data_names, frame = frame, type = "data.table") 52 | } 53 | # j 54 | if(length(expr)>3) { 55 | expr[[4]] = replace_column_expr(expr[[4]], data_names = data_names, frame = frame, type = "data.table") 56 | } 57 | if("by" %in% names(expr)){ 58 | curr_by = expr[["by"]] 59 | if(!missing(curr_by) && is_columns(curr_by)){ 60 | expr[["by"]] = replace_column_expr(curr_by, data_names = data_names, frame = frame, type = "names") 61 | } 62 | } 63 | } 64 | assign("._***data***", data, envir = frame) 65 | on.exit({ 66 | rm(`._***data***`, envir = frame) 67 | }) 68 | expr[[2]] = quote(`._***data***`) 69 | eval(expr, envir = frame) 70 | } 71 | 72 | # j_expr - list from j-expression 73 | # envir - environement where we will evaluate lhs of := 74 | add_names_from_walrus_assignement = function(j_expr, envir){ 75 | j_length = length(j_expr) 76 | if(is.null(names(j_expr))){ 77 | names(j_expr) = rep("", j_length) 78 | } 79 | # parse walrus (a := b) assignement 80 | for(k in seq_len(j_length)){ 81 | if(is.call(j_expr[[k]]) && identical(j_expr[[k]][[1]], as.symbol(":="))){ 82 | name_expr = j_expr[[k]][[2]] 83 | j_expr[[k]] = j_expr[[k]][[3]] 84 | if(is.call(name_expr)){ 85 | names(j_expr)[k] = eval(name_expr, envir = envir) 86 | } else { 87 | if(is.character(name_expr)){ 88 | names(j_expr)[k] = name_expr 89 | } else { 90 | names(j_expr)[k] = safe_deparse(name_expr) 91 | } 92 | } 93 | } 94 | } 95 | 96 | j_expr 97 | } 98 | 99 | add_names_from_single_symbol = function(j_expr){ 100 | if(length(j_expr)>1){ 101 | for(k in seq_along(j_expr)){ 102 | if(names(j_expr)[k]=="" && is.symbol(j_expr[[k]])){ 103 | names(j_expr)[k] = as.character(j_expr[[k]]) 104 | } 105 | } 106 | } 107 | j_expr 108 | } 109 | 110 | 111 | -------------------------------------------------------------------------------- /inst/tinytest/test_utils.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","dt_count", "\n") 2 | 3 | data(mtcars) 4 | 5 | expect_equal( 6 | dt_count(mtcars, am, vs), 7 | take(mtcars, n = .N, by =.(am, vs)) 8 | ) 9 | 10 | expect_equal( 11 | dt_count(mtcars, am, vs, sort = TRUE), 12 | take(mtcars, n = .N, by =.(am, vs)) %>% setorder(-n) 13 | ) 14 | 15 | expect_equal( 16 | dt_count(mtcars, am, vs, name = "total"), 17 | take(mtcars, total = .N, by =.(am, vs)) 18 | ) 19 | 20 | expect_equal( 21 | dt_count(mtcars, "^(am|vs)", name = "total"), 22 | take(mtcars, total = .N, by =.(vs, am)) 23 | ) 24 | 25 | expect_equal( 26 | dt_count(mtcars, vs %to% gear, name = "total"), 27 | take(mtcars, total = .N, by =.(vs, am, gear)) 28 | ) 29 | 30 | expect_equal( 31 | dt_count(mtcars, am, vs, sort = TRUE, name = "total"), 32 | take(mtcars, total = .N, by =.(am, vs)) %>% setorder(-total) 33 | ) 34 | 35 | mtcars2 = mtcars 36 | 37 | mtcars2$mpg[1:4] = NA 38 | 39 | expect_equal( 40 | dt_count(mtcars2, am, vs, weight = mpg, name = "total"), 41 | take(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs)) 42 | ) 43 | 44 | expect_equal( 45 | dt_count(mtcars2,weight = mpg, sort = TRUE, name = "total"), 46 | take(mtcars2, total = sum(mpg, na.rm = TRUE)) %>% setorder(-total) 47 | ) 48 | 49 | expect_equal( 50 | dt_count(mtcars2, am, vs, weight = mpg, sort = TRUE, name = "total"), 51 | take(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs)) %>% setorder(-total) 52 | ) 53 | 54 | ############ 55 | cat("\nContext:","dt_add_count", "\n") 56 | 57 | data(mtcars) 58 | 59 | expect_equal( 60 | dt_add_count(mtcars, am, vs), 61 | let(mtcars, n = .N, by =.(am, vs)) 62 | ) 63 | 64 | expect_equal( 65 | dt_add_count(mtcars, am, vs, sort = TRUE), 66 | let(mtcars, n = .N, by =.(am, vs)) %>% setorder(-n) 67 | ) 68 | 69 | expect_equal( 70 | dt_add_count(mtcars, am, vs, name = "total"), 71 | let(mtcars, total = .N, by =.(am, vs)) 72 | ) 73 | 74 | expect_equal( 75 | dt_add_count(mtcars, am, vs, sort = TRUE, name = "total"), 76 | let(mtcars, total = .N, by =.(am, vs)) %>% setorder(-total) 77 | ) 78 | 79 | 80 | data(mtcars) 81 | mtcars2 = mtcars 82 | 83 | mtcars2$mpg[1:4] = NA 84 | 85 | expect_equal( 86 | dt_add_count(mtcars2, am, vs, weight = mpg, name = "total"), 87 | let(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs)) 88 | ) 89 | 90 | expect_equal( 91 | dt_add_count(mtcars2, am, vs, weight = mpg, sort = TRUE, name = "total"), 92 | let(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs)) %>% setorder(-total) 93 | ) 94 | 95 | expect_equal( 96 | dt_add_count(mtcars2, weight = mpg, sort = TRUE, name = "total"), 97 | let(mtcars2, total = sum(mpg, na.rm = TRUE)) %>% setorder(-total) 98 | ) 99 | 100 | cat("\nContext:","dt_top_n", "\n") 101 | 102 | data(mtcars) 103 | 104 | expect_equal( 105 | dt_top_n(mtcars, 2), 106 | head(take_if(mtcars, order(-carb)), 2) 107 | ) 108 | 109 | 110 | expect_equal( 111 | dt_top_n(mtcars, -2), 112 | tail(take_if(mtcars, order(-carb)), 2) 113 | ) 114 | 115 | 116 | expect_equal( 117 | dt_top_n(mtcars, 2, order_by = mpg, by = list(am, vs)), 118 | query_if(mtcars, order(-mpg), head(.SD, 2), by = list(am, vs)) 119 | ) 120 | 121 | indirect = c('am', 'vs') 122 | expect_equal( 123 | dt_top_n(mtcars, 2, order_by = mpg, by = cols("{indirect}")), 124 | query_if(mtcars, order(-mpg), head(.SD, 2), by = list(am, vs)) 125 | ) 126 | 127 | expect_equal( 128 | dt_top_n(mtcars, -2, order_by = mpg, by = list(am, vs)), 129 | query_if(mtcars, order(-mpg), tail(.SD, 2), by = list(am, vs)) 130 | ) 131 | 132 | 133 | cat("\nContext:","copy", "\n") 134 | 135 | expect_identical( 136 | copy(), maditr::copy 137 | ) 138 | 139 | data(iris) 140 | expect_identical( 141 | copy(iris), iris 142 | ) 143 | 144 | 145 | expect_identical( 146 | copy(1), 1 147 | ) 148 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/maditr-package.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{\%>\%} 7 | \alias{\%<>\%} 8 | \alias{\%$\%} 9 | \alias{\%T>\%} 10 | \alias{data.table} 11 | \alias{as.data.table} 12 | \alias{is.data.table} 13 | \alias{last} 14 | \alias{first} 15 | \alias{\%like\%} 16 | \alias{between} 17 | \alias{\%between\%} 18 | \alias{inrange} 19 | \alias{\%inrange\%} 20 | \alias{:=} 21 | \alias{setnames} 22 | \alias{setcolorder} 23 | \alias{set} 24 | \alias{setDT} 25 | \alias{setDF} 26 | \alias{setorder} 27 | \alias{setorderv} 28 | \alias{setkey} 29 | \alias{setkeyv} 30 | \alias{setindex} 31 | \alias{setindexv} 32 | \alias{chmatch} 33 | \alias{\%chin\%} 34 | \alias{rbindlist} 35 | \alias{fread} 36 | \alias{fwrite} 37 | \alias{foverlaps} 38 | \alias{shift} 39 | \alias{transpose} 40 | \alias{tstrsplit} 41 | \alias{frank} 42 | \alias{frankv} 43 | \alias{.SD} 44 | \alias{.N} 45 | \alias{.I} 46 | \alias{.GRP} 47 | \alias{.BY} 48 | \alias{rleid} 49 | \alias{rleidv} 50 | \alias{rowid} 51 | \alias{rowidv} 52 | \alias{uniqueN} 53 | \alias{setDTthreads} 54 | \alias{getDTthreads} 55 | \alias{fintersect} 56 | \alias{fsetdiff} 57 | \alias{funion} 58 | \alias{fsetequal} 59 | \alias{shouldPrint} 60 | \alias{fsort} 61 | \alias{groupingsets} 62 | \alias{cube} 63 | \alias{rollup} 64 | \alias{nafill} 65 | \alias{setnafill} 66 | \alias{frollmean} 67 | \alias{frollsum} 68 | \alias{fcoalesce} 69 | \alias{fifelse} 70 | \alias{fcase} 71 | \alias{frollapply} 72 | \title{Objects exported from other packages} 73 | \keyword{internal} 74 | \description{ 75 | These objects are imported from other packages. Follow the links 76 | below to see their documentation. 77 | 78 | \describe{ 79 | \item{data.table}{\code{\link[data.table:between]{\%between\%}}, \code{\link[data.table:chmatch]{\%chin\%}}, \code{\link[data.table:between]{\%inrange\%}}, \code{\link[data.table:like]{\%like\%}}, \code{\link[data.table:special-symbols]{.BY}}, \code{\link[data.table:special-symbols]{.GRP}}, \code{\link[data.table:special-symbols]{.I}}, \code{\link[data.table:special-symbols]{.N}}, \code{\link[data.table:special-symbols]{.SD}}, \code{\link[data.table:assign]{:=}}, \code{\link[data.table]{as.data.table}}, \code{\link[data.table]{between}}, \code{\link[data.table]{chmatch}}, \code{\link[data.table:groupingsets]{cube}}, \code{\link[data.table]{data.table}}, \code{\link[data.table]{fcase}}, \code{\link[data.table:coalesce]{fcoalesce}}, \code{\link[data.table]{fifelse}}, \code{\link[data.table:setops]{fintersect}}, \code{\link[data.table:last]{first}}, \code{\link[data.table]{foverlaps}}, \code{\link[data.table]{frank}}, \code{\link[data.table:frank]{frankv}}, \code{\link[data.table]{fread}}, \code{\link[data.table:froll]{frollapply}}, \code{\link[data.table:froll]{frollmean}}, \code{\link[data.table:froll]{frollsum}}, \code{\link[data.table:setops]{fsetdiff}}, \code{\link[data.table:setops]{fsetequal}}, \code{\link[data.table]{fsort}}, \code{\link[data.table:setops]{funion}}, \code{\link[data.table]{fwrite}}, \code{\link[data.table:openmp-utils]{getDTthreads}}, \code{\link[data.table]{groupingsets}}, \code{\link[data.table:between]{inrange}}, \code{\link[data.table:as.data.table]{is.data.table}}, \code{\link[data.table]{last}}, \code{\link[data.table]{nafill}}, \code{\link[data.table]{rbindlist}}, \code{\link[data.table]{rleid}}, \code{\link[data.table:rleid]{rleidv}}, \code{\link[data.table:groupingsets]{rollup}}, \code{\link[data.table]{rowid}}, \code{\link[data.table:rowid]{rowidv}}, \code{\link[data.table:assign]{set}}, \code{\link[data.table]{setDF}}, \code{\link[data.table]{setDT}}, \code{\link[data.table:openmp-utils]{setDTthreads}}, \code{\link[data.table]{setcolorder}}, \code{\link[data.table:setkey]{setindex}}, \code{\link[data.table:setkey]{setindexv}}, \code{\link[data.table]{setkey}}, \code{\link[data.table:setkey]{setkeyv}}, \code{\link[data.table:nafill]{setnafill}}, \code{\link[data.table:setattr]{setnames}}, \code{\link[data.table]{setorder}}, \code{\link[data.table:setorder]{setorderv}}, \code{\link[data.table]{shift}}, \code{\link[data.table]{shouldPrint}}, \code{\link[data.table]{transpose}}, \code{\link[data.table]{tstrsplit}}, \code{\link[data.table:duplicated]{uniqueN}}} 80 | 81 | \item{magrittr}{\code{\link[magrittr:exposition]{\%$\%}}, \code{\link[magrittr:compound]{\%<>\%}}, \code{\link[magrittr:pipe]{\%>\%}}, \code{\link[magrittr:tee]{\%T>\%}}} 82 | }} 83 | 84 | -------------------------------------------------------------------------------- /inst/tinytest/test_dcast_melt.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","dcast/melt", "\n") 2 | # examples are borrowed from 'tidyr' package 3 | set.seed(123) 4 | stocks = data.frame( 5 | time = as.Date('2009-01-01') + 0:9, 6 | X = rnorm(10, 0, 1), 7 | Y = rnorm(10, 0, 2), 8 | Z = rnorm(10, 0, 4) 9 | ) 10 | 11 | res = structure(list(time = structure(c(14245, 14246, 14247, 14248, 12 | 14249, 14250, 14251, 14252, 14253, 14254, 14245, 14246, 14247, 13 | 14248, 14249, 14250, 14251, 14252, 14253, 14254, 14245, 14246, 14 | 14247, 14248, 14249, 14250, 14251, 14252, 14253, 14254), class = "Date"), 15 | stock = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 16 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 17 | 3L, 3L, 3L, 3L, 3L), class = "factor", .Label = c("X", "Y", 18 | "Z")), price = c(-0.560475646552213, -0.23017748948328, 1.55870831414912, 19 | 0.070508391424576, 0.129287735160946, 1.71506498688328, 0.460916205989202, 20 | -1.26506123460653, -0.686852851893526, -0.445661970099958, 21 | 2.44816359487892, 0.719627654114728, 0.801542901188104, 0.221365431890239, 22 | -1.11168226950815, 3.57382627360616, 0.995700956458479, -3.93323431325928, 23 | 1.40271180312737, -0.945582815455868, -4.27129482394738, 24 | -0.871899658633181, -4.10401779322896, -2.91556491716456, 25 | -2.50015707139703, -6.74677324296965, 3.3511481779781, 0.613492471346061, 26 | -4.55254774804779, 5.01525968427971)), row.names = c(NA, 27 | -30L), class = c("data.table", "data.frame")) 28 | 29 | stocksm = stocks %>% melt(id.vars = "time", variable.name = "stock", value.name = "price") 30 | expect_equal(stocksm, res) 31 | 32 | stocksm = stocks %>% melt(id.vars = "time", variable.name = "stock", 33 | measure.vars = patterns("X|Y|Z"), 34 | value.name = "price") 35 | expect_equal(stocksm, res) 36 | 37 | stocksm_dt = as.data.table(stocks) %>% melt(id.vars = "time", variable.name = "stock", 38 | value.name = "price") 39 | expect_equal(stocksm_dt, res) 40 | 41 | stocksm_dt = as.data.table(stocks) %>% melt(id.vars = "time", variable.name = "stock", 42 | measure.vars = patterns("X|Y|Z"), 43 | value.name = "price") 44 | expect_equal(stocksm_dt, res) 45 | 46 | res2 = structure(list(time = structure(c(14245, 14246, 14247, 14248, 47 | 14249, 14250, 14251, 14252, 14253, 14254), class = "Date"), X = c(-0.560475646552213, 48 | -0.23017748948328, 1.55870831414912, 0.070508391424576, 0.129287735160946, 49 | 1.71506498688328, 0.460916205989202, -1.26506123460653, -0.686852851893526, 50 | -0.445661970099958), Y = c(2.44816359487892, 0.719627654114728, 51 | 0.801542901188104, 0.221365431890239, -1.11168226950815, 3.57382627360616, 52 | 0.995700956458479, -3.93323431325928, 1.40271180312737, -0.945582815455868 53 | ), Z = c(-4.27129482394738, -0.871899658633181, -4.10401779322896, 54 | -2.91556491716456, -2.50015707139703, -6.74677324296965, 3.3511481779781, 55 | 0.613492471346061, -4.55254774804779, 5.01525968427971)), row.names = c(NA, 56 | -10L), class = c("data.table", "data.frame"), sorted = "time") 57 | stock_wide = as.data.frame(stocksm) %>% dcast(time ~ stock) 58 | expect_equal(res2, stock_wide) 59 | stock_wide_dt = stocksm %>% dcast(time ~ stock) 60 | expect_equal(res2, stock_wide_dt) 61 | 62 | res3 = structure(list(stock = structure(1:3, class = "factor", .Label = c("X", 63 | "Y", "Z")), `2009-01-01` = c(-0.560475646552213, 2.44816359487892, 64 | -4.27129482394738), `2009-01-02` = c(-0.23017748948328, 0.719627654114728, 65 | -0.871899658633181), `2009-01-03` = c(1.55870831414912, 0.801542901188104, 66 | -4.10401779322896), `2009-01-04` = c(0.070508391424576, 0.221365431890239, 67 | -2.91556491716456), `2009-01-05` = c(0.129287735160946, -1.11168226950815, 68 | -2.50015707139703), `2009-01-06` = c(1.71506498688328, 3.57382627360616, 69 | -6.74677324296965), `2009-01-07` = c(0.460916205989202, 0.995700956458479, 70 | 3.3511481779781), `2009-01-08` = c(-1.26506123460653, -3.93323431325928, 71 | 0.613492471346061), `2009-01-09` = c(-0.686852851893526, 1.40271180312737, 72 | -4.55254774804779), `2009-01-10` = c(-0.445661970099958, -0.945582815455868, 73 | 5.01525968427971)), row.names = c(NA, -3L), class = c("data.table", 74 | "data.frame"), sorted = "stock") 75 | 76 | stock_wide2 = as.data.frame(stocksm) %>% dcast(stock ~ time) 77 | expect_equal(res3, stock_wide2) 78 | stock_wide2_dt = stocksm %>% dcast(stock ~ time) 79 | expect_equal(res3, stock_wide2_dt) 80 | -------------------------------------------------------------------------------- /inst/tinytest/test_dplyr_verbs.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | data(mtcars) 4 | 5 | cat("\nContext:", "verbs errors", "\n") 6 | expect_error(dt_select(1:5, am)) 7 | expect_error(dt_select(iris, "^aaa")) 8 | expect_error(dt_mutate(1:5, new := 1)) 9 | expect_error(dt_summarise(1:5, new = mean(mpg))) 10 | expect_error(dt_summarise_all(1:5, by = cyl)) 11 | expect_error(dt_summarise_all(mtcars, by = cyl)) 12 | expect_error(dt_filter(1:5, cyl==1)) 13 | expect_error(dt_filter(mtcars, cyl==2, by = am)) 14 | expect_error(dt_filter(mtcars, cyl=1)) 15 | expect_error(dt_filter(mtcars, c(TRUE, FALSE))) 16 | 17 | ################ 18 | cat("\nContext:", "dt_mutate", "\n") 19 | mt_dt = as.data.table(mtcars) 20 | mt_dt2 = data.table::copy(mt_dt) 21 | new_dt = dt_mutate(mt_dt, mpg_hp = mpg/hp, new = mpg_hp*2) 22 | new_dt2 = dt_mutate(mtcars, mpg_hp = mpg/hp, new = mpg_hp*2) 23 | mt_dt2[, mpg_hp := mpg/hp][, new := mpg_hp*2] 24 | expect_identical(new_dt, mt_dt) 25 | expect_identical(new_dt, mt_dt2) 26 | expect_identical(new_dt2, mt_dt2) 27 | ############### 28 | 29 | ############### 30 | cat("\nContext:", "summarize/summarize_all", "\n") 31 | mt_dt = as.data.table(mtcars) 32 | res = dt_summarize(mtcars, fun = mean, by = am) 33 | res2 = dt_summarize(mt_dt, fun = mean, by = am) 34 | res3 = mt_dt[, lapply(.SD, mean), by = am] 35 | expect_identical(res3, res) 36 | expect_identical(res3, res2) 37 | ########### 38 | res = dt_summarize_all(mtcars, mean, by = am) 39 | res2 = dt_summarize_all(mt_dt, mean, by = am) 40 | res3 = mt_dt[, lapply(.SD, mean), by = am] 41 | expect_identical(res3, res) 42 | expect_identical(res3, res2) 43 | ############### 44 | mt_dt = as.data.table(mtcars) 45 | res = dt_summarize(mtcars, mpg, hp, fun = mean, by = am) 46 | res2 = dt_summarize(mt_dt, mpg, hp, fun = mean, by = am) 47 | res3 = mt_dt[, lapply(list(mpg = mpg, hp = hp), mean), by = am] 48 | expect_identical(res3, res) 49 | expect_identical(res3, res2) 50 | 51 | ############### 52 | mt_dt = as.data.table(mtcars) 53 | res = dt_summarize(mtcars, mpg, just_wow = hp, fun = mean, by = am) 54 | res2 = dt_summarize(mt_dt, mpg, just_wow = hp, fun = mean, by = am) 55 | res3 = mt_dt[, lapply(list(mpg = mpg, just_wow = hp), mean), by = am] 56 | expect_identical(res3, res) 57 | expect_identical(res3, res2) 58 | ################### 59 | ############# 60 | mt_dt = as.data.table(mtcars) 61 | res = dt_summarize(mtcars, agg = mean(mpg), agg2 = mean(hp), by = am) 62 | res2 = dt_summarize(mt_dt, agg = mean(mpg), agg2 = mean(hp), by = am) 63 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 64 | expect_identical(res3, res) 65 | expect_identical(res3, res2) 66 | ############# 67 | mt_dt = as.data.table(mtcars) 68 | res = dt_summarize(mtcars, agg := mean(mpg), agg2 := mean(hp), by = am) 69 | res2 = dt_summarize(mt_dt, agg := mean(mpg), agg2 := mean(hp), by = am) 70 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 71 | expect_identical(res3, res) 72 | expect_identical(res3, res2) 73 | 74 | new_var1 = "agg" 75 | new_var2 = "agg2" 76 | expr1 = quote(mean(mpg)) 77 | expr2 = quote(mean(hp)) 78 | mt_dt = as.data.table(mtcars) 79 | res = dt_summarize(mtcars, (new_var1) := eval(expr1), (new_var2) := eval(expr2), by = am) 80 | res2 = dt_summarize(mt_dt, (new_var1) := eval(expr1), (new_var2) := eval(expr2), by = am) 81 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 82 | expect_equal(res3, res) 83 | expect_equal(res3, res2) 84 | 85 | 86 | 87 | ########################## 88 | mt_dt = as.data.table(mtcars) 89 | res = dt_summarize(mtcars, mean(mpg), mean(hp), by = am) 90 | res2 = dt_summarize(mt_dt, mean(mpg), mean(hp), by = am) 91 | res3 = mt_dt[, list("mean(mpg)" = mean(mpg), "mean(hp)" = mean(hp)), by = am] 92 | expect_identical(res3, res) 93 | expect_identical(res3, res2) 94 | 95 | ########################## 96 | cat("\nContext:", "dt_filter", "\n") 97 | res = dt_filter(mtcars, vs==0, am==0) 98 | res2 = dt_filter(mt_dt, vs==0, am==0) 99 | res3 = mt_dt[vs==0 & am==0, ] 100 | expect_identical(res3, res) 101 | expect_identical(res3, res2) 102 | 103 | data(mtcars) 104 | dt_mt = as.data.table(mtcars) 105 | expect_identical( 106 | rows(mtcars, rowSums(vs %to% am)>0), 107 | dt_mt[vs>0 | am>0, ] 108 | ) 109 | 110 | etab = data.frame(a = 1:2, b = 3:4) 111 | class(etab) = c("etable", class(etab)) 112 | res = etab[2,, drop = FALSE] 113 | rownames(res) = NULL 114 | expect_equal(res, rows(etab, 2)) 115 | 116 | etab = data.frame(a = 1:2, b = 3:4) 117 | class(etab) = c("etable", class(etab)) 118 | res = etab[1,, drop = FALSE] 119 | rownames(res) = NULL 120 | expect_equal(res, rows(etab, b<4)) 121 | ########################## 122 | 123 | 124 | 125 | cat("\nContext:", "dt_select", "\n") 126 | mt_dt = as.data.table(mtcars) 127 | res1 = dt_select(mt_dt, -cyl, -am) 128 | res3 = mt_dt[, -c(2,9), with = FALSE] 129 | expect_identical(res1, res3) 130 | 131 | -------------------------------------------------------------------------------- /man/to_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_list.R 3 | \name{to_list} 4 | \alias{to_list} 5 | \alias{to_vec} 6 | \alias{to_df} 7 | \alias{to_dfr} 8 | \alias{to_dfc} 9 | \title{Apply an expression to each element of a list or vector} 10 | \usage{ 11 | to_list( 12 | data, 13 | expr = NULL, 14 | ..., 15 | skip_null = TRUE, 16 | trace = FALSE, 17 | trace_step = 1L 18 | ) 19 | 20 | to_vec( 21 | data, 22 | expr = NULL, 23 | ..., 24 | skip_null = TRUE, 25 | trace = FALSE, 26 | trace_step = 1L, 27 | recursive = TRUE, 28 | use.names = TRUE 29 | ) 30 | 31 | to_df( 32 | data, 33 | expr = NULL, 34 | ..., 35 | trace = FALSE, 36 | trace_step = 1L, 37 | idvalue = NULL, 38 | idname = "item_id" 39 | ) 40 | 41 | to_dfr( 42 | data, 43 | expr = NULL, 44 | ..., 45 | trace = FALSE, 46 | trace_step = 1L, 47 | idvalue = NULL, 48 | idname = "item_id" 49 | ) 50 | 51 | to_dfc(data, expr = NULL, ..., trace = FALSE, trace_step = 1) 52 | } 53 | \arguments{ 54 | \item{data}{data.frame/list/vector} 55 | 56 | \item{expr}{expression or function. Expression can use predefined variables: 57 | '.x' is a value of current list element, '.name' is a name of the element 58 | and '.index' is sequential number of the element.} 59 | 60 | \item{...}{further arguments provided if 'expr' is function.} 61 | 62 | \item{skip_null}{logical Should we skip NULL's from result? Default is TRUE} 63 | 64 | \item{trace}{FALSE by default. Should we report progress during execution? 65 | Possible values are TRUE, FALSE, "pb" (progress bar) or custom expression in 'quote', e. g. 'quote(print(.x))'. 66 | Expression can contain '.x', '.name', and '.index' variables.} 67 | 68 | \item{trace_step}{integer. 1 by default. Step for reporting progress. Ignored if 'trace' argument is equal to FALSE.} 69 | 70 | \item{recursive}{logical. Should unlisting be applied to list components of x? For details see \link[base:unlist]{unlist}.} 71 | 72 | \item{use.names}{logical. TRUE by default. Should names of source list be 73 | preserved? Setting it to FALSE in some cases can greatly increase 74 | performance. For details see \link[base:unlist]{unlist}.} 75 | 76 | \item{idvalue}{expression for calculation id column. Usually it is just 77 | unquoted symbols: one of the '.name', '.index' or '.x'.} 78 | 79 | \item{idname}{character, 'item_id' by default. Name for the id column.} 80 | } 81 | \value{ 82 | 'to_list' returns list, 'to_vec' tries to return vector and other functions return data.table 83 | } 84 | \description{ 85 | \itemize{ 86 | \item \code{to_list} always returns a list, each element of which is the 87 | result of expression \code{expr} on the elements of data. By 88 | default, NULL's will be removed from the result. You can change this behavior 89 | with \code{skip_null} argument. 90 | \item \code{to_vec} is the same as \code{to_list} but tries to convert its result 91 | to vector via \link[base:unlist]{unlist}. 92 | \item \code{to_df} and \code{to_dfr} try to combine its results to data.table by rows. 93 | \item \code{to_dfc} tries to combine its result to data.table by columns. 94 | } 95 | 96 | \if{html}{\out{
}}\preformatted{}\if{html}{\out{
}} 97 | 98 | Expression can use predefined variables: '.x' is a value of current list 99 | element, '.name' is a name of the element and '.index' is sequential number 100 | of the element. 101 | } 102 | \examples{ 103 | 1:5 \%>\% 104 | to_list(rnorm(n = 3, .x)) 105 | 106 | # or in 'lapply' style 107 | 1:5 \%>\% 108 | to_list(rnorm, n = 3) \%>\% 109 | to_vec(mean) 110 | 111 | # or use an anonymous function 112 | 1:5 \%>\% 113 | to_list(function(x) rnorm(3, x)) 114 | 115 | # Use to_vec() to reduce output to a vector instead 116 | # of a list: 117 | # filtering - return only even numbers 118 | to_vec(1:10, if(.x \%\% 2 == 0) .x) 119 | 120 | # filtering - calculate mean only on the numeric columns 121 | to_vec(iris, if(is.numeric(.x)) mean(.x)) 122 | 123 | # mean for numerics, number of distincts for others 124 | to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x)) 125 | 126 | # means for Sepal 127 | to_vec(iris, if(startsWith(.name, "Sepal")) mean(.x)) 128 | 129 | # A more realistic example: split a data frame into pieces, fit a 130 | # model to each piece, summarise and extract R^2 131 | mtcars \%>\% 132 | split(.$cyl) \%>\% 133 | to_list(summary(lm(mpg ~ wt, data = .x))) \%>\% 134 | to_vec(.x$r.squared) 135 | 136 | # If each element of the output is a data frame, use 137 | # to_df to row-bind them together: 138 | mtcars \%>\% 139 | split(.$cyl) \%>\% 140 | to_list(lm(mpg ~ wt, data = .x)) \%>\% 141 | to_df(c(cyl = .name, coef(.x))) 142 | 143 | \dontrun{ 144 | # read all csv files in "data" to data.frame 145 | all_files = dir("data", pattern = "csv$", full.names = TRUE) \%>\% 146 | to_df(fread, 147 | idvalue = basename(.x), 148 | idname = "filename", 149 | trace = "pb" 150 | ) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /man/dcast.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dcast.R 3 | \name{dcast} 4 | \alias{dcast} 5 | \alias{melt} 6 | \alias{guess} 7 | \title{Convert data between wide and long forms.} 8 | \usage{ 9 | dcast( 10 | data, 11 | formula, 12 | fun.aggregate = NULL, 13 | sep = "_", 14 | ..., 15 | margins = NULL, 16 | subset = NULL, 17 | fill = NULL, 18 | drop = TRUE, 19 | value.var = guess(data), 20 | verbose = getOption("datatable.verbose") 21 | ) 22 | 23 | melt( 24 | data, 25 | id.vars, 26 | measure.vars, 27 | variable.name = "variable", 28 | value.name = "value", 29 | ..., 30 | na.rm = FALSE, 31 | variable.factor = TRUE, 32 | value.factor = FALSE, 33 | verbose = getOption("datatable.verbose") 34 | ) 35 | 36 | guess(data) 37 | } 38 | \arguments{ 39 | \item{data}{A data.table/data.frame. \code{data.frame} will be automatically 40 | converted to data.table.} 41 | 42 | \item{formula}{A formula of the form LHS ~ RHS to cast. For details see \link[data.table:dcast.data.table]{dcast}.} 43 | 44 | \item{fun.aggregate}{Should the data be aggregated before casting? If the 45 | formula doesn't identify a single observation for each cell, then 46 | aggregation defaults to length with a message.} 47 | 48 | \item{sep}{Character vector of length 1, indicating the separating character 49 | in variable names generated during casting. Default is _ for backwards 50 | compatibility.} 51 | 52 | \item{...}{Any other arguments that may be passed to the aggregating function.} 53 | 54 | \item{margins}{For details see \link[data.table:dcast.data.table]{dcast}.} 55 | 56 | \item{subset}{Specified if casting should be done on a subset of the data.} 57 | 58 | \item{fill}{Value with which to fill missing cells. If fun.aggregate is 59 | present, takes the value by applying the function on a 0-length vector.} 60 | 61 | \item{drop}{FALSE will cast by including all missing combinations. c(FALSE, 62 | TRUE) will only include all missing combinations of formula LHS. And 63 | c(TRUE, FALSE) will only include all missing combinations of formula RHS.} 64 | 65 | \item{value.var}{Name of the column whose values will be filled to cast. 66 | Function 'guess()' tries to, well, guess this column automatically, if none 67 | is provided. It is possible to cast multiple `value.var`` columns 68 | simultaneously. For details see \link[data.table:dcast.data.table]{dcast}.} 69 | 70 | \item{verbose}{For details see \link[data.table:dcast.data.table]{dcast}.} 71 | 72 | \item{id.vars}{vector of id variables. Can be integer (corresponding id 73 | column numbers) or character (id column names) vector. If missing, all 74 | non-measure columns will be assigned to it. If integer, must be positive; 75 | see Details.} 76 | 77 | \item{measure.vars}{Measure variables for melting. Can be missing, vector, 78 | list, or pattern-based. For details see \link[data.table:dcast.data.table]{dcast}.} 79 | 80 | \item{variable.name}{name for the measured variable names column. The default name is 'variable'.} 81 | 82 | \item{value.name}{name for the molten data values column(s). The default name 83 | is 'value'. Multiple names can be provided here for the case when 84 | measure.vars is a list, though note well that the names provided in 85 | measure.vars take precedence.} 86 | 87 | \item{na.rm}{If TRUE, NA values will be removed from the molten data.} 88 | 89 | \item{variable.factor}{If TRUE, the variable column will be converted to 90 | factor, else it will be a character column.} 91 | 92 | \item{value.factor}{If TRUE, the value column will be converted to factor, 93 | else the molten value type is left unchanged.} 94 | } 95 | \value{ 96 | data.table 97 | } 98 | \description{ 99 | The \code{dcast} formula takes the form \code{LHS ~ RHS}, ex: \code{var1 + var2 ~ var3}. The 100 | order of entries in the formula is essential. There are two special 101 | variables: \code{.} and \code{...}. \code{.} represents no variable; \code{...} represents all 102 | variables not otherwise mentioned in formula. LHS variable values will be in 103 | rows. RHS variables values will become column names. 104 | \code{fun.aggregate(value.var)} will be cell values. For details see 105 | \link[data.table:dcast.data.table]{dcast} and \link[data.table:melt.data.table]{melt}. 106 | } 107 | \examples{ 108 | # examples from 'tidyr' package 109 | stocks = data.frame( 110 | time = as.Date('2009-01-01') + 0:9, 111 | X = rnorm(10, 0, 1), 112 | Y = rnorm(10, 0, 2), 113 | Z = rnorm(10, 0, 4) 114 | ) 115 | stocksm = stocks \%>\% 116 | melt(id.vars = "time", variable.name = "stock", value.name = "price") 117 | stocksm \%>\% dcast(time ~ stock) 118 | stocksm \%>\% dcast(stock ~ time) 119 | 120 | # dcast and melt are complements 121 | df = data.frame(x = c("a", "b"), y = c(3, 4), z = c(5, 6)) 122 | df \%>\% 123 | dcast(z ~ x, value.var = "y") \%>\% 124 | melt(id.vars = "z", variable.name = "x", value.name = "y", na.rm = TRUE) 125 | } 126 | \author{ 127 | Matt Dowle \href{mailto:mattjdowle@gmail.com}{mattjdowle@gmail.com} 128 | } 129 | \keyword{internal} 130 | -------------------------------------------------------------------------------- /inst/tinytest/test_take_all.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","take_all", "\n") 2 | 3 | 4 | data(iris) 5 | data(mtcars) 6 | dt_iris = as.data.table(iris) 7 | expect_equal( 8 | take_all(iris, if(is.numeric(.x)) mean(.x)), 9 | dt_iris[,lapply(.SD, mean), .SDcols = -5] 10 | ) 11 | 12 | expect_equal( 13 | take_all(iris, if(.index<5) mean(.value)), 14 | dt_iris[,lapply(.SD, mean), .SDcols = -5] 15 | ) 16 | expect_equal( 17 | take_all(iris, mean, by = Species), 18 | dt_iris[,lapply(.SD, mean), by = Species] 19 | ) 20 | 21 | expect_equal( 22 | take_all(iris, if(is.numeric(.x)) scale(.x)), 23 | dt_iris[,lapply(.SD, scale), .SDcols = -5] 24 | ) 25 | 26 | expect_equal( 27 | take_all(dt_iris, mean, sd, by = Species, suffix = FALSE), 28 | dt_iris[,c( 29 | setNames( 30 | lapply(.SD, mean), 31 | paste0("mean_", colnames(.SD)) 32 | ), 33 | setNames( 34 | lapply(.SD, sd), 35 | paste0("sd_", colnames(.SD)) 36 | )), by = Species] 37 | ) 38 | 39 | 40 | expect_equal( 41 | take_all(dt_iris, mean, sd, by = Species), 42 | dt_iris[,c( 43 | setNames( 44 | lapply(.SD, mean), 45 | paste0(colnames(.SD), "_mean") 46 | ), 47 | setNames( 48 | lapply(.SD, sd), 49 | paste0(colnames(.SD), "_sd") 50 | )), by = Species] 51 | ) 52 | 53 | 54 | expect_equal( 55 | take_all(dt_iris, mean, sd, by = Species, sep = "."), 56 | dt_iris[,c( 57 | setNames( 58 | lapply(.SD, mean), 59 | paste0(colnames(.SD), ".mean") 60 | ), 61 | setNames( 62 | lapply(.SD, sd), 63 | paste0(colnames(.SD), ".sd") 64 | )), by = Species] 65 | ) 66 | 67 | expect_equal( 68 | take_all(dt_iris, mean, sd, by = Species, sep = "", suffix = FALSE), 69 | dt_iris[,c( 70 | setNames( 71 | lapply(.SD, mean), 72 | paste0("mean", colnames(.SD)) 73 | ), 74 | setNames( 75 | lapply(.SD, sd), 76 | paste0("sd", colnames(.SD)) 77 | )), by = Species] 78 | ) 79 | 80 | expect_equal( 81 | take_all(iris, stat = mean, by = Species, suffix = FALSE), 82 | dt_iris[, 83 | setNames(lapply(.SD, mean), paste0("stat_", colnames(.SD))), 84 | by = Species] 85 | ) 86 | 87 | expect_equal( 88 | take_all(iris, stat = function(x) 2*mean(x), by = Species, suffix = FALSE), 89 | dt_iris[, 90 | setNames(lapply(.SD, function(x) 2*mean(x)), paste0("stat_", colnames(.SD))), 91 | by = Species] 92 | ) 93 | 94 | 95 | expect_equal( 96 | take_all(iris, mean = mean, by = Species, i = Species!="setosa", suffix = TRUE), 97 | dt_iris[Species!="setosa", 98 | setNames(lapply(.SD, mean), paste0(colnames(.SD), "_mean")), 99 | by = Species] 100 | ) 101 | 102 | expect_equal( 103 | take_all(iris, "mean" = mean, by = Species, suffix = FALSE, .SDcols = -(1:2)), 104 | dt_iris[, 105 | setNames(lapply(.SD, mean), paste0("mean_", colnames(.SD))), 106 | by = Species, 107 | .SDcols = -(1:2)] 108 | ) 109 | 110 | expect_equal( 111 | take_all(iris, "mean" = mean, keyby = Species, suffix = FALSE, .SDcols = -(1:2)), 112 | dt_iris[, 113 | setNames(lapply(.SD, mean), paste0("mean_", colnames(.SD))), 114 | keyby = Species, 115 | .SDcols = -(1:2)] 116 | ) 117 | 118 | 119 | expect_equal( 120 | take_all(iris, 121 | if(startsWith(.name, "Sepal")) mean(.value), 122 | if(startsWith(.name, "Petal")) uniqueN(.value), 123 | by = Species), 124 | { 125 | dt_iris[,c( 126 | lapply(.SD[,.(Sepal.Length, Sepal.Width)], mean), 127 | lapply(.SD[,.(Petal.Length, Petal.Width)], uniqueN) 128 | ), 129 | by = Species] 130 | } 131 | ) 132 | 133 | expect_equal( 134 | take_all(iris, 135 | if(.index %in% 1:2) mean(.x), 136 | if(.index %in% 3:4) uniqueN(.x), 137 | by = Species), 138 | { 139 | dt_iris[,c( 140 | lapply(.SD[,.(Sepal.Length, Sepal.Width)], mean), 141 | lapply(.SD[,.(Petal.Length, Petal.Width)], uniqueN) 142 | ), 143 | by = Species] 144 | } 145 | ) 146 | 147 | data(iris) 148 | dt_iris = as.data.table(iris) 149 | my_fun = function(x){ 150 | if(is.numeric(x)){ 151 | mean(x) 152 | } else { 153 | NULL 154 | } 155 | } 156 | 157 | expect_equal( 158 | take_all(iris, mean = my_fun), 159 | dt_iris[,setNames(lapply(.SD, mean), paste0(names(dt_iris)[-5], "_mean")), .SDcols = -"Species"] 160 | ) 161 | 162 | dt_iris = as.data.table(iris) 163 | expect_equal( 164 | take_all(iris, mean = function(x){ 165 | if(is.numeric(x)){ 166 | mean(x) 167 | } else { 168 | NULL 169 | } 170 | }), 171 | dt_iris[,setNames(lapply(.SD, mean), paste0(names(dt_iris)[-5], "_mean")), .SDcols = -"Species"] 172 | ) 173 | 174 | dt_iris = as.data.table(iris) 175 | expect_equal( 176 | take_all(iris, mean = mean, .SDcols = -"Species"), 177 | dt_iris[,setNames(lapply(.SD, mean), paste0(names(dt_iris)[-5], "_mean")), .SDcols = -"Species"] 178 | ) 179 | -------------------------------------------------------------------------------- /inst/tinytest/test_vlookup.R: -------------------------------------------------------------------------------- 1 | cat("vlookup data.frame\n") 2 | 3 | dict = data.frame(num=1:26,small=letters,cap=LETTERS, stringsAsFactors = FALSE) 4 | 5 | expect_identical(vlookup(1:3,dict), dict[1:3,2]) 6 | expect_identical(xlookup(1:3,dict$num, dict$small), dict[1:3,2]) 7 | expect_identical(vlookup(1:3,dict, result_column = names(dict)), dict[1:3,]) 8 | expect_identical(vlookup(1:3,as.data.table(dict), result_column = names(dict)), as.data.table(dict[1:3,])) 9 | 10 | dict2 = data.frame(num = c(1:26, 1:26), small = c(letters, LETTERS), stringsAsFactors = FALSE) 11 | expect_identical(vlookup(1:3,dict2), dict[1:3,2]) 12 | expect_error(xlookup(1:3,dict2$num, dict$small)) 13 | expect_identical(xlookup(1:3,dict2$num, dict2$small), dict[1:3,2]) 14 | expect_identical(vlookup(1:3,dict2, result_column = names(dict2)), dict[1:3,1:2]) 15 | expect_identical(vlookup(1:3,as.data.table(dict2), result_column = names(dict2)), as.data.table(dict[1:3,1:2])) 16 | 17 | 18 | 19 | 20 | 21 | expect_identical(vlookup(c(45,1:3,58,NA),dict,result_column='cap'), 22 | c(NA,"A", "B", "C", NA,NA)) 23 | expect_identical(vlookup(c(45,1:3,58,NA),dict,result_column='cap', no_match = "Not found"), 24 | c("Not found","A", "B", "C", "Not found","Not found")) 25 | 26 | expect_identical(vlookup(c(45,1:3,58,NA),dict,result_column=c("small", "cap"), no_match = "Not found"), 27 | structure(list(small = c("Not found", "a", "b", "c", "Not found", 28 | "Not found"), cap = c("Not found", "A", "B", "C", "Not found", 29 | "Not found")), row.names = c("NA", "1", "2", "3", "NA.1", "NA.2" 30 | ), class = "data.frame")) 31 | 32 | expect_identical(vlookup(c('z','d','f','d'),dict,lookup_column = 'small', result_column = names(dict)), 33 | dict[c(26,4,6,4),]) 34 | 35 | 36 | expect_error(vlookup(iris, mtcars)) 37 | 38 | 39 | cat("vlookup vector\n") 40 | # with vector 41 | dict=seq_along(letters) 42 | names(dict) = letters 43 | 44 | expect_identical(xlookup(c(6, 4, 2), dict, names(dict)), c("f","d","b")) 45 | expect_identical(xlookup(c("f","d","b"), names(dict), dict), dict[c(6L, 4L, 2L)]) 46 | 47 | 48 | expect_identical( 49 | vlookup(c(1, NA, 2), dict = data.frame(a = c(2, NA), b= c(3, 2))), 50 | c(NA, 2, 3) 51 | ) 52 | expect_identical( 53 | vlookup(c(1, NA, 2), dict = data.table(a = c(2, NA), b= c(3, 2))), 54 | c(NA, 2, 3) 55 | ) 56 | 57 | cat("vlookup excel examples ex2\n") 58 | 59 | # Just for fun. Examples borrowed from Microsoft Excel. 60 | # It is not the R way of doing things. 61 | 62 | # Example 2 63 | 64 | ex2 = fread(" 65 | Item_ID Item Cost Markup 66 | ST-340 Stroller 145.67 0.30 67 | BI-567 Bib 3.56 0.40 68 | DI-328 Diapers 21.45 0.35 69 | WI-989 Wipes 5.12 0.40 70 | AS-469 Aspirator 2.56 0.45 71 | ") 72 | 73 | # Calculates the retail price of diapers by adding the markup percentage to the cost. 74 | expect_identical(vlookup("DI-328", ex2, 3) * (1 + vlookup("DI-328", ex2, 4)) ,28.9575) 75 | 76 | # Calculates the sale price of wipes by subtracting a specified discount from 77 | # the retail price. 78 | expect_identical((vlookup("WI-989", ex2, "Cost") * (1 + vlookup("WI-989", ex2, "Markup"))) * (1 - 0.2), 5.7344) 79 | 80 | A2 = ex2[[1]][1] 81 | A3 = ex2[[1]][2] 82 | 83 | # If the cost of an item is greater than or equal to $20.00, displays the string 84 | # "Markup is nn%"; otherwise, displays the string "Cost is under $20.00". 85 | expect_identical(ifelse(vlookup(A2, ex2, "Cost") >= 20, 86 | paste0("Markup is " , 100 * vlookup(A2, ex2, "Markup"),"%"), 87 | "Cost is under $20.00"), 'Markup is 30%') 88 | 89 | 90 | # If the cost of an item is greater than or equal to $20.00, displays the string 91 | # Markup is nn%"; otherwise, displays the string "Cost is $n.nn". 92 | expect_identical(ifelse(vlookup(A3, ex2, "Cost") >= 20, 93 | paste0("Markup is: " , 100 * vlookup(A3, ex2, "Markup") , "%"), 94 | paste0("Cost is $", vlookup(A3, ex2, "Cost"))), 'Cost is $3.56') 95 | 96 | 97 | # Example 3 98 | cat("vlookup excel examples ex3\n") 99 | 100 | ex3 = fread(' 101 | ID Last_name First_name Title Birth_date 102 | 1 Davis Sara "Sales Rep." 12/8/1968 103 | 2 Fontana Olivier "V.P. of Sales" 2/19/1952 104 | 3 Leal Karina "Sales Rep." 8/30/1963 105 | 4 Patten Michael "Sales Rep." 9/19/1958 106 | 5 Burke Brian "Sales Mgr." 3/4/1955 107 | 6 Sousa Luis "Sales Rep." 7/2/1963 108 | ') 109 | 110 | # If there is an employee with an ID of 5, displays the employee's last name; 111 | # otherwise, displays the message "Employee not found". 112 | expect_identical(ifelse(is.na(vlookup(5,ex3,"Last_name")), 113 | "Employee not found", 114 | vlookup(5,ex3,"Last_name")), 'Burke') 115 | expect_identical(ifelse(is.na(vlookup(15,ex3,"Last_name")), 116 | "Employee not found", 117 | vlookup(15,ex3,"Last_name")), 'Employee not found') 118 | 119 | # For the employee with an ID of 4, concatenates the values of three cells into 120 | # a complete sentence. 121 | expect_identical(paste0(vlookup(4,ex3,"First_name"), " ", 122 | vlookup(4,ex3,"Last_name"), " is a ", 123 | vlookup(4,ex3,"Title")), 'Michael Patten is a Sales Rep.') 124 | 125 | -------------------------------------------------------------------------------- /man/dt_mutate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/verbs.R 3 | \name{dt_mutate} 4 | \alias{dt_mutate} 5 | \alias{dt_summarize} 6 | \alias{dt_summarize_all} 7 | \alias{dt_summarise} 8 | \alias{dt_summarise_all} 9 | \alias{dt_select} 10 | \alias{dt_filter} 11 | \title{'dplyr'-like interface for data.table.} 12 | \usage{ 13 | dt_mutate(data, ..., by) 14 | 15 | dt_summarize(data, ..., by, keyby, fun = NULL) 16 | 17 | dt_summarize_all(data, fun, by, keyby) 18 | 19 | dt_summarise(data, ..., by, keyby, fun = NULL) 20 | 21 | dt_summarise_all(data, fun, by, keyby) 22 | 23 | dt_select(data, ...) 24 | 25 | dt_filter(data, ...) 26 | } 27 | \arguments{ 28 | \item{data}{data.table/data.frame data.frame will be automatically converted 29 | to data.table. \code{dt_mutate} modify data.table object in-place.} 30 | 31 | \item{...}{List of variables or name-value pairs of summary/modifications 32 | functions. The name will be the name of the variable in the result. In the 33 | \code{mutate} function we can use \code{a = b} or \code{a := b} notation. 34 | Advantages of \verb{:=} are multiassignment (\code{c("a", "b") := list(1,2)}) 35 | and parametric assignment (\code{(a) := 2}).} 36 | 37 | \item{by}{unquoted name of grouping variable of list of unquoted names of 38 | grouping variables. For details see \link[data.table:data.table]{data.table}} 39 | 40 | \item{keyby}{Same as \code{by}, but with an additional \code{setkey()} run on the by 41 | columns of the result, for convenience. It is common practice to use 42 | 'keyby=' routinely when you wish the result to be sorted. For details see 43 | \link[data.table:data.table]{data.table}.} 44 | 45 | \item{fun}{function which will be applied to all variables in 46 | \code{dt_summarize} and \code{dt_summarize_all}.} 47 | } 48 | \value{ 49 | data.table 50 | } 51 | \description{ 52 | Subset of 'dplyr' verbs to work with data.table. Note that there is no 53 | \code{group_by} verb - use \code{by} or \code{keyby} argument when needed. 54 | \itemize{ 55 | \item \code{dt_mutate} adds new variables or modify existing variables. If 56 | \code{data} is data.table then it modifies in-place. 57 | \item \code{dt_summarize} computes summary statistics. Splits the data into 58 | subsets, computes summary statistics for each, and returns the result in the 59 | "data.table" form. 60 | \item \code{dt_summarize_all} is the same as \code{dt_summarize} but work over all non-grouping variables. 61 | \item \code{dt_filter} selects rows/cases where conditions are true. Rows 62 | where the condition evaluates to NA are dropped. 63 | \item \code{dt_select} selects column/variables from the data set. Range of 64 | variables are supported, e. g. vs:carb. Characters which start with '^' or 65 | end with '$' considered as Perl-style regular expression patterns. For 66 | example, '^Petal' returns all variables started with 'Petal'. 'Width$' 67 | returns all variables which end with 'Width'. Pattern '^.' matches all 68 | variables and pattern '^.*my_str' is equivalent to \verb{contains "my_str"}. See 69 | examples. 70 | } 71 | } 72 | \examples{ 73 | # examples from 'dplyr' 74 | # newly created variables are available immediately 75 | mtcars \%>\% 76 | dt_mutate( 77 | cyl2 = cyl * 2, 78 | cyl4 = cyl2 * 2 79 | ) \%>\% 80 | head() 81 | 82 | 83 | # you can also use dt_mutate() to remove variables and 84 | # modify existing variables 85 | mtcars \%>\% 86 | dt_mutate( 87 | mpg = NULL, 88 | disp = disp * 0.0163871 # convert to litres 89 | ) \%>\% 90 | head() 91 | 92 | 93 | # window functions are useful for grouped mutates 94 | mtcars \%>\% 95 | dt_mutate( 96 | rank = rank(-mpg, ties.method = "min"), 97 | keyby = cyl) \%>\% 98 | print() 99 | 100 | 101 | # You can drop variables by setting them to NULL 102 | mtcars \%>\% dt_mutate(cyl = NULL) \%>\% head() 103 | 104 | # A summary applied without by returns a single row 105 | mtcars \%>\% 106 | dt_summarise(mean = mean(disp), n = .N) 107 | 108 | # Usually, you'll want to group first 109 | mtcars \%>\% 110 | dt_summarise(mean = mean(disp), n = .N, by = cyl) 111 | 112 | 113 | # Multiple 'by' - variables 114 | mtcars \%>\% 115 | dt_summarise(cyl_n = .N, by = list(cyl, vs)) 116 | 117 | # Newly created summaries immediately 118 | # doesn't overwrite existing variables 119 | mtcars \%>\% 120 | dt_summarise(disp = mean(disp), 121 | sd = sd(disp), 122 | by = cyl) 123 | 124 | # You can group by expressions: 125 | mtcars \%>\% 126 | dt_summarise_all(mean, by = list(vsam = vs + am)) 127 | 128 | # filter by condition 129 | mtcars \%>\% 130 | dt_filter(am==0) 131 | 132 | # filter by compound condition 133 | mtcars \%>\% 134 | dt_filter(am==0, mpg>mean(mpg)) 135 | 136 | 137 | # select 138 | mtcars \%>\% dt_select(vs:carb, cyl) 139 | mtcars \%>\% dt_select(-am, -cyl) 140 | 141 | # regular expression pattern 142 | dt_select(iris, "^Petal") # variables which start from 'Petal' 143 | dt_select(iris, "Width$") # variables which end with 'Width' 144 | # move Species variable to the front. 145 | # pattern "^." matches all variables 146 | dt_select(iris, Species, "^.") 147 | # pattern "^.*i" means "contains 'i'" 148 | dt_select(iris, "^.*i") 149 | dt_select(iris, 1:4) # numeric indexing - all variables except Species 150 | } 151 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(columns,data.frame) 4 | S3method(cube,data.frame) 5 | S3method(dcast,data.frame) 6 | S3method(groupingsets,data.frame) 7 | S3method(let,data.frame) 8 | S3method(let,etable) 9 | S3method(let_all,data.frame) 10 | S3method(let_all,etable) 11 | S3method(let_if,data.frame) 12 | S3method(let_if,etable) 13 | S3method(melt,data.frame) 14 | S3method(query,data.frame) 15 | S3method(query_if,data.frame) 16 | S3method(rollup,data.frame) 17 | S3method(rows,data.frame) 18 | S3method(rows,etable) 19 | S3method(take,data.frame) 20 | S3method(take_all,data.frame) 21 | S3method(take_if,data.frame) 22 | export("%$%") 23 | export("%<>%") 24 | export("%>%") 25 | export("%T>%") 26 | export("%between%") 27 | export("%chin%") 28 | export("%inrange%") 29 | export("%like%") 30 | export(":=") 31 | export(.BY) 32 | export(.GRP) 33 | export(.I) 34 | export(.N) 35 | export(.SD) 36 | export(as.data.table) 37 | export(between) 38 | export(chmatch) 39 | export(coalesce) 40 | export(cols) 41 | export(columns) 42 | export(copy) 43 | export(cube) 44 | export(data.table) 45 | export(dcast) 46 | export(dt_add_count) 47 | export(dt_anti_join) 48 | export(dt_count) 49 | export(dt_filter) 50 | export(dt_full_join) 51 | export(dt_inner_join) 52 | export(dt_left_join) 53 | export(dt_mutate) 54 | export(dt_right_join) 55 | export(dt_select) 56 | export(dt_semi_join) 57 | export(dt_summarise) 58 | export(dt_summarise_all) 59 | export(dt_summarize) 60 | export(dt_summarize_all) 61 | export(dt_top_n) 62 | export(fcase) 63 | export(fcoalesce) 64 | export(fifelse) 65 | export(fintersect) 66 | export(first) 67 | export(foverlaps) 68 | export(frank) 69 | export(frankv) 70 | export(fread) 71 | export(frollapply) 72 | export(frollmean) 73 | export(frollsum) 74 | export(fsetdiff) 75 | export(fsetequal) 76 | export(fsort) 77 | export(funion) 78 | export(fwrite) 79 | export(getDTthreads) 80 | export(groupingsets) 81 | export(guess) 82 | export(inrange) 83 | export(is.data.table) 84 | export(last) 85 | export(let) 86 | export(let_all) 87 | export(let_if) 88 | export(melt) 89 | export(nafill) 90 | export(query) 91 | export(query_if) 92 | export(rbindlist) 93 | export(rleid) 94 | export(rleidv) 95 | export(rollup) 96 | export(rowid) 97 | export(rowidv) 98 | export(rows) 99 | export(set) 100 | export(setDF) 101 | export(setDT) 102 | export(setDTthreads) 103 | export(setcolorder) 104 | export(setindex) 105 | export(setindexv) 106 | export(setkey) 107 | export(setkeyv) 108 | export(setnafill) 109 | export(setnames) 110 | export(setorder) 111 | export(setorderv) 112 | export(shift) 113 | export(shouldPrint) 114 | export(take) 115 | export(take_all) 116 | export(take_if) 117 | export(text_expand) 118 | export(to_df) 119 | export(to_dfc) 120 | export(to_dfr) 121 | export(to_list) 122 | export(to_long) 123 | export(to_vec) 124 | export(to_wide) 125 | export(transpose) 126 | export(tstrsplit) 127 | export(uniqueN) 128 | export(vlookup) 129 | export(xlookup) 130 | import(data.table) 131 | import(magrittr) 132 | importFrom(data.table,"%between%") 133 | importFrom(data.table,"%chin%") 134 | importFrom(data.table,"%inrange%") 135 | importFrom(data.table,"%like%") 136 | importFrom(data.table,":=") 137 | importFrom(data.table,.BY) 138 | importFrom(data.table,.GRP) 139 | importFrom(data.table,.I) 140 | importFrom(data.table,.N) 141 | importFrom(data.table,.SD) 142 | importFrom(data.table,as.data.table) 143 | importFrom(data.table,between) 144 | importFrom(data.table,chmatch) 145 | importFrom(data.table,cube) 146 | importFrom(data.table,data.table) 147 | importFrom(data.table,fcase) 148 | importFrom(data.table,fcoalesce) 149 | importFrom(data.table,fifelse) 150 | importFrom(data.table,fintersect) 151 | importFrom(data.table,first) 152 | importFrom(data.table,foverlaps) 153 | importFrom(data.table,frank) 154 | importFrom(data.table,frankv) 155 | importFrom(data.table,fread) 156 | importFrom(data.table,frollapply) 157 | importFrom(data.table,frollmean) 158 | importFrom(data.table,frollsum) 159 | importFrom(data.table,fsetdiff) 160 | importFrom(data.table,fsetequal) 161 | importFrom(data.table,fsort) 162 | importFrom(data.table,funion) 163 | importFrom(data.table,fwrite) 164 | importFrom(data.table,getDTthreads) 165 | importFrom(data.table,groupingsets) 166 | importFrom(data.table,inrange) 167 | importFrom(data.table,is.data.table) 168 | importFrom(data.table,last) 169 | importFrom(data.table,nafill) 170 | importFrom(data.table,rbindlist) 171 | importFrom(data.table,rleid) 172 | importFrom(data.table,rleidv) 173 | importFrom(data.table,rollup) 174 | importFrom(data.table,rowid) 175 | importFrom(data.table,rowidv) 176 | importFrom(data.table,set) 177 | importFrom(data.table,setDF) 178 | importFrom(data.table,setDT) 179 | importFrom(data.table,setDTthreads) 180 | importFrom(data.table,setcolorder) 181 | importFrom(data.table,setindex) 182 | importFrom(data.table,setindexv) 183 | importFrom(data.table,setkey) 184 | importFrom(data.table,setkeyv) 185 | importFrom(data.table,setnafill) 186 | importFrom(data.table,setnames) 187 | importFrom(data.table,setorder) 188 | importFrom(data.table,setorderv) 189 | importFrom(data.table,shift) 190 | importFrom(data.table,shouldPrint) 191 | importFrom(data.table,transpose) 192 | importFrom(data.table,tstrsplit) 193 | importFrom(data.table,uniqueN) 194 | importFrom(magrittr,"%$%") 195 | importFrom(magrittr,"%<>%") 196 | importFrom(magrittr,"%>%") 197 | importFrom(magrittr,"%T>%") 198 | -------------------------------------------------------------------------------- /man/vlookup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vlookup.R 3 | \name{vlookup} 4 | \alias{vlookup} 5 | \alias{xlookup} 6 | \title{Look up values in dictionary.} 7 | \usage{ 8 | vlookup( 9 | lookup_value, 10 | dict, 11 | result_column = 2, 12 | lookup_column = 1, 13 | no_match = NA 14 | ) 15 | 16 | xlookup(lookup_value, lookup_vector, result_vector, no_match = NA) 17 | } 18 | \arguments{ 19 | \item{lookup_value}{Vector of looked up values} 20 | 21 | \item{dict}{data.frame. Dictionary.} 22 | 23 | \item{result_column}{numeric or character. Resulting columns in the 24 | \code{dict}. Default value for \code{result_column} is 2 - for frequent 25 | case of dictionary with keys in the first column and results in the second 26 | column.} 27 | 28 | \item{lookup_column}{Column of \code{dict} in which lookup value will be 29 | searched. By default, it is the first column of the \code{dict}.} 30 | 31 | \item{no_match}{vector of length one. NA by default. Where a valid match is 32 | not found, return the 'no_match' value you supply.} 33 | 34 | \item{lookup_vector}{vector in which 'lookup_value' will be searched during 'xlookup'.} 35 | 36 | \item{result_vector}{vector with resulting values for 'xlookup'.} 37 | } 38 | \value{ 39 | \code{xlookup} always return vector, \code{vlookup} returns vector if 40 | the \code{result_column} is single value. In the opposite case data.frame will 41 | be returned. 42 | } 43 | \description{ 44 | \code{vlookup} function is inspired by VLOOKUP spreadsheet 45 | function. It looks for a \code{lookup_value} in the \code{lookup_column} of 46 | the \code{dict}, and then returns values in the same rows from 47 | \code{result_column}. 48 | \code{xlookup} is simplified version of \code{vlookup}. It searches for a 49 | \code{lookup_value} in the \code{lookup_vector} and return values in the same 50 | position from the \code{result_vector}. 51 | } 52 | \examples{ 53 | # with data.frame 54 | dict = data.frame(num=1:26, small=letters, cap=LETTERS) 55 | vlookup(1:3, dict) 56 | vlookup(c(45,1:3,58), dict, result_column='cap') 57 | vlookup(c(45,1:3,58), dict, result_column='cap', no_match = "Not found") 58 | 59 | # the same with xlookup 60 | xlookup(1:3, dict$num, dict$small) 61 | xlookup(c(45,1:3,58), dict$num, dict$cap) 62 | xlookup(c(45,1:3,58), dict$num, dict$cap, no_match = "Not found") 63 | 64 | 65 | # example from base 'merge' 66 | authors = data.table( 67 | surname = c("Tukey", "Venables", "Tierney", "Ripley", "McNeil"), 68 | nationality = c("US", "Australia", "US", "UK", "Australia"), 69 | deceased = c("yes", rep("no", 4)) 70 | ) 71 | 72 | books = data.table( 73 | surname = c("Tukey", "Venables", "Tierney", 74 | "Ripley", "Ripley", "McNeil", "R Core"), 75 | title = c("Exploratory Data Analysis", 76 | "Modern Applied Statistics ...", 77 | "LISP-STAT", 78 | "Spatial Statistics", "Stochastic Simulation", 79 | "Interactive Data Analysis", 80 | "An Introduction to R") 81 | ) 82 | 83 | let(books, 84 | c("author_nationality", "author_deceased") := vlookup(surname, 85 | dict = authors, 86 | result_column = 2:3 87 | ) 88 | )[] 89 | 90 | # Just for fun. Examples borrowed from Microsoft Excel. 91 | # It is not the R way of doing things. 92 | 93 | # Example 2 94 | 95 | ex2 = fread(" 96 | Item_ID Item Cost Markup 97 | ST-340 Stroller 145.67 0.30 98 | BI-567 Bib 3.56 0.40 99 | DI-328 Diapers 21.45 0.35 100 | WI-989 Wipes 5.12 0.40 101 | AS-469 Aspirator 2.56 0.45 102 | ") 103 | 104 | # Calculates the retail price of diapers by adding the markup percentage to the cost. 105 | vlookup("DI-328", ex2, 3) * (1 + vlookup("DI-328", ex2, 4)) # 28.9575 106 | 107 | # Calculates the sale price of wipes by subtracting a specified discount from 108 | # the retail price. 109 | (vlookup("WI-989", ex2, "Cost") * (1 + vlookup("WI-989", ex2, "Markup"))) * (1 - 0.2) # 5.7344 110 | 111 | A2 = ex2[["Item_ID"]][1] 112 | A3 = ex2[["Item_ID"]][2] 113 | 114 | # If the cost of an item is greater than or equal to $20.00, displays the string 115 | # "Markup is nn\%"; otherwise, displays the string "Cost is under $20.00". 116 | ifelse(vlookup(A2, ex2, "Cost") >= 20, 117 | paste0("Markup is " , 100 * vlookup(A2, ex2, "Markup"),"\%"), 118 | "Cost is under $20.00") # Markup is 30\% 119 | 120 | 121 | # If the cost of an item is greater than or equal to $20.00, displays the string 122 | # Markup is nn\%"; otherwise, displays the string "Cost is $n.nn". 123 | ifelse(vlookup(A3, ex2, "Cost") >= 20, 124 | paste0("Markup is: " , 100 * vlookup(A3, ex2, "Markup") , "\%"), 125 | paste0("Cost is $", vlookup(A3, ex2, "Cost"))) #Cost is $3.56 126 | 127 | 128 | # Example 3 129 | 130 | ex3 = fread(' 131 | ID Last_name First_name Title Birth_date 132 | 1 Davis Sara "Sales Rep." 12/8/1968 133 | 2 Fontana Olivier "V.P. of Sales" 2/19/1952 134 | 3 Leal Karina "Sales Rep." 8/30/1963 135 | 4 Patten Michael "Sales Rep." 9/19/1958 136 | 5 Burke Brian "Sales Mgr." 3/4/1955 137 | 6 Sousa Luis "Sales Rep." 7/2/1963 138 | ') 139 | 140 | # If there is an employee with an ID of 5, displays the employee's last name; 141 | # otherwise, displays the message "Employee not found". 142 | vlookup(5, ex3, "Last_name", no_match = "Employee not found") # Burke 143 | 144 | # Many employees 145 | vlookup(1:10, ex3, "Last_name", no_match = "Employee not found") 146 | 147 | # For the employee with an ID of 4, concatenates the values of three cells into 148 | # a complete sentence. 149 | paste0(vlookup(4, ex3, "First_name"), " ", 150 | vlookup(4, ex3, "Last_name"), " is a ", 151 | vlookup(4, ex3, "Title")) # Michael Patten is a Sales Rep. 152 | } 153 | -------------------------------------------------------------------------------- /R/verbs.R: -------------------------------------------------------------------------------- 1 | #' 'dplyr'-like interface for data.table. 2 | #' 3 | #' Subset of 'dplyr' verbs to work with data.table. Note that there is no 4 | #' `group_by` verb - use `by` or `keyby` argument when needed. 5 | #' - `dt_mutate` adds new variables or modify existing variables. If 6 | #' `data` is data.table then it modifies in-place. 7 | #' - `dt_summarize` computes summary statistics. Splits the data into 8 | #' subsets, computes summary statistics for each, and returns the result in the 9 | #' "data.table" form. 10 | #' - `dt_summarize_all` is the same as `dt_summarize` but work over all non-grouping variables. 11 | #' - `dt_filter` selects rows/cases where conditions are true. Rows 12 | #' where the condition evaluates to NA are dropped. 13 | #' - `dt_select` selects column/variables from the data set. Range of 14 | #' variables are supported, e. g. vs:carb. Characters which start with '^' or 15 | #' end with '$' considered as Perl-style regular expression patterns. For 16 | #' example, '^Petal' returns all variables started with 'Petal'. 'Width$' 17 | #' returns all variables which end with 'Width'. Pattern '^.' matches all 18 | #' variables and pattern '^.*my_str' is equivalent to `contains "my_str"`. See 19 | #' examples. 20 | #' 21 | #' @param data data.table/data.frame data.frame will be automatically converted 22 | #' to data.table. `dt_mutate` modify data.table object in-place. 23 | #' @param ... List of variables or name-value pairs of summary/modifications 24 | #' functions. The name will be the name of the variable in the result. In the 25 | #' `mutate` function we can use `a = b` or `a := b` notation. 26 | #' Advantages of `:=` are multiassignment (`c("a", "b") := list(1,2)`) 27 | #' and parametric assignment (`(a) := 2`). 28 | #' @param by unquoted name of grouping variable of list of unquoted names of 29 | #' grouping variables. For details see [data.table][data.table::data.table] 30 | #' @param keyby Same as `by`, but with an additional `setkey()` run on the by 31 | #' columns of the result, for convenience. It is common practice to use 32 | #' 'keyby=' routinely when you wish the result to be sorted. For details see 33 | #' [data.table][data.table::data.table]. 34 | #' @param fun function which will be applied to all variables in 35 | #' `dt_summarize` and `dt_summarize_all`. 36 | #' @return data.table 37 | #' @export 38 | #' @examples 39 | #' # examples from 'dplyr' 40 | #' # newly created variables are available immediately 41 | #' mtcars %>% 42 | #' dt_mutate( 43 | #' cyl2 = cyl * 2, 44 | #' cyl4 = cyl2 * 2 45 | #' ) %>% 46 | #' head() 47 | #' 48 | #' 49 | #' # you can also use dt_mutate() to remove variables and 50 | #' # modify existing variables 51 | #' mtcars %>% 52 | #' dt_mutate( 53 | #' mpg = NULL, 54 | #' disp = disp * 0.0163871 # convert to litres 55 | #' ) %>% 56 | #' head() 57 | #' 58 | #' 59 | #' # window functions are useful for grouped mutates 60 | #' mtcars %>% 61 | #' dt_mutate( 62 | #' rank = rank(-mpg, ties.method = "min"), 63 | #' keyby = cyl) %>% 64 | #' print() 65 | #' 66 | #' 67 | #' # You can drop variables by setting them to NULL 68 | #' mtcars %>% dt_mutate(cyl = NULL) %>% head() 69 | #' 70 | #' # A summary applied without by returns a single row 71 | #' mtcars %>% 72 | #' dt_summarise(mean = mean(disp), n = .N) 73 | #' 74 | #' # Usually, you'll want to group first 75 | #' mtcars %>% 76 | #' dt_summarise(mean = mean(disp), n = .N, by = cyl) 77 | #' 78 | #' 79 | #' # Multiple 'by' - variables 80 | #' mtcars %>% 81 | #' dt_summarise(cyl_n = .N, by = list(cyl, vs)) 82 | #' 83 | #' # Newly created summaries immediately 84 | #' # doesn't overwrite existing variables 85 | #' mtcars %>% 86 | #' dt_summarise(disp = mean(disp), 87 | #' sd = sd(disp), 88 | #' by = cyl) 89 | #' 90 | #' # You can group by expressions: 91 | #' mtcars %>% 92 | #' dt_summarise_all(mean, by = list(vsam = vs + am)) 93 | #' 94 | #' # filter by condition 95 | #' mtcars %>% 96 | #' dt_filter(am==0) 97 | #' 98 | #' # filter by compound condition 99 | #' mtcars %>% 100 | #' dt_filter(am==0, mpg>mean(mpg)) 101 | #' 102 | #' 103 | #' # select 104 | #' mtcars %>% dt_select(vs:carb, cyl) 105 | #' mtcars %>% dt_select(-am, -cyl) 106 | #' 107 | #' # regular expression pattern 108 | #' dt_select(iris, "^Petal") # variables which start from 'Petal' 109 | #' dt_select(iris, "Width$") # variables which end with 'Width' 110 | #' # move Species variable to the front. 111 | #' # pattern "^." matches all variables 112 | #' dt_select(iris, Species, "^.") 113 | #' # pattern "^.*i" means "contains 'i'" 114 | #' dt_select(iris, "^.*i") 115 | #' dt_select(iris, 1:4) # numeric indexing - all variables except Species 116 | dt_mutate = function(data, ..., by){ 117 | eval.parent(substitute(maditr::let(data, ..., 118 | by = by)) 119 | ) 120 | } 121 | 122 | 123 | #' @rdname dt_mutate 124 | #' @export 125 | dt_summarize = function(data, ..., by, keyby, fun = NULL){ 126 | eval.parent(substitute(maditr::take(data, ..., 127 | by = by, 128 | keyby = keyby, 129 | fun = fun)) 130 | ) 131 | } 132 | 133 | #' @rdname dt_mutate 134 | #' @export 135 | dt_summarize_all = function(data, fun, by, keyby){ 136 | !missing(fun) || stop("'dt_summarize_all': argument 'fun' is missing.") 137 | eval.parent(substitute(maditr::take(data, 138 | by = by, 139 | keyby = keyby, 140 | fun = fun)) 141 | ) 142 | } 143 | 144 | #' @rdname dt_mutate 145 | #' @export 146 | dt_summarise = dt_summarize 147 | 148 | #' @rdname dt_mutate 149 | #' @export 150 | dt_summarise_all = dt_summarize_all 151 | 152 | #' @rdname dt_mutate 153 | #' @export 154 | dt_select = columns 155 | 156 | 157 | 158 | #' @rdname dt_mutate 159 | #' @export 160 | dt_filter = rows 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /R/dcast.R: -------------------------------------------------------------------------------- 1 | #' Convert data between wide and long forms. 2 | #' 3 | #' The `dcast` formula takes the form `LHS ~ RHS`, ex: `var1 + var2 ~ var3`. The 4 | #' order of entries in the formula is essential. There are two special 5 | #' variables: `.` and `...`. `.` represents no variable; `...` represents all 6 | #' variables not otherwise mentioned in formula. LHS variable values will be in 7 | #' rows. RHS variables values will become column names. 8 | #' `fun.aggregate(value.var)` will be cell values. For details see 9 | #' [dcast][data.table::dcast] and [melt][data.table::melt]. 10 | #' @param data A data.table/data.frame. `data.frame` will be automatically 11 | #' converted to data.table. 12 | #' @param formula A formula of the form LHS ~ RHS to cast. For details see [dcast][data.table::dcast]. 13 | #' @param fun.aggregate Should the data be aggregated before casting? If the 14 | #' formula doesn't identify a single observation for each cell, then 15 | #' aggregation defaults to length with a message. 16 | #' @param sep Character vector of length 1, indicating the separating character 17 | #' in variable names generated during casting. Default is _ for backwards 18 | #' compatibility. 19 | #' @param margins For details see [dcast][data.table::dcast]. 20 | #' @param subset Specified if casting should be done on a subset of the data. 21 | #' @param fill Value with which to fill missing cells. If fun.aggregate is 22 | #' present, takes the value by applying the function on a 0-length vector. 23 | #' @param drop FALSE will cast by including all missing combinations. c(FALSE, 24 | #' TRUE) will only include all missing combinations of formula LHS. And 25 | #' c(TRUE, FALSE) will only include all missing combinations of formula RHS. 26 | #' @param value.var Name of the column whose values will be filled to cast. 27 | #' Function 'guess()' tries to, well, guess this column automatically, if none 28 | #' is provided. It is possible to cast multiple `value.var`` columns 29 | #' simultaneously. For details see [dcast][data.table::dcast]. 30 | #' @param verbose For details see [dcast][data.table::dcast]. 31 | #' @param ... Any other arguments that may be passed to the aggregating function. 32 | #' 33 | #' 34 | #' @param id.vars vector of id variables. Can be integer (corresponding id 35 | #' column numbers) or character (id column names) vector. If missing, all 36 | #' non-measure columns will be assigned to it. If integer, must be positive; 37 | #' see Details. 38 | #' @param measure.vars Measure variables for melting. Can be missing, vector, 39 | #' list, or pattern-based. For details see [dcast][data.table::dcast]. 40 | #' @param variable.name name for the measured variable names column. The default name is 'variable'. 41 | #' @param value.name name for the molten data values column(s). The default name 42 | #' is 'value'. Multiple names can be provided here for the case when 43 | #' measure.vars is a list, though note well that the names provided in 44 | #' measure.vars take precedence. 45 | #' @param na.rm If TRUE, NA values will be removed from the molten data. 46 | #' @param variable.factor If TRUE, the variable column will be converted to 47 | #' factor, else it will be a character column. 48 | #' @param value.factor If TRUE, the value column will be converted to factor, 49 | #' else the molten value type is left unchanged. 50 | #' 51 | #' 52 | #' @return data.table 53 | #' @author Matt Dowle 54 | #' @export 55 | #' @keywords internal 56 | #' @examples 57 | #' # examples from 'tidyr' package 58 | #' stocks = data.frame( 59 | #' time = as.Date('2009-01-01') + 0:9, 60 | #' X = rnorm(10, 0, 1), 61 | #' Y = rnorm(10, 0, 2), 62 | #' Z = rnorm(10, 0, 4) 63 | #' ) 64 | #' stocksm = stocks %>% 65 | #' melt(id.vars = "time", variable.name = "stock", value.name = "price") 66 | #' stocksm %>% dcast(time ~ stock) 67 | #' stocksm %>% dcast(stock ~ time) 68 | #' 69 | #' # dcast and melt are complements 70 | #' df = data.frame(x = c("a", "b"), y = c(3, 4), z = c(5, 6)) 71 | #' df %>% 72 | #' dcast(z ~ x, value.var = "y") %>% 73 | #' melt(id.vars = "z", variable.name = "x", value.name = "y", na.rm = TRUE) 74 | dcast = function(data, formula, fun.aggregate = NULL, sep = "_", 75 | ..., margins = NULL, subset = NULL, fill = NULL, 76 | drop = TRUE, value.var = guess(data), 77 | verbose = getOption("datatable.verbose")){ 78 | UseMethod("dcast") 79 | } 80 | 81 | #' @export 82 | dcast.data.frame = function(data, formula, fun.aggregate = NULL, sep = "_", 83 | ..., margins = NULL, subset = NULL, fill = NULL, 84 | drop = TRUE, value.var = guess(data), 85 | verbose = getOption("datatable.verbose")){ 86 | curr_call = sys.call() 87 | curr_call[[1]] = quote(data.table::dcast.data.table) 88 | curr_call[[2]] = substitute(data.table::as.data.table(data)) 89 | eval.parent(curr_call) 90 | } 91 | 92 | 93 | #' @export 94 | #' @rdname dcast 95 | melt = function(data, id.vars, measure.vars, 96 | variable.name = "variable", value.name = "value", 97 | ..., na.rm = FALSE, variable.factor = TRUE, 98 | value.factor = FALSE, 99 | verbose = getOption("datatable.verbose")){ 100 | UseMethod("melt") 101 | } 102 | 103 | #' @export 104 | melt.data.frame = function(data, id.vars, measure.vars, 105 | variable.name = "variable", value.name = "value", 106 | ..., na.rm = FALSE, variable.factor = TRUE, 107 | value.factor = FALSE, 108 | verbose = getOption("datatable.verbose")){ 109 | curr_call = sys.call() 110 | curr_call[[1]] = quote(data.table::melt.data.table) 111 | curr_call[[2]] = substitute(data.table::as.data.table(data)) 112 | eval.parent(curr_call) 113 | } 114 | 115 | #' @rdname dcast 116 | #' @export 117 | guess = function (data){ 118 | if ("value" %chin% names(data)) 119 | return("value") 120 | if ("(all)" %chin% names(data)) 121 | return("(all)") 122 | var = names(data)[ncol(data)] 123 | message("Using '", var, "' as value column. Use 'value.var' to override") 124 | return(var) 125 | } 126 | -------------------------------------------------------------------------------- /R/join.R: -------------------------------------------------------------------------------- 1 | #' Join two data.frames by common columns. 2 | #' 3 | #' Do different versions of SQL join operations. See examples. 4 | #' 5 | #' @param x data.frame or data.table 6 | #' @param y data.frame or data.table 7 | #' @param by a character vector of variables to join by. If NULL, the default, 8 | #' *_join() will do a natural join, using all variables with common names 9 | #' across the two tables. A message lists the variables so that you can check 10 | #' they're right (to suppress the message, simply explicitly list the 11 | #' variables that you want to join). To join by different variables on x and y 12 | #' use a named vector. For example, `by = c("a" = "b")` will match x.a to y.b. 13 | #' @param suffix If there are non-joined duplicate variables in x and y, these 14 | #' suffixes will be added to the output to disambiguate them. Should be a 15 | #' character vector of length 2. 16 | #' 17 | #' @return data.table 18 | #' @export 19 | #' 20 | #' @examples 21 | #' workers = fread(" 22 | #' name company 23 | #' Nick Acme 24 | #' John Ajax 25 | #' Daniela Ajax 26 | #' ") 27 | #' 28 | #' positions = fread(" 29 | #' name position 30 | #' John designer 31 | #' Daniela engineer 32 | #' Cathie manager 33 | #' ") 34 | #' 35 | #' workers %>% dt_inner_join(positions) 36 | #' workers %>% dt_left_join(positions) 37 | #' workers %>% dt_right_join(positions) 38 | #' workers %>% dt_full_join(positions) 39 | #' 40 | #' # filtering joins 41 | #' workers %>% dt_anti_join(positions) 42 | #' workers %>% dt_semi_join(positions) 43 | #' 44 | #' # To suppress the message, supply 'by' argument 45 | #' workers %>% dt_left_join(positions, by = "name") 46 | #' 47 | #' # Use a named 'by' if the join variables have different names 48 | #' positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions' 49 | #' workers %>% dt_inner_join(positions2, by = c("name" = "worker")) 50 | dt_left_join = function (x, y, by = NULL, suffix = c(".x", ".y")) { 51 | dt_join(x = x, 52 | y = y, 53 | by = by, 54 | suffix = suffix, 55 | all_x = TRUE, 56 | all_y = FALSE, 57 | src = "dt_left_join" 58 | ) 59 | } 60 | 61 | #' @export 62 | #' @rdname dt_left_join 63 | dt_right_join = function (x, y, by = NULL, suffix = c(".x", ".y")) { 64 | dt_join(x = x, 65 | y = y, 66 | by = by, 67 | suffix = suffix, 68 | all_x = FALSE, 69 | all_y = TRUE, 70 | src = "dt_right_join" 71 | ) 72 | } 73 | 74 | #' @export 75 | #' @rdname dt_left_join 76 | dt_inner_join = function (x, y, by = NULL, suffix = c(".x", ".y")) { 77 | dt_join(x = x, 78 | y = y, 79 | by = by, 80 | suffix = suffix, 81 | all_x = FALSE, 82 | all_y = FALSE, 83 | src = "dt_inner_join" 84 | ) 85 | } 86 | 87 | #' @export 88 | #' @rdname dt_left_join 89 | dt_full_join = function (x, y, by = NULL, suffix = c(".x", ".y")) { 90 | dt_join(x = x, 91 | y = y, 92 | by = by, 93 | suffix = suffix, 94 | all_x = TRUE, 95 | all_y = TRUE, 96 | src = "dt_full_join" 97 | ) 98 | } 99 | 100 | #' @export 101 | #' @rdname dt_left_join 102 | dt_semi_join = function (x, y, by = NULL) { 103 | if(!is.data.table(x)) x = as.data.table(x) 104 | if(!is.data.table(y)) y = as.data.table(y) 105 | if(length(by)==0) { 106 | by = get_column_names(colnames(x), colnames(y), src = "dt_semi_join") 107 | } 108 | fsetdiff(x, x[!y, on=by], all=TRUE) 109 | } 110 | 111 | #' @export 112 | #' @rdname dt_left_join 113 | dt_anti_join = function (x, y, by = NULL) { 114 | if(!is.data.table(x)) x = as.data.table(x) 115 | if(!is.data.table(y)) y = as.data.table(y) 116 | if(length(by)==0) { 117 | by = get_column_names(colnames(x), colnames(y), src = "dt_anti_join") 118 | } 119 | by_x = names(by) 120 | by_y = unname(by) 121 | if(is.null(by_x)){ 122 | by_x = by_y 123 | } else { 124 | empty_names = is.na(by_x)|(by_x=="") 125 | by_x[empty_names] = by_y[empty_names] 126 | } 127 | check_existense(by_x, by_y, colnames(x), colnames(y), src = "dt_anti_join") 128 | x[!y, on = by] 129 | } 130 | 131 | get_column_names = function(col_x, col_y, src){ 132 | by = intersect(col_x, col_y) 133 | if(length(by)>0){ 134 | message(sprintf("%s: joining, by = %s", src, paste(paste0('"', by, '"'), collapse = ", "))) 135 | } else { 136 | stop(sprintf("%s: 'by' required, because the data sources have no common variables", src), call. = FALSE) 137 | } 138 | by 139 | } 140 | 141 | dt_join = function(x, y, by, suffix, all_x, all_y, src){ 142 | if(!is.data.table(x)) x = as.data.table(x) 143 | if(!is.data.table(y)) y = as.data.table(y) 144 | if(length(by)==0) { 145 | by_x = by_y = get_column_names(colnames(x), colnames(y), src) 146 | } else { 147 | by_x = names(by) 148 | by_y = unname(by) 149 | if(is.null(by_x)){ 150 | by_x = by_y 151 | } else { 152 | empty_names = is.na(by_x)|(by_x=="") 153 | by_x[empty_names] = by_y[empty_names] 154 | } 155 | } 156 | check_existense(by_x, by_y, colnames(x), colnames(y), src) 157 | merge(x = x, 158 | y = y, 159 | by.x = by_x, 160 | by.y = by_y, 161 | all.x = all_x, 162 | all.y = all_y, 163 | suffixes = suffix, 164 | sort = FALSE, 165 | no.dups = TRUE, 166 | allow.cartesian = TRUE 167 | ) 168 | 169 | } 170 | 171 | 172 | check_existense = function(by_x, by_y, names_x, names_y, src){ 173 | err = setdiff(by_x, names_x) 174 | if(length(err)>0){ 175 | err = paste(paste0("'", err, "'"), collapse = ", ") 176 | stop(sprintf("'%s': 'by' can't contain join column(s) %s which is missing from LHS", src, err), call. = FALSE) 177 | } 178 | err = setdiff(by_y, names_y) 179 | if(length(err)>0){ 180 | err = paste(paste0("'", err, "'"), collapse = ", ") 181 | stop(sprintf("'%s': 'by' can't contain join column(s) %s which is missing from RHS", src, err), call. = FALSE) 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /inst/tinytest/test_to_list.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","to_list", "\n") 2 | 3 | expect_identical( 4 | to_list(1:3), 5 | as.list(1:3) 6 | ) 7 | 8 | data(iris) 9 | expect_identical( 10 | to_list(iris, if(is.numeric(.value)) .value), 11 | as.list(iris[1:4]) 12 | ) 13 | 14 | 15 | expect_identical( 16 | to_list(iris, if(is.numeric(.x)) .x, skip_null = FALSE), 17 | c(as.list(iris[1:4]), list(Species = NULL)) 18 | ) 19 | 20 | expect_identical( 21 | iris %>% to_list(if(is.numeric(.x)) .x, skip_null = FALSE), 22 | c(as.list(iris[1:4]), list(Species = NULL)) 23 | ) 24 | 25 | expect_identical( 26 | to_list(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x)), 27 | c(as.list(colMeans(iris[1:4])), list(Species = uniqueN(iris$Species))) 28 | ) 29 | 30 | expect_identical( 31 | to_list(1:5, .index^2 + .index), 32 | as.list((1:5)^2 + 1:5) 33 | 34 | ) 35 | 36 | 37 | expect_identical( 38 | to_list((1:5)*100, sqrt), 39 | lapply((1:5)*100, sqrt) 40 | ) 41 | 42 | 43 | 44 | expect_identical( 45 | to_list((1:5)*10, (.index)), 46 | as.list(1:5) 47 | ) 48 | 49 | 50 | expect_identical( 51 | to_list((1:5)*10, (.name)), 52 | as.list(rep("", 5)) 53 | ) 54 | 55 | expect_identical( 56 | to_list(iris, if(grepl("Sepal", .name)) .x), 57 | as.list(iris[,c("Sepal.Length", "Sepal.Width")]) 58 | ) 59 | 60 | 61 | ######## 62 | cat("\nContext:","to_vec", "\n") 63 | expect_identical( 64 | to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x)), 65 | c(colMeans(iris[1:4]), Species = uniqueN(iris$Species)) 66 | ) 67 | 68 | 69 | expect_identical( 70 | to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x), use.names = FALSE), 71 | unname(c(colMeans(iris[1:4]), Species = uniqueN(iris$Species))) 72 | ) 73 | 74 | expect_identical( 75 | to_vec(iris, is.numeric), 76 | sapply(iris, is.numeric) 77 | ) 78 | 79 | cat("\nContext:","to_df", "\n") 80 | 81 | data("mtcars") 82 | 83 | expect_identical( 84 | to_df(list(1:2, 2:3, 3:4)), 85 | data.table(V1 = 1:3, V2 = 2:4) 86 | ) 87 | expect_identical( 88 | to_df(mtcars, list(var = .name, mean = mean(.x), sd = sd(.x))), 89 | data.table(var = names(mtcars), mean = colMeans(mtcars), sd = sapply(mtcars, sd)) 90 | ) 91 | 92 | expect_identical( 93 | to_df(mtcars, list(mean = mean(.x), sd = sd(.x)), idvalue = .name, idname = "var"), 94 | data.table(mean = colMeans(mtcars), sd = sapply(mtcars, sd), var = names(mtcars)) 95 | ) 96 | 97 | expect_identical( 98 | to_df(unname(as.list(mtcars)), list(mean = mean(.x), sd = sd(.x)), idvalue = .name, idname = "var"), 99 | data.table(mean = colMeans(mtcars), sd = sapply(mtcars, sd), var = "") 100 | ) 101 | 102 | expect_identical( 103 | to_dfr(mtcars, list(mean = mean(.x), sd = sd(.x)), idvalue = .index), 104 | data.table(mean = colMeans(mtcars), sd = sapply(mtcars, sd), item_id = seq_along(mtcars)) 105 | ) 106 | 107 | vec = c("a", "b", "c") 108 | expect_identical( 109 | to_dfr(vec, paste0(.x, 1:3), idvalue = .x), 110 | data.table(V1 = paste0(vec, 1), 111 | V2 = paste0(vec, 2), 112 | V3 = paste0(vec, 3), 113 | item_id = vec) 114 | ) 115 | 116 | 117 | 118 | expect_identical( 119 | to_dfr(vec, paste0(.x, 1:3), idvalue = .x), 120 | data.table(V1 = paste0(vec, 1), 121 | V2 = paste0(vec, 2), 122 | V3 = paste0(vec, 3), 123 | item_id = vec) 124 | ) 125 | 126 | 127 | expect_identical( 128 | to_dfr(vec, paste0(.x, 1:3), idvalue = paste0(.x, .index)), 129 | data.table(V1 = paste0(vec, 1), 130 | V2 = paste0(vec, 2), 131 | V3 = paste0(vec, 3), 132 | item_id = paste0(vec, 1:3)) 133 | ) 134 | 135 | expect_identical( 136 | to_dfc(mtcars, c( mean(.x), sd(.x))), 137 | as.data.table(rbind(colMeans(mtcars), sd = sapply(mtcars, sd))) 138 | ) 139 | 140 | cat("\nContext:","to_list scoping", "\n") 141 | 142 | my_fun = function(x) { 143 | d = 2 144 | to_vec(x, .x*d) 145 | } 146 | 147 | expect_equal( 148 | my_fun(1:3), 149 | 2*(1:3) 150 | ) 151 | 152 | cat("\nContext:","to_list nested", "\n") 153 | 154 | a = c(a = 1, b = 2, c = 3) 155 | b = c(aa = 10, bb = 20, cc = 30) 156 | 157 | expect_identical( 158 | to_list(a, 159 | paste(.index, .name, .x, to_vec(b, paste(.index, .name, .x))) 160 | ), 161 | list(a = c("1 a 1 1 aa 10", "1 a 1 2 bb 20", "1 a 1 3 cc 30"), 162 | b = c("2 b 2 1 aa 10", "2 b 2 2 bb 20", "2 b 2 3 cc 30"), 163 | c = c("3 c 3 1 aa 10", "3 c 3 2 bb 20", "3 c 3 3 cc 30")) 164 | ) 165 | if(getOption("covr", TRUE)){ 166 | cat("\nContext:","progress_bars", "\n") 167 | expect_equal( 168 | to_list(1:10, identity, trace = TRUE), 169 | as.list(1:10) 170 | ) 171 | expect_equal( 172 | to_vec(setNames(1:26, letters), identity, trace = TRUE), 173 | setNames(1:26, letters) 174 | ) 175 | expect_equal( 176 | to_vec(setNames(1:26, letters), identity, trace = quote(cat("current name:", .name, "\n"))), 177 | setNames(1:26, letters) 178 | ) 179 | expect_equal( 180 | to_list(1:100, function(x) { 181 | Sys.sleep(0.01) 182 | x^2 183 | }, trace = "pb"), 184 | as.list((1:100)^2) 185 | ) 186 | expect_equal( 187 | to_list(1:100,{ 188 | Sys.sleep(.01) 189 | .x^2 190 | }, trace = "pb"), 191 | as.list((1:100)^2) 192 | ) 193 | expect_equal( 194 | to_vec(1:100, function(x) { 195 | Sys.sleep(.01) 196 | x^2 197 | }, trace = "pb", trace_step = 10), 198 | ((1:100)^2) 199 | ) 200 | expect_equal( 201 | to_vec(1:100, { 202 | Sys.sleep(.01) 203 | .x^2 204 | }, trace = TRUE, trace_step = 10), 205 | ((1:100)^2) 206 | ) 207 | 208 | expect_equal( 209 | to_list(1:100, function(x) { 210 | Sys.sleep(.01) 211 | x^2 212 | }, trace = TRUE, trace_step = 10), 213 | as.list((1:100)^2) 214 | ) 215 | expect_equal( 216 | to_dfc(mtcars, `+`, 1, trace = TRUE), 217 | as.data.table(mtcars + 1) 218 | 219 | ) 220 | 221 | expect_equal( 222 | to_list(iris, grepl, pattern = "versi", trace = "pb"), 223 | lapply(iris, grepl, pattern = "versi") 224 | 225 | ) 226 | } 227 | -------------------------------------------------------------------------------- /inst/tinytest/test_let_all.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","let_all", "\n") 2 | 3 | scale2 = function(x) c(scale(x)) 4 | data(iris) 5 | data(mtcars) 6 | dt_iris = as.data.table(iris) 7 | dt_mt = as.data.table(mtcars) 8 | expect_equal( 9 | let_all(iris, if(is.numeric(.x)) scale2(.x)), 10 | dt_iris[, names(dt_iris[,-5]) := lapply(.SD, scale2), .SDcols = -5] 11 | ) 12 | 13 | expect_equal( 14 | let_all(mtcars, wow = if(.index==2) .x, by = cyl), 15 | dt_mt 16 | ) 17 | 18 | dt_iris = as.data.table(iris) 19 | expect_equal( 20 | let_all(iris, scaled = if(is.numeric(.x)) scale2(.x)), 21 | dt_iris[, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, scale2), .SDcols = -5] 22 | ) 23 | 24 | dt_iris = as.data.table(iris) 25 | expect_equal( 26 | let_all(iris, scaled = if(is.numeric(.x)) c(scale2(.x)), by = Species), 27 | dt_iris[, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, function(x) c(scale2(x))), by = Species] 28 | ) 29 | 30 | dt_iris = as.data.table(iris) 31 | res = dt_iris[, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, function(x) c(scale2(x))), by = Species] 32 | dt_iris = as.data.table(iris) 33 | expect_equal( 34 | let_all(dt_iris, scaled = if(is.numeric(.x)) c(scale2(.x)), by = Species), 35 | res 36 | ) 37 | 38 | dt_iris = as.data.table(iris) 39 | res = dt_iris[, paste0(c("Sepal.Width", "Petal.Length"), "_scaled") := lapply(.SD[,c("Sepal.Width", "Petal.Length")], 40 | function(x) c(scale2(x))), by = Species] 41 | dt_iris = as.data.table(iris) 42 | expect_equal( 43 | let_all(dt_iris, scaled = if(.index %in% 2:3) c(scale2(.x)), by = Species), 44 | res 45 | ) 46 | 47 | dt_iris = as.data.table(iris) 48 | res = dt_iris[, paste0(c("Sepal.Width", "Petal.Length"), "_scaled") := lapply(.SD[,c("Sepal.Width", "Petal.Length")], 49 | function(x) c(scale2(x))), keyby = Species] 50 | dt_iris = as.data.table(iris) 51 | expect_equal( 52 | let_all(dt_iris, scaled = if(.index %in% 2:3) c(scale2(.x)), keyby = Species), 53 | res 54 | ) 55 | 56 | 57 | dt_iris = as.data.table(iris) 58 | expect_equal( 59 | let_all(iris[FALSE, ], scaled = if(is.numeric(.x)) scale2(.x)), 60 | dt_iris[FALSE, ][,paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, scale2), .SDcols = -5] 61 | ) 62 | 63 | dt_iris = as.data.table(iris) 64 | expect_equal( 65 | let_all(iris, scaled = if(is.numeric(.x)) scale2(.x), uniqueN, by = Species), 66 | dt_iris[, c(paste0(names(dt_iris[,-5]), "_scaled"), paste0(names(dt_iris[,-5]), "_uniqueN")) := c(lapply(.SD, scale2), lapply(.SD, uniqueN)), 67 | by = Species] 68 | ) 69 | 70 | dt_iris = as.data.table(iris) 71 | expect_equal( 72 | let_all(iris, scaled = if(.index %in% 2:3) scale2(.x), scaled = uniqueN, by = Species), 73 | dt_iris[, c(paste0(c("Sepal.Width", "Petal.Length"), "_scaled"), paste0(names(dt_iris[,-5]), "_scaled.1")) := c(lapply(.SD[,2:3, with = TRUE], scale2), lapply(.SD, uniqueN)), 74 | by = Species] 75 | ) 76 | 77 | dt_iris = as.data.table(iris) 78 | expect_equal( 79 | let_all(iris, scaled = if(.index %in% 2:3) scale2(.x), scaled = uniqueN), 80 | dt_iris[, c(paste0(c("Sepal.Width", "Petal.Length"), "_scaled"), paste0(names(dt_iris), "_scaled.1")) := c(lapply(.SD[,2:3, with = TRUE], scale2), lapply(.SD, uniqueN))] 81 | ) 82 | 83 | dt_iris = as.data.table(iris) 84 | expect_equal( 85 | let_all(dt_iris, scaled = if(is.numeric(.x)) scale2(.x), uniqueN, by = Species), 86 | dt_iris 87 | ) 88 | 89 | dt_iris = as.data.table(iris) 90 | expect_equal( 91 | let_all(iris, scaled = if(is.numeric(.x)) scale2(.x), uniqueN), 92 | dt_iris[, c(paste0(names(dt_iris[,-5]), "_scaled"), paste0(names(dt_iris), "_uniqueN")) := c(lapply(.SD[,-5], scale2), lapply(.SD, uniqueN))] 93 | ) 94 | 95 | dt_iris = as.data.table(iris) 96 | expect_equal( 97 | let_all(iris, scaled = if(is.numeric(.x)) scale2(.x), i = FALSE), 98 | dt_iris[FALSE, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, scale2), .SDcols = -5] 99 | ) 100 | 101 | dt_iris = as.data.table(iris) 102 | expect_equal( 103 | let_all(iris, mean = mean(.x), by = Species, suffix = FALSE), 104 | dt_iris[, paste0("mean_", names(dt_iris)[-5]) := lapply(.SD, mean), by = Species] 105 | ) 106 | 107 | dt_iris = as.data.table(iris) 108 | expect_equal( 109 | let_all(iris, mean = mean(.x), by = Species, sep = ""), 110 | dt_iris[, paste0(names(dt_iris)[-5], "mean") := lapply(.SD, mean), by = Species] 111 | ) 112 | 113 | 114 | dt_iris = as.data.table(iris) 115 | expect_equal( 116 | let_all(iris, mean, sd, length, by = Species, sep = "_"), 117 | dt_iris[, c(paste0(names(dt_iris)[-5], "_mean"), paste0(names(dt_iris)[-5], "_sd"), paste0(names(dt_iris)[-5], "_length")) := 118 | c(lapply(.SD, mean), lapply(.SD, sd), lapply(.SD, length)), by = Species] 119 | ) 120 | 121 | dt_iris = as.data.table(iris) 122 | my_name = "N" 123 | expect_equal( 124 | let_all(iris, mean, sd, (my_name) := length, by = Species, sep = "_"), 125 | dt_iris[, c(paste0(names(dt_iris)[-5], "_mean"), paste0(names(dt_iris)[-5], "_sd"), paste0(names(dt_iris)[-5], "_N")) := 126 | c(lapply(.SD, mean), lapply(.SD, sd), lapply(.SD, length)), by = Species] 127 | ) 128 | 129 | data(iris) 130 | dt_iris = as.data.table(iris) 131 | my_fun = function(x){ 132 | if(is.numeric(x)){ 133 | mean(x) 134 | } else { 135 | NULL 136 | } 137 | } 138 | 139 | expect_equal( 140 | let_all(iris, mean = my_fun), 141 | dt_iris[,paste0(names(dt_iris)[-5], "_mean") := lapply(.SD, mean), .SDcols = -"Species"] 142 | ) 143 | 144 | dt_iris = as.data.table(iris) 145 | expect_equal( 146 | let_all(iris, mean = function(x){ 147 | if(is.numeric(x)){ 148 | mean(x) 149 | } else { 150 | NULL 151 | } 152 | }), 153 | dt_iris[,paste0(names(dt_iris)[-5], "_mean") := lapply(.SD, mean), .SDcols = -"Species"] 154 | ) 155 | 156 | dt_iris = as.data.table(iris) 157 | expect_equal( 158 | let_all(iris, mean = mean, .SDcols = -"Species"), 159 | dt_iris[,paste0(names(dt_iris)[-5], "_mean") := lapply(.SD, mean), .SDcols = -"Species"] 160 | ) 161 | 162 | 163 | #### with etable 164 | 165 | etab = data.frame(a = 1:2, b = 3:4) 166 | class(etab) = c("etable", class(etab)) 167 | res = etab 168 | res$a_new = res$a + 1 169 | res$b_new = res$b + 1 170 | 171 | etab2 = let_all(etab, new = .x + 1) 172 | expect_identical(etab2, res) 173 | 174 | -------------------------------------------------------------------------------- /inst/tinytest/test_join.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:","dt_join_*", "\n") 2 | workers = fread(" 3 | name company 4 | Nick Acme 5 | John Ajax 6 | Daniela Ajax 7 | ", stringsAsFactors=FALSE, data.table = FALSE) 8 | positions = fread(" 9 | name position 10 | John designer 11 | Daniela engineer 12 | Cathie manager 13 | ", stringsAsFactors=FALSE, data.table = FALSE) 14 | 15 | data("iris") 16 | data("mtcars") 17 | expect_error(dt_left_join(mtcars, iris)) 18 | expect_error(dt_left_join(workers, positions, by = "xxx")) 19 | expect_error(dt_anti_join(mtcars, iris)) 20 | expect_error(dt_anti_join(mtcars, iris, by = "am")) 21 | expect_error(dt_anti_join(mtcars, iris, by = "Species")) 22 | expect_error(dt_anti_join(mtcars, iris, by = c("Species", "Sepal.Length"))) 23 | expect_error(dt_anti_join(mtcars, iris, by = c("am", "vs"))) 24 | expect_error(dt_anti_join(mtcars, iris, by = c('xxx' = "am", 'yyy' = "vs"))) 25 | expect_error(dt_anti_join(mtcars, iris, by = c('xxx' = "am", "vs"))) 26 | expect_error(dt_anti_join(workers, positions, by = "xxx")) 27 | 28 | res = workers %>% dt_inner_join(positions) 29 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax", 30 | "Ajax"), position = c("designer", "engineer")), class = c("data.table", 31 | "data.frame"), row.names = c(NA, -2L)) 32 | expect_equal(res, etal) 33 | 34 | res = workers %>% dt_left_join(positions) 35 | etal = structure(list(name = c("Nick", "John", "Daniela"), company = c("Acme", 36 | "Ajax", "Ajax"), position = c(NA, "designer", "engineer")), class = c("data.table", 37 | "data.frame"), row.names = c(NA, -3L)) 38 | expect_equal(res, etal) 39 | 40 | res = workers %>% dt_right_join(positions) 41 | etal = structure(list(name = c("John", "Daniela", "Cathie"), company = c("Ajax", 42 | "Ajax", NA), position = c("designer", "engineer", "manager")), row.names = c(NA, 43 | -3L), class = c("data.table", "data.frame")) 44 | expect_equal(res, etal) 45 | 46 | res = workers %>% dt_full_join(positions) 47 | etal = structure(list(name = c("Nick", "John", "Daniela", "Cathie"), 48 | company = c("Acme", "Ajax", "Ajax", NA), position = c(NA, 49 | "designer", "engineer", "manager")), row.names = c(NA, -4L 50 | ), class = c("data.table", "data.frame")) 51 | expect_equal(res, etal) 52 | 53 | res = workers %>% dt_anti_join(positions) 54 | etal = structure(list(name = "Nick", company = "Acme"), class = c("data.table", 55 | "data.frame"), row.names = c(NA, -1L)) 56 | expect_equal(res, etal) 57 | 58 | res = workers %>% dt_semi_join(positions) 59 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax", 60 | "Ajax")), class = c("data.table", "data.frame"), row.names = c(NA, 61 | -2L)) 62 | expect_equal(res, etal) 63 | # To suppress the message, supply 'by' argument 64 | res = workers %>% dt_left_join(positions, by = "name") 65 | etal = structure(list(name = c("Nick", "John", "Daniela"), company = c("Acme", 66 | "Ajax", "Ajax"), position = c(NA, "designer", "engineer")), class = c("data.table", 67 | "data.frame"), row.names = c(NA, -3L)) 68 | expect_equal(res, etal) 69 | # Use a named 'by' if the join variables have different names 70 | positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions' 71 | res = workers %>% dt_inner_join(positions2, by = c("name" = "worker")) 72 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax", 73 | "Ajax"), position = c("designer", "engineer")), class = c("data.table", 74 | "data.frame"), row.names = c(NA, -2L)) 75 | expect_equal(res, etal) 76 | 77 | 78 | res = workers %>% dt_anti_join(positions2, by = c("name" = "worker")) 79 | etal = structure(list(name = "Nick", company = "Acme"), class = c("data.table", 80 | "data.frame"), row.names = c(NA, -1L)) 81 | expect_equal(res, etal) 82 | 83 | res = workers %>% dt_semi_join(positions2, by = c("name" = "worker")) 84 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax", 85 | "Ajax")), class = c("data.table", "data.frame"), row.names = c(NA, 86 | -2L)) 87 | expect_equal(res, etal) 88 | 89 | 90 | positions3 = positions[c(1,1,1:3), ] 91 | workers2 = workers[c(1, 2,2,2, 3),] 92 | res = workers2 %>% dt_left_join(positions3, by = "name") 93 | etal = structure(list(name = c("Nick", "John", "John", "John", "John", 94 | "John", "John", "John", "John", "John", "Daniela"), company = c("Acme", 95 | "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", 96 | "Ajax", "Ajax"), position = c(NA, "designer", "designer", "designer", 97 | "designer", "designer", "designer", "designer", "designer", "designer", 98 | "engineer")), class = c("data.table", "data.frame"), row.names = c(NA, 99 | -11L)) 100 | expect_equal(res, etal) 101 | 102 | res = workers2 %>% dt_semi_join(positions3, by = "name") 103 | etal = structure(list(name = c("John", "John", "John", "Daniela"), company = c("Ajax", 104 | "Ajax", "Ajax", "Ajax")), class = c("data.table", "data.frame" 105 | ), row.names = c(NA, -4L)) 106 | expect_equal(res, etal) 107 | 108 | d1 = data.table(a=rep(1:2,each=3), b=1:6, key=c("a","b")) 109 | d2 = data.table(a=0:1, b=0:1, key=c("a","b")) 110 | 111 | 112 | res = dt_inner_join(d1, d2, by="a") 113 | etal = structure(list(a = c(1L, 1L, 1L), b.x = 1:3, b.y = c(1L, 1L, 114 | 1L)), class = c("data.table", "data.frame"), row.names = c(NA, 115 | -3L)) 116 | expect_equal(res, etal) 117 | res = dt_inner_join(d1, d2, by="a", suffix=c(".d1", ".d2")) 118 | etal = structure(list(a = c(1L, 1L, 1L), b.d1 = 1:3, b.d2 = c(1L, 1L, 119 | 1L)), class = c("data.table", "data.frame"), row.names = c(NA, 120 | -3L)) 121 | expect_equal(res, etal) 122 | 123 | d1 = data.table(a=rep(1:2,each=3), b=1:6, e = 1) 124 | d2 = data.table(a=0:1, b=0:1, key=c("a","b"), d = 3) 125 | 126 | res = dt_left_join(d1, d2, by = c("a", "b")) 127 | etal = structure(list(a = c(1L, 1L, 1L, 2L, 2L, 2L), b = 1:6, e = c(1, 128 | 1, 1, 1, 1, 1), d = c(3, NA, NA, NA, NA, NA)), class = c("data.table", 129 | "data.frame"), row.names = c(NA, -6L)) 130 | expect_equal(res, etal) 131 | 132 | d3 = setnames(d2, c("a", "v", "d")) 133 | 134 | res = dt_full_join(d1, d3, by = c("a", "b" = "v")) 135 | etal = structure(list(a = c(1L, 1L, 1L, 2L, 2L, 2L, 0L), b = c(1L, 2L, 136 | 3L, 4L, 5L, 6L, 0L), e = c(1, 1, 1, 1, 1, 1, NA), d = c(3, NA, 137 | NA, NA, NA, NA, 3)), row.names = c(NA, -7L), class = c("data.table", 138 | "data.frame")) 139 | expect_equal(res, etal) 140 | 141 | res = dt_anti_join(d1, d3, by = c("a", "b" = "v")) 142 | etal =structure(list(a = c(1L, 1L, 2L, 2L, 2L), b = 2:6, e = c(1, 1, 143 | 1, 1, 1)), class = c("data.table", "data.frame"), row.names = c(NA, 144 | -5L)) 145 | expect_equal(res, etal) 146 | res = dt_semi_join(d1, d3, by = c("a", "b" = "v")) 147 | etal =structure(list(a = 1L, b = 1L, e = 1), class = c("data.table", 148 | "data.frame"), row.names = c(NA, -1L)) 149 | expect_equal(res, etal) 150 | 151 | -------------------------------------------------------------------------------- /man/to_long.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/to_wide.R 3 | \name{to_long} 4 | \alias{to_long} 5 | \alias{to_wide} 6 | \title{Convert data to long or to wide form} 7 | \usage{ 8 | to_long( 9 | data, 10 | columns = NULL, 11 | keep = NULL, 12 | names_in = "variable", 13 | values_in = "value", 14 | drop_na = FALSE, 15 | names_factor = TRUE, 16 | value_factor = FALSE, 17 | ... 18 | ) 19 | 20 | to_wide( 21 | data, 22 | keep = NULL, 23 | names_in = variable, 24 | values_in = value, 25 | fun = identity, 26 | sep = "_", 27 | fill = NA, 28 | missing_comb = c("none", "rows", "columns", "all"), 29 | ... 30 | ) 31 | } 32 | \arguments{ 33 | \item{data}{A data.frame to convert} 34 | 35 | \item{columns}{unquoted names of variables for stacking. When missing, we 36 | will stack all columns outside \code{keep} columns.} 37 | 38 | \item{keep}{unquoted names of columns which will be kept as is, e. g. only 39 | recycled or deduplicated. If missing, it is all columns except stacked or 40 | unstacked. If \code{FALSE} then nothing will be kept.} 41 | 42 | \item{names_in}{name of the stacked variable names column. The default name 43 | is 'variable'. It is quoted in the \code{to_long} and unquoted in \code{to_wide}. If 44 | \code{FALSE} in the \code{to_wide} than nothing will be widening.} 45 | 46 | \item{values_in}{name(-s) of the stacked data values column(s). The default 47 | name is 'value'. Multiple names can be provided here for the case when 48 | \code{columns} is a list, though note well that the names provided in 49 | \code{columns} take precedence. It is quoted in the \code{to_long} and unqoted in \code{to_wide}} 50 | 51 | \item{drop_na}{If TRUE, NA values will be removed from the stacked data.} 52 | 53 | \item{names_factor}{If TRUE, the column with names will be converted to 54 | factor, else it will be a character column. TRUE by default.} 55 | 56 | \item{value_factor}{If TRUE, the value column will be converted to factor, 57 | else the stacked values type is left unchanged. FALSE by default.} 58 | 59 | \item{...}{other arguments passed to \code{data.table::melt}/\code{data.table::dcast}} 60 | 61 | \item{fun}{Should the data be aggregated before casting? By default, it is 62 | \code{identity} - no aggregation. To use multiple aggregation functions, pass a 63 | list; see Examples.} 64 | 65 | \item{sep}{Character vector of length 1, indicating the separating character 66 | in variable names generated during casting. Default is "_".} 67 | 68 | \item{fill}{Value with which to fill missing cells. \code{NA} by default. If \code{fun} is 69 | present, takes the value by applying the function on a 0-length vector.} 70 | 71 | \item{missing_comb}{One of "none" (the default), "rows" - include missing 72 | combinations in rows, "columns" - include missing combinations in columns, 73 | and "all" include all missing combinations.} 74 | } 75 | \value{ 76 | data.table in the wide or long form. 77 | } 78 | \description{ 79 | \code{to_long} increases number of rows in the dataset and reduce number of 80 | columns. \code{to_wide} makes invert transformation. You can use \link{cols} for 81 | selecting variables in the arguments. See examples. 82 | } 83 | \examples{ 84 | data(iris) 85 | 86 | # 'to_long' 87 | 88 | long_iris = iris \%>\% 89 | to_long(keep = Species) 90 | 91 | long_iris 92 | 93 | iris_with_stat = long_iris \%>\% 94 | take(mean = mean(value), 95 | sd = sd(value), 96 | n = .N*1.0, 97 | by = .(Species, variable) 98 | ) \%>\% 99 | to_long(columns = c(mean, sd, n), names_in = "stat") 100 | 101 | # 'to_wide' - table with multiple stats 102 | iris_with_stat \%>\% 103 | to_wide() 104 | 105 | 106 | iris_with_stat \%>\% 107 | to_wide(names_in = c(variable, stat)) 108 | 109 | iris_with_stat \%>\% 110 | to_wide(names_in = c(variable, Species)) 111 | 112 | # 'to_wide' - aggregation function 113 | long_iris \%>\% 114 | to_wide(fun = list(Mean = mean, SD = sd, N = length)) 115 | 116 | 117 | # '\%to\%' selector - example from tidyr::pivot_longer 118 | 119 | data(anscombe) 120 | anscombe \%>\% 121 | to_long( 122 | list(x = x1 \%to\% x4, y = y1 \%to\% y4), 123 | names_in = "set" 124 | ) 125 | 126 | ###################################### 127 | ## Examples from data.table melt/dcast 128 | ###################################### 129 | 130 | set.seed(45) 131 | DT = data.table( 132 | i_1 = c(1:5, NA)*1.0, 133 | i_2 = c(NA,6,7,8,9,10)*1.0, 134 | f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)), 135 | f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE), 136 | c_1 = sample(c(letters[1:3], NA), 6, TRUE), 137 | d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"), 138 | d_2 = as.Date(6:1, origin="2012-01-01") 139 | ) 140 | 141 | # id, values as character/integer/numeric vectors 142 | 143 | to_long(DT, f_1, keep = 1:2) 144 | to_long(DT, f_1, keep = c(i_1, i_2)) 145 | to_long(DT, f_1, keep = i_1 \%to\% i_2) 146 | to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE) 147 | to_long(DT, f_1, keep = cols("i_{1:2}")) 148 | to_long(DT, f_1, keep = cols("^i_")) 149 | to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val") 150 | 151 | col_var = "^i_" 152 | to_long(DT, 3, keep = cols(col_var)) 153 | 154 | to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE) 155 | 156 | to_long(mtcars) 157 | to_long(mtcars, keep = am) 158 | to_long(mtcars, columns = c(am, vs, mpg)) 159 | to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE) 160 | to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE) 161 | to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE) 162 | 163 | data("ChickWeight") 164 | names(ChickWeight) = tolower(names(ChickWeight)) 165 | DT = to_long(ChickWeight, keep=2:4) 166 | 167 | to_wide(DT, keep = time, fun = mean) 168 | to_wide(DT, keep = FALSE, fun = mean) 169 | to_wide(DT, keep = diet, fun = mean) 170 | to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all") 171 | to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0) 172 | to_wide(DT, chick, time, fun = mean) 173 | 174 | 175 | 176 | # using FALSE 177 | DT = data.table(v1 = rep(1:2, each = 6), 178 | v2 = rep(rep(1:3, 2), each = 2), 179 | v3 = rep(1:2, 6), 180 | v4 = rnorm(6)) 181 | 182 | ## for each combination of (v1, v2), add up all values of v4 183 | to_wide(DT, 184 | cols("^v(1|2)"), 185 | names_in = FALSE, 186 | values_in = v4, 187 | fun = sum 188 | ) 189 | 190 | # multiple values_in and multiple fun 191 | DT = data.table(x=sample(5,20,TRUE), 192 | y=sample(2,20,TRUE), 193 | z=sample(letters[1:2], 20,TRUE), 194 | d1 = runif(20), 195 | d2=1L) 196 | 197 | # multiple values_in 198 | to_wide(DT, 199 | keep = c(x, y), 200 | names_in = z, 201 | values_in = c(d1, d2), 202 | fun = sum, 203 | fill = 0) 204 | 205 | # multiple funs 206 | to_wide(DT, 207 | keep = c(x, y), 208 | names_in = z, 209 | values_in = d1, 210 | fun = list(sum = sum, mean = mean), 211 | fill = NULL) 212 | 213 | # multiple fun and values_in (all combinations) 214 | to_wide(DT, 215 | keep = c(x, y), 216 | names_in = z, 217 | values_in = c(d1, d2), 218 | fun = list(sum = sum, mean = mean) 219 | ) 220 | 221 | # multiple fun and values_in (one-to-one) 222 | to_wide(DT, 223 | keep = c(x, y), 224 | names_in = z, 225 | values_in = list(d1, d2), 226 | fun = list(sum = sum, mean = mean) 227 | ) 228 | } 229 | -------------------------------------------------------------------------------- /R/vlookup.R: -------------------------------------------------------------------------------- 1 | #' Look up values in dictionary. 2 | #' 3 | #' `vlookup` function is inspired by VLOOKUP spreadsheet 4 | #' function. It looks for a `lookup_value` in the `lookup_column` of 5 | #' the `dict`, and then returns values in the same rows from 6 | #' `result_column`. 7 | #' `xlookup` is simplified version of `vlookup`. It searches for a 8 | #' `lookup_value` in the `lookup_vector` and return values in the same 9 | #' position from the `result_vector`. 10 | #' 11 | #' @param lookup_value Vector of looked up values 12 | #' @param dict data.frame. Dictionary. 13 | #' @param result_column numeric or character. Resulting columns in the 14 | #' `dict`. Default value for `result_column` is 2 - for frequent 15 | #' case of dictionary with keys in the first column and results in the second 16 | #' column. 17 | #' @param lookup_column Column of `dict` in which lookup value will be 18 | #' searched. By default, it is the first column of the `dict`. 19 | #' @param lookup_vector vector in which 'lookup_value' will be searched during 'xlookup'. 20 | #' @param result_vector vector with resulting values for 'xlookup'. 21 | #' @param no_match vector of length one. NA by default. Where a valid match is 22 | #' not found, return the 'no_match' value you supply. 23 | #' @return `xlookup` always return vector, `vlookup` returns vector if 24 | #' the `result_column` is single value. In the opposite case data.frame will 25 | #' be returned. 26 | #' 27 | #' @export 28 | #' @examples 29 | #' # with data.frame 30 | #' dict = data.frame(num=1:26, small=letters, cap=LETTERS) 31 | #' vlookup(1:3, dict) 32 | #' vlookup(c(45,1:3,58), dict, result_column='cap') 33 | #' vlookup(c(45,1:3,58), dict, result_column='cap', no_match = "Not found") 34 | #' 35 | #' # the same with xlookup 36 | #' xlookup(1:3, dict$num, dict$small) 37 | #' xlookup(c(45,1:3,58), dict$num, dict$cap) 38 | #' xlookup(c(45,1:3,58), dict$num, dict$cap, no_match = "Not found") 39 | #' 40 | #' 41 | #' # example from base 'merge' 42 | #' authors = data.table( 43 | #' surname = c("Tukey", "Venables", "Tierney", "Ripley", "McNeil"), 44 | #' nationality = c("US", "Australia", "US", "UK", "Australia"), 45 | #' deceased = c("yes", rep("no", 4)) 46 | #' ) 47 | #' 48 | #' books = data.table( 49 | #' surname = c("Tukey", "Venables", "Tierney", 50 | #' "Ripley", "Ripley", "McNeil", "R Core"), 51 | #' title = c("Exploratory Data Analysis", 52 | #' "Modern Applied Statistics ...", 53 | #' "LISP-STAT", 54 | #' "Spatial Statistics", "Stochastic Simulation", 55 | #' "Interactive Data Analysis", 56 | #' "An Introduction to R") 57 | #' ) 58 | #' 59 | #' let(books, 60 | #' c("author_nationality", "author_deceased") := vlookup(surname, 61 | #' dict = authors, 62 | #' result_column = 2:3 63 | #' ) 64 | #' )[] 65 | #' 66 | #' # Just for fun. Examples borrowed from Microsoft Excel. 67 | #' # It is not the R way of doing things. 68 | #' 69 | #' # Example 2 70 | #' 71 | #' ex2 = fread(" 72 | #' Item_ID Item Cost Markup 73 | #' ST-340 Stroller 145.67 0.30 74 | #' BI-567 Bib 3.56 0.40 75 | #' DI-328 Diapers 21.45 0.35 76 | #' WI-989 Wipes 5.12 0.40 77 | #' AS-469 Aspirator 2.56 0.45 78 | #' ") 79 | #' 80 | #' # Calculates the retail price of diapers by adding the markup percentage to the cost. 81 | #' vlookup("DI-328", ex2, 3) * (1 + vlookup("DI-328", ex2, 4)) # 28.9575 82 | #' 83 | #' # Calculates the sale price of wipes by subtracting a specified discount from 84 | #' # the retail price. 85 | #' (vlookup("WI-989", ex2, "Cost") * (1 + vlookup("WI-989", ex2, "Markup"))) * (1 - 0.2) # 5.7344 86 | #' 87 | #' A2 = ex2[["Item_ID"]][1] 88 | #' A3 = ex2[["Item_ID"]][2] 89 | #' 90 | #' # If the cost of an item is greater than or equal to $20.00, displays the string 91 | #' # "Markup is nn%"; otherwise, displays the string "Cost is under $20.00". 92 | #' ifelse(vlookup(A2, ex2, "Cost") >= 20, 93 | #' paste0("Markup is " , 100 * vlookup(A2, ex2, "Markup"),"%"), 94 | #' "Cost is under $20.00") # Markup is 30% 95 | #' 96 | #' 97 | #' # If the cost of an item is greater than or equal to $20.00, displays the string 98 | #' # Markup is nn%"; otherwise, displays the string "Cost is $n.nn". 99 | #' ifelse(vlookup(A3, ex2, "Cost") >= 20, 100 | #' paste0("Markup is: " , 100 * vlookup(A3, ex2, "Markup") , "%"), 101 | #' paste0("Cost is $", vlookup(A3, ex2, "Cost"))) #Cost is $3.56 102 | #' 103 | #' 104 | #' # Example 3 105 | #' 106 | #' ex3 = fread(' 107 | #' ID Last_name First_name Title Birth_date 108 | #' 1 Davis Sara "Sales Rep." 12/8/1968 109 | #' 2 Fontana Olivier "V.P. of Sales" 2/19/1952 110 | #' 3 Leal Karina "Sales Rep." 8/30/1963 111 | #' 4 Patten Michael "Sales Rep." 9/19/1958 112 | #' 5 Burke Brian "Sales Mgr." 3/4/1955 113 | #' 6 Sousa Luis "Sales Rep." 7/2/1963 114 | #' ') 115 | #' 116 | #' # If there is an employee with an ID of 5, displays the employee's last name; 117 | #' # otherwise, displays the message "Employee not found". 118 | #' vlookup(5, ex3, "Last_name", no_match = "Employee not found") # Burke 119 | #' 120 | #' # Many employees 121 | #' vlookup(1:10, ex3, "Last_name", no_match = "Employee not found") 122 | #' 123 | #' # For the employee with an ID of 4, concatenates the values of three cells into 124 | #' # a complete sentence. 125 | #' paste0(vlookup(4, ex3, "First_name"), " ", 126 | #' vlookup(4, ex3, "Last_name"), " is a ", 127 | #' vlookup(4, ex3, "Title")) # Michael Patten is a Sales Rep. 128 | vlookup = function(lookup_value, dict, result_column = 2, lookup_column = 1, no_match = NA){ 129 | (!is.list(lookup_value) && NCOL(lookup_value)==1) || stop("'vlookup': 'lookup_value' should be vector.") 130 | is.data.frame(dict) || stop("'vlookup': 'dict' should be data.frame.") 131 | length(lookup_column)==1 || stop("'vlookup': 'lookup_column' should be vector of length 1.") 132 | (length(no_match)==1) || stop("'vlookup': 'no_match' should be length 1.") 133 | if(length(result_column)==1){ 134 | return(xlookup(lookup_value, dict[[lookup_column]], dict[[result_column]], no_match = no_match)) 135 | } 136 | ind = fast_match(lookup_value, dict[[lookup_column]]) 137 | if(is.data.table(dict)){ 138 | res = dict[ind, result_column, with = FALSE] 139 | } else { 140 | res = dict[ind, result_column, drop = FALSE] 141 | } 142 | if(!identical(no_match, NA)) { 143 | ind = is.na(ind) 144 | res[ind, ] = no_match 145 | } 146 | res 147 | } 148 | 149 | #' @rdname vlookup 150 | #' @export 151 | xlookup = function(lookup_value, lookup_vector, result_vector, no_match = NA){ 152 | (!is.list(lookup_value) && NCOL(lookup_value)==1) || stop("'xlookup': 'lookup_value' should be vector.") 153 | (!is.list(lookup_vector) && NCOL(lookup_vector)==1) || stop("'xlookup': 'lookup_vector' should be vector.") 154 | (!is.list(result_vector) && NCOL(result_vector)==1) || stop("'xlookup': 'result_vector' should be vector.") 155 | (length(no_match)==1) || stop("'xlookup': 'no_match' should be length 1.") 156 | (NROW(lookup_vector) == NROW(result_vector)) || stop("'xlookup': 'lookup_vector' and 'result_vector' should be the same length.") 157 | ind = fast_match(lookup_value, lookup_vector) 158 | res = result_vector[ind] 159 | if(!identical(no_match, NA)) res[is.na(ind)] = no_match 160 | res 161 | } 162 | 163 | fast_match = function(x, table, nomatch = NA_integer_){ 164 | if(is.character(x) && is.character(table)){ 165 | return(chmatch(x, table, nomatch = nomatch)) 166 | } 167 | match(x, table, nomatch = nomatch, incomparables = NULL) 168 | } 169 | 170 | -------------------------------------------------------------------------------- /man/maditr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/maditr-package.R 3 | \docType{package} 4 | \name{maditr-package} 5 | \alias{maditr} 6 | \alias{maditr-package} 7 | \title{maditr: Pipe-Style Interface for 'data.table'} 8 | \description{ 9 | Package provides pipe-style interface for \code{data.table}. It preserves 10 | all data.table features without significant impact on performance. '\code{let}' 11 | and '\code{take}' functions are simplified interfaces for most common data 12 | manipulation tasks. 13 | } 14 | \details{ 15 | \itemize{ 16 | \item To select rows from data: \code{rows(mtcars, am==0)} 17 | \item To select columns from data: \code{columns(mtcars, mpg, vs:carb)} 18 | \item To aggregate data: \code{take(mtcars, mean_mpg = mean(mpg), by = am)} 19 | \item To aggregate all non-grouping columns: \code{take_all(mtcars, mean, by = am)} 20 | \item To aggregate several columns with one summary: \code{take(mtcars, mpg, hp, fun = mean, by = am)} 21 | \item To get total summary skip \code{by} argument: \code{take_all(mtcars, mean)} 22 | \item Use magrittr pipe '\%>\%' to chain several operations: 23 | } 24 | 25 | \if{html}{\out{
}}\preformatted{ mtcars \%>\% 26 | let(mpg_hp = mpg/hp) \%>\% 27 | take(mean(mpg_hp), by = am) 28 | }\if{html}{\out{
}} 29 | \itemize{ 30 | \item To modify variables or add new variables: 31 | } 32 | 33 | \if{html}{\out{
}}\preformatted{ mtcars \%>\% 34 | let(new_var = 42, 35 | new_var2 = new_var*hp) \%>\% 36 | head() 37 | }\if{html}{\out{
}} 38 | \itemize{ 39 | \item To modify all non-grouping variables: 40 | } 41 | 42 | \if{html}{\out{
}}\preformatted{ iris \%>\% 43 | let_all( 44 | scaled = (.x - mean(.x))/sd(.x), 45 | by = Species) \%>\% 46 | head() 47 | }\if{html}{\out{
}} 48 | \itemize{ 49 | \item To drop variable assign NULL: \code{let(mtcars, am = NULL) \%>\% head()} 50 | \item To aggregate all variables conditionally on name: 51 | } 52 | 53 | \if{html}{\out{
}}\preformatted{ iris \%>\% 54 | take_all( 55 | mean = if(startsWith(.name, "Sepal")) mean(.x), 56 | median = if(startsWith(.name, "Petal")) median(.x), 57 | by = Species 58 | ) 59 | }\if{html}{\out{
}} 60 | \itemize{ 61 | \item For parametric assignment use ':=': 62 | } 63 | 64 | \if{html}{\out{
}}\preformatted{ new_var = "my_var" 65 | old_var = "mpg" 66 | mtcars \%>\% 67 | let((new_var) := get(old_var)*2) \%>\% 68 | head() 69 | }\if{html}{\out{
}} 70 | \itemize{ 71 | \item For more sophisticated operations see 'query'/'query_if': these 72 | functions translates its arguments one-to-one to '\verb{[.data.table}' 73 | method. Additionally there are some conveniences such as automatic 74 | 'data.frame' conversion to 'data.table'. 75 | } 76 | } 77 | \examples{ 78 | # examples form 'dplyr' package 79 | data(mtcars) 80 | \donttest{ 81 | # Newly created variables are available immediately 82 | mtcars \%>\% 83 | let( 84 | cyl2 = cyl * 2, 85 | cyl4 = cyl2 * 2 86 | ) \%>\% 87 | head() 88 | 89 | # You can also use let() to remove variables and 90 | # modify existing variables 91 | mtcars \%>\% 92 | let( 93 | mpg = NULL, 94 | disp = disp * 0.0163871 # convert to litres 95 | ) \%>\% 96 | head() 97 | 98 | 99 | # window functions are useful for grouped computations 100 | mtcars \%>\% 101 | let(rank = rank(-mpg, ties.method = "min"), 102 | by = cyl) \%>\% 103 | head() 104 | 105 | # You can drop variables by setting them to NULL 106 | mtcars \%>\% let(cyl = NULL) \%>\% head() 107 | 108 | # keeps all existing variables 109 | mtcars \%>\% 110 | let(displ_l = disp / 61.0237) \%>\% 111 | head() 112 | 113 | # keeps only the variables you create 114 | mtcars \%>\% 115 | take(displ_l = disp / 61.0237) 116 | 117 | 118 | # can refer to both contextual variables and variable names: 119 | var = 100 120 | mtcars \%>\% 121 | let(cyl = cyl * var) \%>\% 122 | head() 123 | 124 | # select rows 125 | mtcars \%>\% 126 | rows(am==0) \%>\% 127 | head() 128 | 129 | # select rows with compound condition 130 | mtcars \%>\% 131 | rows(am==0 & mpg>mean(mpg)) 132 | 133 | # select columns 134 | mtcars \%>\% 135 | columns(vs:carb, cyl) 136 | 137 | mtcars \%>\% 138 | columns(-am, -cyl) 139 | 140 | # regular expression pattern 141 | columns(iris, "^Petal") # variables which start from 'Petal' 142 | columns(iris, "Width$") # variables which end with 'Width' 143 | 144 | # move Species variable to the front 145 | # pattern "^." matches all variables 146 | columns(iris, Species, "^.") 147 | 148 | # pattern "^.*al" means "contains 'al'" 149 | columns(iris, "^.*al") 150 | 151 | # numeric indexing - all variables except Species 152 | columns(iris, 1:4) 153 | 154 | # A 'take' with summary functions applied without 'by' argument returns an aggregated data 155 | mtcars \%>\% 156 | take(mean = mean(disp), n = .N) 157 | 158 | # Usually, you'll want to group first 159 | mtcars \%>\% 160 | take(mean = mean(disp), n = .N, by = cyl) 161 | 162 | # You can group by expressions: 163 | mtcars \%>\% 164 | take_all(mean, by = list(vsam = vs + am)) 165 | 166 | # modify all non-grouping variables in-place 167 | mtcars \%>\% 168 | let_all((.x - mean(.x))/sd(.x), by = am) \%>\% 169 | head() 170 | 171 | # modify all non-grouping variables to new variables 172 | mtcars \%>\% 173 | let_all(scaled = (.x - mean(.x))/sd(.x), by = am) \%>\% 174 | head() 175 | 176 | # conditionally modify all variables 177 | iris \%>\% 178 | let_all(mean = if(is.numeric(.x)) mean(.x)) \%>\% 179 | head() 180 | 181 | # modify all variables conditionally on name 182 | iris \%>\% 183 | let_all( 184 | mean = if(startsWith(.name, "Sepal")) mean(.x), 185 | median = if(startsWith(.name, "Petal")) median(.x), 186 | by = Species 187 | ) \%>\% 188 | head() 189 | 190 | # aggregation with 'take_all' 191 | mtcars \%>\% 192 | take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am) 193 | 194 | # conditionally aggregate all variables 195 | iris \%>\% 196 | take_all(mean = if(is.numeric(.x)) mean(.x)) 197 | 198 | # aggregate all variables conditionally on name 199 | iris \%>\% 200 | take_all( 201 | mean = if(startsWith(.name, "Sepal")) mean(.x), 202 | median = if(startsWith(.name, "Petal")) median(.x), 203 | by = Species 204 | ) 205 | 206 | # parametric evaluation: 207 | var = quote(mean(cyl)) 208 | mtcars \%>\% 209 | let(mean_cyl = eval(var)) \%>\% 210 | head() 211 | take(mtcars, eval(var)) 212 | 213 | # all together 214 | new_var = "mean_cyl" 215 | mtcars \%>\% 216 | let((new_var) := eval(var)) \%>\% 217 | head() 218 | take(mtcars, (new_var) := eval(var)) 219 | 220 | ######################################## 221 | # variable selection 222 | 223 | # range selection 224 | iris \%>\% 225 | let( 226 | avg = rowMeans(Sepal.Length \%to\% Petal.Width) 227 | ) \%>\% 228 | head() 229 | 230 | # multiassignment 231 | iris \%>\% 232 | let( 233 | # starts with Sepal or Petal 234 | multipled1 \%to\% multipled4 := cols("^(Sepal|Petal)")*2 235 | ) \%>\% 236 | head() 237 | 238 | 239 | mtcars \%>\% 240 | let( 241 | # text expansion 242 | cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale) 243 | ) \%>\% 244 | head() 245 | 246 | # range selection in 'by' 247 | # range selection + additional column 248 | mtcars \%>\% 249 | take( 250 | res = sum(cols(mpg, disp \%to\% drat)), 251 | by = vs \%to\% gear 252 | ) 253 | } 254 | } 255 | \seealso{ 256 | Useful links: 257 | \itemize{ 258 | \item \url{https://github.com/gdemin/maditr} 259 | \item Report bugs at \url{https://github.com/gdemin/maditr/issues} 260 | } 261 | 262 | } 263 | \author{ 264 | \strong{Maintainer}: Gregory Demin \email{gdemin@gmail.com} 265 | 266 | } 267 | -------------------------------------------------------------------------------- /inst/tinytest/test_to_wide.R: -------------------------------------------------------------------------------- 1 | cat("\nContext:", "to_long", "\n") 2 | 3 | set.seed(45) 4 | DT = data.table( 5 | i_1 = c(1:5, NA), 6 | i_2 = c(NA,6,7,8,9,10), 7 | f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)), 8 | f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE), 9 | c_1 = sample(c(letters[1:3], NA), 6, TRUE), 10 | d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"), 11 | d_2 = as.Date(6:1, origin="2012-01-01")) 12 | # add a couple of list cols 13 | DT[, l_1 := DT[, list(c=list(rep(i_1, sample(5,1)))), by = i_1]$c] 14 | DT[, l_2 := DT[, list(c=list(rep(c_1, sample(5,1)))), by = i_1]$c] 15 | 16 | # id, measure as character/integer/numeric vectors 17 | expect_identical( 18 | to_long(DT, f_1, keep = 1:2), 19 | melt(DT, id=1:2, measure="f_1") 20 | ) 21 | 22 | expect_identical( 23 | to_long(DT, f_1, keep = c(i_1, i_2)), 24 | melt(DT, id=1:2, measure="f_1") 25 | ) 26 | 27 | expect_identical( 28 | to_long(DT, f_1, keep = cols(i_1:i_2)), 29 | melt(DT, id=1:2, measure="f_1") 30 | ) 31 | 32 | expect_identical( 33 | to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE), 34 | melt(DT, id=1:2, measure="f_1", variable.factor = FALSE) 35 | ) 36 | 37 | expect_identical( 38 | to_long(DT, f_1, keep = cols("i_{1:2}")), 39 | melt(DT, id=1:2, measure="f_1") 40 | ) 41 | 42 | expect_identical( 43 | to_long(DT, f_1, keep = cols("^i_")), 44 | melt(DT, id=1:2, measure="f_1") 45 | ) 46 | 47 | expect_identical( 48 | to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val"), 49 | melt(DT, id=1:2, measure="f_1", variable.name = "var", value.name = "val") 50 | ) 51 | 52 | 53 | 54 | col_var = "^i_" 55 | expect_identical( 56 | to_long(DT, 3, keep = cols(col_var)), 57 | melt(DT, id=1:2, measure="f_1") 58 | ) 59 | 60 | expect_identical( 61 | to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE), 62 | melt(DT, id=1:2, measure=3:4, value.factor=TRUE) # 'value' is *ordered* factor 63 | ) 64 | 65 | data("mtcars") 66 | expect_identical( 67 | to_long(mtcars), 68 | suppressWarnings(melt(mtcars)) 69 | ) 70 | 71 | data("mtcars") 72 | expect_identical( 73 | to_long(mtcars, keep = am), 74 | melt(mtcars, id.vars = "am") 75 | ) 76 | 77 | data("mtcars") 78 | expect_identical( 79 | to_long(mtcars, columns = c(am, vs, mpg)), 80 | melt(mtcars, measure.vars = c("am", "vs", "mpg")) 81 | ) 82 | 83 | 84 | data("mtcars") 85 | expect_identical( 86 | to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE), 87 | melt(mtcars, measure.vars = c("am", "vs", "mpg"), id = integer(0)) 88 | ) 89 | 90 | # on na.rm=TRUE. NAs are removed efficiently, from within C 91 | expect_identical( 92 | suppressWarnings(to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)), 93 | suppressWarnings(melt(DT, id="f_1", measure=c("i_1", "i_2"), na.rm=TRUE)) # remove NA 94 | ) 95 | 96 | expect_identical( 97 | suppressWarnings(to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = FALSE)), 98 | suppressWarnings(melt(DT, id="f_1", measure=c("i_1", "i_2"), na.rm = FALSE)) 99 | ) 100 | 101 | expect_identical( 102 | to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE), 103 | melt(DT, id=1:2, measure=patterns("^f_", "^d_"), value.factor=TRUE) 104 | ) 105 | 106 | 107 | cat("\nContext:", "to_wide", "\n") 108 | 109 | data("ChickWeight") 110 | names(ChickWeight) = tolower(names(ChickWeight)) 111 | DT = to_long(ChickWeight, keep=2:4) # calls melt.data.table 112 | 113 | expect_identical( 114 | to_wide(DT, keep = time, fun = mean), 115 | dcast(DT, time ~ variable, fun=mean) # using partial matching of argument 116 | ) 117 | 118 | expect_identical( 119 | to_wide(DT, keep = FALSE, fun = mean), 120 | dcast(DT, . ~ variable, fun=mean) # using partial matching of argument 121 | ) 122 | 123 | expect_identical( 124 | to_wide(DT, keep = diet, fun = mean), 125 | dcast(DT, diet ~ variable, fun=mean) 126 | ) 127 | expect_identical( 128 | to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all"), 129 | dcast(DT, diet+chick ~ time, drop=FALSE) 130 | ) 131 | expect_identical( 132 | to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0), 133 | dcast(DT, diet+chick ~ time, drop=FALSE, fill=0) 134 | ) 135 | 136 | expect_equal( 137 | to_wide(DT, chick, time, fun = mean), 138 | dcast(DT, chick ~ time, fun=mean) 139 | ) 140 | 141 | DT <- data.table(v1 = c(1.1, 1.1, 1.1, 2.2, 2.2, 2.2), 142 | v2 = factor(c(1L, 1L, 1L, 3L, 3L, 3L), levels=1:3), 143 | v3 = factor(c(2L, 3L, 5L, 1L, 2L, 6L), levels=1:6), 144 | v4 = c(3L, 2L, 2L, 5L, 4L, 3L)) 145 | # drop=TRUE 146 | 147 | expect_error(to_wide(DT, c(v1, v2), v3, missing_comb = "none")) 148 | expect_identical( 149 | to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "none"), 150 | dcast(DT, v1 + v2 ~ v3, value.var = "v4") # default is drop=TRUE 151 | ) 152 | expect_identical( 153 | to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "all"), 154 | dcast(DT, v1 + v2 ~ v3, value.var = "v4", drop=FALSE) # all missing combinations of both LHS and RHS 155 | ) 156 | expect_identical( 157 | to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "rows"), 158 | dcast(DT, v1 + v2 ~ v3, value.var = "v4", drop=c(FALSE, TRUE)) # all missing combinations of only LHS 159 | ) 160 | expect_identical( 161 | to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "columns"), 162 | dcast(DT, v1 + v2 ~ v3, value.var = "v4", drop=c(TRUE, FALSE)) # all missing combinations of only RHS 163 | ) 164 | 165 | # using . and ... 166 | DT <- data.table(v1 = rep(1:2, each = 6), 167 | v2 = rep(rep(1:3, 2), each = 2), 168 | v3 = rep(1:2, 6), 169 | v4 = rnorm(6)) 170 | expect_identical( 171 | to_wide(DT, names_in = v3, values_in = v4), 172 | dcast(DT, ... ~ v3, value.var = "v4") #same as v1 + v2 ~ v3, value.var = "v4" 173 | ) 174 | expect_identical( 175 | to_wide(DT, cols(v1 %to% v3), names_in = FALSE, values_in = v4), 176 | dcast(DT, v1 + v2 + v3 ~ ., value.var = "v4") 177 | ) 178 | 179 | ## for each combination of (v1, v2), add up all values of v4 180 | expect_identical( 181 | to_wide(DT, cols("^v(1|2)"), names_in = FALSE, values_in = v4, fun = sum), 182 | dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum) 183 | ) 184 | 185 | expect_identical( 186 | to_wide(DT, cols("v{1:2}"), names_in = FALSE, values_in = v4, fun = sum), 187 | dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum) 188 | ) 189 | 190 | my_keep = "v{1:2}" 191 | my_names = FALSE 192 | my_values = "v4" 193 | 194 | expect_error(to_wide(DT, my_keep, names_in = FALSE, values_in = v4, fun = sum)) 195 | 196 | 197 | expect_identical( 198 | to_wide(DT, cols(my_keep), names_in = my_names, values_in = my_values, fun = sum), 199 | dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum) 200 | ) 201 | 202 | # multiple value.var and multiple fun.aggregate 203 | DT = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE), 204 | z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L) 205 | # multiple value.var 206 | expect_identical( 207 | to_wide(DT, c(x, y), z, c(d1, d2), fun = sum, fill = 0), 208 | dcast(DT, x + y ~ z, fun=sum, value.var=c("d1","d2")) 209 | ) 210 | # multiple fun.aggregate 211 | expect_identical( 212 | to_wide(DT, c(x, y), z, d1, fun = list(sum = sum, mean = mean), fill = NULL), 213 | dcast(DT, x + y ~ z, fun=list(sum, mean), value.var="d1") 214 | ) 215 | 216 | 217 | my_fun = list(sum = sum, mean = mean) 218 | expect_identical( 219 | to_wide(DT, c(x, y), z, d1, fun = my_fun), 220 | dcast(DT, x + y ~ z, fun=my_fun, value.var="d1", fill = NA) 221 | ) 222 | # multiple fun.agg and value.var (all combinations) 223 | expect_identical( 224 | to_wide(DT, c(x, y), z, c(d1, d2), fun = my_fun), 225 | dcast(DT, x + y ~ z, fun=my_fun, value.var=c("d1", "d2"), fill = NA) 226 | ) 227 | # multiple fun.agg and value.var (one-to-one) 228 | expect_identical( 229 | to_wide(DT, c(x, y), z, list(d1, d2), fun = my_fun), 230 | dcast(DT, x + y ~ z, fun=my_fun, value.var=list("d1", "d2"), fill = NA) 231 | ) 232 | -------------------------------------------------------------------------------- /vignettes/Introduction.R: -------------------------------------------------------------------------------- 1 | ## ----setup, include=FALSE----------------------------------------------------- 2 | knitr::opts_chunk$set(echo = TRUE) 3 | data.table::setDTthreads(2) 4 | 5 | ## ----eval=FALSE--------------------------------------------------------------- 6 | # mtcars %>% 7 | # let(mpg_hp = mpg/hp) %>% 8 | # take(mean(mpg_hp), by = am) 9 | 10 | ## ----eval=FALSE--------------------------------------------------------------- 11 | # mtcars %>% 12 | # let(new_var = 42, 13 | # new_var2 = new_var*hp) %>% 14 | # head() 15 | 16 | ## ----eval=FALSE--------------------------------------------------------------- 17 | # iris %>% 18 | # let_all( 19 | # scaled = (.x - mean(.x))/sd(.x), 20 | # by = Species) %>% 21 | # head() 22 | 23 | ## ----eval=FALSE--------------------------------------------------------------- 24 | # iris %>% 25 | # take_all( 26 | # mean = if(startsWith(.name, "Sepal")) mean(.x), 27 | # median = if(startsWith(.name, "Petal")) median(.x), 28 | # by = Species 29 | # ) 30 | 31 | ## ----eval=FALSE--------------------------------------------------------------- 32 | # new_var = "my_var" 33 | # old_var = "mpg" 34 | # mtcars %>% 35 | # let((new_var) := get(old_var)*2) %>% 36 | # head() 37 | # 38 | # # or, 39 | # expr = quote(mean(cyl)) 40 | # mtcars %>% 41 | # let((new_var) := eval(expr)) %>% 42 | # head() 43 | # 44 | # # the same with `take` 45 | # by_var = "vs,am" 46 | # take(mtcars, (new_var) := eval(expr), by = by_var) 47 | 48 | ## ----include=FALSE------------------------------------------------------------ 49 | library(maditr) 50 | 51 | ## ----------------------------------------------------------------------------- 52 | 53 | workers = fread(" 54 | name company 55 | Nick Acme 56 | John Ajax 57 | Daniela Ajax 58 | ") 59 | 60 | positions = fread(" 61 | name position 62 | John designer 63 | Daniela engineer 64 | Cathie manager 65 | ") 66 | 67 | # xlookup 68 | workers = let(workers, 69 | position = xlookup(name, positions$name, positions$position) 70 | ) 71 | 72 | # vlookup 73 | # by default we search in the first column and return values from second column 74 | workers = let(workers, 75 | position = vlookup(name, positions, no_match = "Not found") 76 | ) 77 | 78 | # the same 79 | workers = let(workers, 80 | position = vlookup(name, positions, 81 | result_column = "position", 82 | no_match = "Not found") # or, result_column = 2 83 | ) 84 | 85 | head(workers) 86 | 87 | ## ----------------------------------------------------------------------------- 88 | library(maditr) 89 | data(mtcars) 90 | 91 | # Newly created variables are available immediately 92 | mtcars %>% 93 | let( 94 | cyl2 = cyl * 2, 95 | cyl4 = cyl2 * 2 96 | ) %>% head() 97 | 98 | # You can also use let() to remove variables and 99 | # modify existing variables 100 | mtcars %>% 101 | let( 102 | mpg = NULL, 103 | disp = disp * 0.0163871 # convert to litres 104 | ) %>% head() 105 | 106 | 107 | # window functions are useful for grouped computations 108 | mtcars %>% 109 | let(rank = rank(-mpg, ties.method = "min"), 110 | by = cyl) %>% 111 | head() 112 | 113 | # You can drop variables by setting them to NULL 114 | mtcars %>% 115 | let(cyl = NULL) %>% 116 | head() 117 | 118 | # keeps all existing variables 119 | mtcars %>% 120 | let(displ_l = disp / 61.0237) %>% 121 | head() 122 | 123 | # keeps only the variables you create 124 | mtcars %>% 125 | take(displ_l = disp / 61.0237) %>% 126 | head() 127 | 128 | 129 | # can refer to both contextual variables and variable names: 130 | var = 100 131 | mtcars %>% 132 | let(cyl = cyl * var) %>% 133 | head() 134 | 135 | # select rows 136 | mtcars %>% 137 | rows(am==0) %>% 138 | head() 139 | 140 | # select rows with compound condition 141 | mtcars %>% 142 | rows(am==0 & mpg>mean(mpg)) 143 | 144 | # select columns 145 | mtcars %>% 146 | columns(vs:carb, cyl) 147 | 148 | mtcars %>% 149 | columns(-am, -cyl) 150 | 151 | # regular expression pattern 152 | columns(iris, "^Petal") %>% head() # variables which start from 'Petal' 153 | columns(iris, "Width$") %>% head() # variables which end with 'Width' 154 | 155 | # move Species variable to the front 156 | # pattern "^." matches all variables 157 | columns(iris, Species, "^.") %>% head() 158 | 159 | # pattern "^.*al" means "contains 'al'" 160 | columns(iris, "^.*al") %>% head() 161 | 162 | # numeric indexing - all variables except Species 163 | columns(iris, 1:4) %>% head() 164 | 165 | # A 'take' with summary functions applied without 'by' argument returns an aggregated data 166 | mtcars %>% 167 | take(mean = mean(disp), n = .N) 168 | 169 | # Usually, you'll want to group first 170 | mtcars %>% 171 | take(mean = mean(disp), n = .N, by = am) 172 | 173 | # grouping by multiple variables 174 | mtcars %>% 175 | take(mean = mean(disp), n = .N, by = list(am, vs)) 176 | 177 | # You can group by expressions: 178 | mtcars %>% 179 | take_all( 180 | mean, 181 | by = list(vsam = vs + am) 182 | ) 183 | 184 | # modify all non-grouping variables in-place 185 | mtcars %>% 186 | let_all((.x - mean(.x))/sd(.x), by = am) %>% 187 | head() 188 | 189 | # modify all non-grouping variables to new variables 190 | mtcars %>% 191 | let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>% 192 | head() 193 | 194 | # conditionally modify all variables 195 | iris %>% 196 | let_all(mean = if(is.numeric(.x)) mean(.x)) %>% 197 | head() 198 | 199 | # modify all variables conditionally on name 200 | iris %>% 201 | let_all( 202 | mean = if(startsWith(.name, "Sepal")) mean(.x), 203 | median = if(startsWith(.name, "Petal")) median(.x), 204 | by = Species 205 | ) %>% 206 | head() 207 | 208 | # aggregation with 'take_all' 209 | mtcars %>% 210 | take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am) 211 | 212 | # conditionally aggregate all variables 213 | iris %>% 214 | take_all(mean = if(is.numeric(.x)) mean(.x)) 215 | 216 | # aggregate all variables conditionally on name 217 | iris %>% 218 | take_all( 219 | mean = if(startsWith(.name, "Sepal")) mean(.x), 220 | median = if(startsWith(.name, "Petal")) median(.x), 221 | by = Species 222 | ) 223 | 224 | # parametric evaluation: 225 | var = quote(mean(cyl)) 226 | mtcars %>% 227 | let(mean_cyl = eval(var)) %>% 228 | head() 229 | take(mtcars, eval(var)) 230 | 231 | # all together 232 | new_var = "mean_cyl" 233 | mtcars %>% 234 | let((new_var) := eval(var)) %>% 235 | head() 236 | take(mtcars, (new_var) := eval(var)) 237 | 238 | 239 | 240 | ## ----------------------------------------------------------------------------- 241 | # range selection 242 | iris %>% 243 | let( 244 | avg = rowMeans(Sepal.Length %to% Petal.Width) 245 | ) %>% 246 | head() 247 | 248 | # multiassignment 249 | iris %>% 250 | let( 251 | # starts with Sepal or Petal 252 | multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2 253 | ) %>% 254 | head() 255 | 256 | 257 | mtcars %>% 258 | let( 259 | # text expansion 260 | cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale) 261 | ) %>% 262 | head() 263 | 264 | # range selection in 'by' 265 | # selection of range + additional column 266 | mtcars %>% 267 | take( 268 | res = sum(cols(mpg, disp %to% drat)), 269 | by = vs %to% gear 270 | ) 271 | 272 | ## ----------------------------------------------------------------------------- 273 | workers = fread(" 274 | name company 275 | Nick Acme 276 | John Ajax 277 | Daniela Ajax 278 | ") 279 | 280 | positions = fread(" 281 | name position 282 | John designer 283 | Daniela engineer 284 | Cathie manager 285 | ") 286 | 287 | workers 288 | positions 289 | 290 | ## ----------------------------------------------------------------------------- 291 | workers %>% dt_inner_join(positions) 292 | workers %>% dt_left_join(positions) 293 | workers %>% dt_right_join(positions) 294 | workers %>% dt_full_join(positions) 295 | 296 | # filtering joins 297 | workers %>% dt_anti_join(positions) 298 | workers %>% dt_semi_join(positions) 299 | 300 | ## ----eval=FALSE--------------------------------------------------------------- 301 | # workers %>% dt_left_join(positions, by = "name") 302 | 303 | ## ----eval=FALSE--------------------------------------------------------------- 304 | # positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions' 305 | # workers %>% dt_inner_join(positions2, by = c("name" = "worker")) 306 | 307 | -------------------------------------------------------------------------------- /inst/tinytest/test_verbs.R: -------------------------------------------------------------------------------- 1 | data(mtcars) 2 | 3 | cat("\nContext:", "errors", "\n") 4 | expect_error(take(1:5, fun = sum)) 5 | expect_error(let(1:5, new = 1)) 6 | expect_error(query(1:5, new := 1)) 7 | 8 | expect_error(let(mtcars)) 9 | expect_error(let(mtcars, am*2)) 10 | ################ 11 | cat("\nContext:", "let/let_if", "\n") 12 | mt_dt = as.data.table(mtcars) 13 | mt_dt2 = data.table::copy(mt_dt) 14 | new_dt = let_if(mt_dt, am==0, mpg_hp = mpg/hp, new = mpg_hp*2, new2 := new*4) 15 | new_dt2 = let_if(mtcars, am==0, mpg_hp = mpg/hp, new = mpg_hp*2, new2 := new*4) 16 | mt_dt2[am==0, mpg_hp := mpg/hp][am==0, new := mpg_hp*2][am==0, new2 := new*4] 17 | expect_identical(new_dt, mt_dt) 18 | expect_identical(new_dt, mt_dt2) 19 | expect_identical(new_dt2, mt_dt2) 20 | 21 | 22 | # test scopes 23 | my_take = function(coef2){ 24 | take(mtcars, res = sum(am*coef2)) 25 | } 26 | 27 | my_let = function(coef3){ 28 | let(mtcars, res := am*coef3) 29 | } 30 | 31 | 32 | expect_identical(my_take(3), mt_dt[, .(res = sum(am*3))]) 33 | mt_dt = as.data.table(mtcars) 34 | expect_identical(my_let(3), mt_dt[, res := am*3]) 35 | ############### 36 | mt_dt = as.data.table(mtcars) 37 | mt_dt2 = data.table::copy(mt_dt) 38 | new_dt = let_if(mt_dt, am==0, mpg_hp := mpg/hp, "new" := mpg_hp*2) 39 | new_dt2 = let_if(mtcars, am==0, mpg_hp := mpg/hp, "new" := mpg_hp*2) 40 | mt_dt2[am==0, mpg_hp := mpg/hp][am==0, new := mpg_hp*2] 41 | expect_identical(new_dt, mt_dt) 42 | expect_identical(new_dt, mt_dt2) 43 | expect_identical(new_dt2, mt_dt2) 44 | ############### 45 | mt_dt = as.data.table(mtcars) 46 | mt_dt2 = data.table::copy(mt_dt) 47 | my_var = "very_new" 48 | new_dt = let(mt_dt, mpg_hp = mpg/hp, new = mpg_hp*2, (my_var) := mpg_hp + new) 49 | new_dt2 = let(mtcars, mpg_hp = mpg/hp, new = mpg_hp*2, (my_var) := mpg_hp + new) 50 | mt_dt2[, mpg_hp := mpg/hp][, new := mpg_hp*2][,(my_var) := mpg_hp + new] 51 | expect_identical(new_dt, mt_dt) 52 | expect_identical(new_dt, mt_dt2) 53 | expect_identical(new_dt2, mt_dt2) 54 | 55 | etab = data.frame(a = 1:2, b = 3:4) 56 | class(etab) = c("etable", class(etab)) 57 | res = etab 58 | res$new = c(1, NA) 59 | etab2 = let_if( 60 | etab, 61 | 1, 62 | new = 1 63 | ) 64 | expect_identical(res, etab2) 65 | res$new = 1 66 | etab2 = let( 67 | etab, 68 | new = 1 69 | ) 70 | expect_identical(res, etab2) 71 | ############### 72 | mt_dt = as.data.table(mtcars) 73 | mt_dt2 = data.table::copy(mt_dt) 74 | new_dt = let(mt_dt, mpg_hp = mean(mpg), keyby = cyl) 75 | new_dt2 = let(mtcars, mpg_hp = mean(mpg), keyby = cyl) 76 | mt_dt2[, mpg_hp := mean(mpg), keyby = cyl] 77 | expect_identical(new_dt, mt_dt) 78 | expect_identical(new_dt, mt_dt2) 79 | expect_identical(new_dt2, mt_dt2) 80 | 81 | ########## 82 | 83 | mt_dt = as.data.table(mtcars) 84 | mt_dt2 = data.table::copy(mt_dt) 85 | let(mt_dt, filt = am==0) 86 | let_if(mt_dt, (filt), counter = 5) 87 | 88 | 89 | mt_dt2[, filt:= am==0] 90 | mt_dt2[(filt), counter :=5] 91 | 92 | expect_identical(mt_dt, mt_dt2) 93 | expect_identical(take_if(mt_dt, (filt)), mt_dt[filt==TRUE,]) 94 | 95 | ############### 96 | cat("\nContext:", "take/take_if", "\n") 97 | mt_dt = as.data.table(mtcars) 98 | res = take(mtcars, fun = mean, by = am) 99 | res2 = take(mt_dt, fun = mean, by = am) 100 | res3 = mt_dt[, lapply(.SD, mean), by = am] 101 | expect_identical(res3, res) 102 | expect_identical(res3, res2) 103 | ############### 104 | mt_dt = as.data.table(mtcars) 105 | res = take(mtcars, fun = mean, by = am, .SDcols = c("mpg", "qsec")) 106 | res2 = take(mt_dt, fun = mean, by = am, .SDcols = c("mpg", "qsec")) 107 | res3 = mt_dt[, lapply(.SD, mean), by = am, .SDcols = c("mpg", "qsec")] 108 | expect_identical(res3, res) 109 | expect_identical(res3, res2) 110 | ############### 111 | mt_dt = as.data.table(mtcars) 112 | res = take(mtcars, mpg, hp, fun = mean, by = am) 113 | res2 = take(mt_dt, mpg, hp, fun = mean, by = am) 114 | res3 = mt_dt[, lapply(list(mpg = mpg, hp = hp), mean), by = am] 115 | expect_identical(res3, res) 116 | expect_identical(res3, res2) 117 | 118 | ############### 119 | mt_dt = as.data.table(mtcars) 120 | res = take(mtcars, mpg, just_wow = hp, fun = mean, by = am) 121 | res2 = take(mt_dt, mpg, just_wow = hp, fun = mean, by = am) 122 | res3 = mt_dt[, lapply(list(mpg = mpg, just_wow = hp), mean), by = am] 123 | expect_identical(res3, res) 124 | expect_identical(res3, res2) 125 | ################### 126 | ############### 127 | mt_dt = as.data.table(mtcars) 128 | res = take(mtcars, mpg, just_wow := hp, fun = mean, by = am) 129 | res2 = take(mt_dt, mpg, just_wow := hp, fun = mean, by = am) 130 | res3 = mt_dt[, lapply(list(mpg = mpg, just_wow = hp), mean), by = am] 131 | expect_identical(res3, res) 132 | expect_identical(res3, res2) 133 | ################### 134 | mt_dt = as.data.table(mtcars) 135 | res = take_if(mtcars, am==0, fun = mean, by = am) 136 | res2 = take_if(mt_dt, am==0, fun = mean, by = am) 137 | res3 = mt_dt[am==0, lapply(.SD, mean), by = am] 138 | expect_identical(res3, res) 139 | expect_identical(res3, res2) 140 | 141 | ############# 142 | mt_dt = as.data.table(mtcars) 143 | res = take(mtcars, agg = mean(mpg), agg2 = mean(hp), by = am) 144 | res2 = take(mt_dt, agg = mean(mpg), agg2 = mean(hp), by = am) 145 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 146 | expect_identical(res3, res) 147 | expect_identical(res3, res2) 148 | ############# 149 | mt_dt = as.data.table(mtcars) 150 | res = take(mtcars, agg := mean(mpg), agg2 = mean(hp), by = am) 151 | res2 = take(mt_dt, agg := mean(mpg), agg2 = mean(hp), by = am) 152 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 153 | expect_identical(res3, res) 154 | expect_identical(res3, res2) 155 | ############# 156 | mt_dt = as.data.table(mtcars) 157 | res = take(mtcars, agg := mean(mpg), agg2 := mean(hp), by = am) 158 | res2 = take(mt_dt, agg := mean(mpg), agg2 := mean(hp), by = am) 159 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 160 | expect_identical(res3, res) 161 | expect_identical(res3, res2) 162 | ########################## 163 | mt_dt = as.data.table(mtcars) 164 | res = take(mtcars, mean(mpg), mean(hp), by = am) 165 | res2 = take(mt_dt, mean(mpg), mean(hp), by = am) 166 | res3 = mt_dt[, list("mean(mpg)" = mean(mpg), "mean(hp)" = mean(hp)), by = am] 167 | expect_identical(res3, res) 168 | expect_identical(res3, res2) 169 | ########################## 170 | mt_dt = as.data.table(mtcars) 171 | res = take(mtcars, mean(mpg), mean(hp), by = am, autoname = FALSE) 172 | res2 = take(mt_dt, mean(mpg), mean(hp), by = am, autoname = FALSE) 173 | res3 = mt_dt[, list(mean(mpg), mean(hp)), by = am] 174 | expect_identical(res3, res) 175 | expect_identical(res3, res2) 176 | ########################## 177 | mt_dt = as.data.table(mtcars) 178 | res = take_if(mtcars, vs==0, mean(mpg), mean(hp), by = am, autoname = FALSE) 179 | res2 = take_if(mt_dt, vs==0, mean(mpg), mean(hp), by = am, autoname = FALSE) 180 | res3 = mt_dt[vs==0, list(mean(mpg), mean(hp)), by = am] 181 | expect_identical(res3, res) 182 | expect_identical(res3, res2) 183 | ########################## 184 | 185 | cat("\nContext:", "query/query_if", "\n") 186 | ########################## 187 | mt_dt = as.data.table(mtcars) 188 | res = query(mtcars, list(mean(mpg), mean(hp)), by = am) 189 | res2 = query(mt_dt, list(mean(mpg), mean(hp)), by = am) 190 | res3 = mt_dt[, list(mean(mpg), mean(hp)), by = am] 191 | expect_identical(res3, res) 192 | expect_identical(res3, res2) 193 | ########################## 194 | mt_dt = as.data.table(mtcars) 195 | res = query_if(mtcars, vs==0, list(mean(mpg), mean(hp)), by = am) 196 | res2 = query_if(mt_dt, vs==0, list(mean(mpg), mean(hp)), by = am) 197 | res3 = mt_dt[vs==0, list(mean(mpg), mean(hp)), by = am] 198 | expect_identical(res3, res) 199 | expect_identical(res3, res2) 200 | 201 | cat("\nContext:", "let/take: parametric evaluation", "\n") 202 | 203 | 204 | 205 | ############# 206 | mt_dt = as.data.table(mtcars) 207 | new_var = "agg" 208 | res = take(mtcars, (new_var) := mean(mpg), agg2 = mean(hp), by = am) 209 | res2 = take(mt_dt, (new_var) := mean(mpg), agg2 = mean(hp), by = am) 210 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 211 | expect_identical(res3, res) 212 | expect_identical(res3, res2) 213 | 214 | 215 | mt_dt = as.data.table(mtcars) 216 | new_var = "agg" 217 | new_var2 = "agg2" 218 | res = take(mtcars, (new_var) := mean(mpg), (new_var2) := mean(hp), by = am) 219 | res2 = take(mt_dt, (new_var) := mean(mpg), (new_var2) := mean(hp), by = am) 220 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 221 | expect_identical(res3, res) 222 | expect_identical(res3, res2) 223 | 224 | 225 | expr1 = quote(mean(mpg)) 226 | expr2 = quote(mean(hp)) 227 | mt_dt = as.data.table(mtcars) 228 | new_var = "agg" 229 | new_var2 = "agg2" 230 | res = take(mtcars, (new_var) := eval(expr1), (new_var2) := eval(expr2), by = am) 231 | res2 = take(mt_dt, (new_var) := eval(expr1), (new_var2) := eval(expr2), by = am) 232 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am] 233 | expect_equal(res3, res) 234 | expect_equal(res3, res2) 235 | 236 | 237 | #### 238 | data(mtcars) 239 | mt_dt = as.data.table(mtcars) 240 | expect_identical( 241 | take(mtcars, m = mean(mpg), by = cols(vs %to% gear)), 242 | mt_dt[,.(m = mean(mpg)), by = .(vs, am, gear)] 243 | ) 244 | -------------------------------------------------------------------------------- /R/to_list.R: -------------------------------------------------------------------------------- 1 | #' Apply an expression to each element of a list or vector 2 | #' 3 | #' - `to_list` always returns a list, each element of which is the 4 | #' result of expression `expr` on the elements of data. By 5 | #' default, NULL's will be removed from the result. You can change this behavior 6 | #' with `skip_null` argument. 7 | #' - `to_vec` is the same as `to_list` but tries to convert its result 8 | #' to vector via [unlist][base::unlist]. 9 | #' - `to_df` and `to_dfr` try to combine its results to data.table by rows. 10 | #' - `to_dfc` tries to combine its result to data.table by columns. 11 | #' ``` 12 | #' ``` 13 | #' Expression can use predefined variables: '.x' is a value of current list 14 | #' element, '.name' is a name of the element and '.index' is sequential number 15 | #' of the element. 16 | #' 17 | #' @param data data.frame/list/vector 18 | #' @param expr expression or function. Expression can use predefined variables: 19 | #' '.x' is a value of current list element, '.name' is a name of the element 20 | #' and '.index' is sequential number of the element. 21 | #' @param skip_null logical Should we skip NULL's from result? Default is TRUE 22 | #' @param trace FALSE by default. Should we report progress during execution? 23 | #' Possible values are TRUE, FALSE, "pb" (progress bar) or custom expression in 'quote', e. g. 'quote(print(.x))'. 24 | #' Expression can contain '.x', '.name', and '.index' variables. 25 | #' @param ... further arguments provided if 'expr' is function. 26 | #' @param trace_step integer. 1 by default. Step for reporting progress. Ignored if 'trace' argument is equal to FALSE. 27 | #' @param recursive logical. Should unlisting be applied to list components of x? For details see [unlist][base::unlist]. 28 | #' @param use.names logical. TRUE by default. Should names of source list be 29 | #' preserved? Setting it to FALSE in some cases can greatly increase 30 | #' performance. For details see [unlist][base::unlist]. 31 | #' @param idvalue expression for calculation id column. Usually it is just 32 | #' unquoted symbols: one of the '.name', '.index' or '.x'. 33 | #' @param idname character, 'item_id' by default. Name for the id column. 34 | #' 35 | #' @return 'to_list' returns list, 'to_vec' tries to return vector and other functions return data.table 36 | #' @export 37 | #' 38 | #' @examples 39 | #' 1:5 %>% 40 | #' to_list(rnorm(n = 3, .x)) 41 | #' 42 | #' # or in 'lapply' style 43 | #' 1:5 %>% 44 | #' to_list(rnorm, n = 3) %>% 45 | #' to_vec(mean) 46 | #' 47 | #' # or use an anonymous function 48 | #' 1:5 %>% 49 | #' to_list(function(x) rnorm(3, x)) 50 | #' 51 | #' # Use to_vec() to reduce output to a vector instead 52 | #' # of a list: 53 | #' # filtering - return only even numbers 54 | #' to_vec(1:10, if(.x %% 2 == 0) .x) 55 | #' 56 | #' # filtering - calculate mean only on the numeric columns 57 | #' to_vec(iris, if(is.numeric(.x)) mean(.x)) 58 | #' 59 | #' # mean for numerics, number of distincts for others 60 | #' to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x)) 61 | #' 62 | #' # means for Sepal 63 | #' to_vec(iris, if(startsWith(.name, "Sepal")) mean(.x)) 64 | #' 65 | #' # A more realistic example: split a data frame into pieces, fit a 66 | #' # model to each piece, summarise and extract R^2 67 | #' mtcars %>% 68 | #' split(.$cyl) %>% 69 | #' to_list(summary(lm(mpg ~ wt, data = .x))) %>% 70 | #' to_vec(.x$r.squared) 71 | #' 72 | #' # If each element of the output is a data frame, use 73 | #' # to_df to row-bind them together: 74 | #' mtcars %>% 75 | #' split(.$cyl) %>% 76 | #' to_list(lm(mpg ~ wt, data = .x)) %>% 77 | #' to_df(c(cyl = .name, coef(.x))) 78 | #' 79 | #' \dontrun{ 80 | #' # read all csv files in "data" to data.frame 81 | #' all_files = dir("data", pattern = "csv$", full.names = TRUE) %>% 82 | #' to_df(fread, 83 | #' idvalue = basename(.x), 84 | #' idname = "filename", 85 | #' trace = "pb" 86 | #' ) 87 | #' } 88 | to_list = function(data, 89 | expr = NULL, 90 | ..., 91 | skip_null = TRUE, 92 | trace = FALSE, 93 | trace_step = 1L 94 | ){ 95 | expr_expr = substitute(expr) 96 | if(is.null(expr_expr)) return(as.list(data)) 97 | is.numeric(trace_step) || stop("'to_list': non-numeric 'trace_step' argument.") 98 | trace_step>=1 || stop("'to_list': 'trace_step' argument should be greater or equal to one.") 99 | 100 | ## progress bar and trace 101 | if(identical(trace, "pb")){ 102 | # progress bar 103 | ._data_length = length(data) 104 | pbar = utils::txtProgressBar(min = 0, max = ._data_length, style = 3) 105 | on.exit({ 106 | utils::setTxtProgressBar(pbar, ._data_length) 107 | close(pbar) 108 | }) 109 | 110 | # progress bar 111 | trace_expr = substitute({ 112 | utils::setTxtProgressBar(pbar, min(.index - 1, ._data_length)) 113 | }) 114 | 115 | 116 | } else if(isTRUE(trace)){ 117 | # info 118 | trace_expr = quote(cat(as.character(Sys.time()), " ", 119 | .index, ": ", 120 | .name, " ", 121 | if(is.atomic(.x) && length(.x)==1 && object.size(.x)<400) .x, 122 | "\n", sep = "")) 123 | 124 | } else if(is.null(trace) || isFALSE(trace)){ 125 | 126 | # no tracing 127 | trace_expr = NULL 128 | 129 | } else { 130 | # custom tracing 131 | trace_expr = trace 132 | } 133 | 134 | if(!is.null(trace_expr) && (trace_step>1)){ 135 | trace_expr = substitute( 136 | { 137 | if(.index %% trace_step == 0) trace_expr 138 | }, 139 | list(trace_expr = trace_expr, trace_step = trace_step) 140 | ) 141 | } 142 | 143 | ### main expression 144 | ._indexes = seq_along(data) 145 | ._names = names(data) 146 | names(._indexes) = ._names 147 | if(is.null(._names)) ._names = rep("", length(data)) 148 | data = force(data) # for new version of magrittr 149 | if(is.symbol(expr_expr) || (length(expr_expr)>1 && identical(as.character(expr_expr[[1]]), "function"))){ 150 | if(is.null(trace_expr)){ 151 | # simple lapply case 152 | res = lapply(data, expr, ...) 153 | } else { 154 | # simple lapply case with trace 155 | expr_expr = substitute(expr(.x, ...)) 156 | expr_expr = eval.parent(substitute({function(.index, ...) 157 | { 158 | .x = data[[.index]] 159 | .value = data[[.index]] 160 | .name = ._names[[.index]] 161 | trace_expr 162 | expr_expr 163 | }})) 164 | res = lapply(._indexes, expr_expr, ...) 165 | } 166 | } else { 167 | # expression 168 | expr_expr = eval.parent(substitute({function(.index, ...) 169 | { 170 | .x = data[[.index]] 171 | .value = data[[.index]] 172 | .name = ._names[[.index]] 173 | trace_expr 174 | expr_expr 175 | }})) 176 | res = lapply(._indexes, expr_expr, ...) 177 | } 178 | 179 | ####### 180 | if(skip_null){ 181 | nulls = vapply(res, is.null, FUN.VALUE = logical(1), USE.NAMES = FALSE) 182 | res = res[!nulls] 183 | } 184 | res 185 | } 186 | 187 | #' @rdname to_list 188 | #' @export 189 | to_vec = function(data, 190 | expr = NULL, 191 | ..., 192 | skip_null = TRUE, 193 | trace = FALSE, 194 | trace_step = 1L, 195 | recursive = TRUE, 196 | use.names = TRUE){ 197 | res = eval.parent(substitute(maditr::to_list(data, expr, ..., skip_null = skip_null, trace = trace, trace_step = trace_step))) 198 | unlist(res, recursive = recursive, use.names = use.names) 199 | 200 | } 201 | 202 | 203 | #' @rdname to_list 204 | #' @export 205 | to_df = function(data, 206 | expr = NULL, 207 | ..., 208 | trace = FALSE, 209 | trace_step = 1L, 210 | idvalue = NULL, 211 | idname = "item_id"){ 212 | res = eval.parent( 213 | substitute( 214 | maditr::to_list(data, 215 | expr, 216 | ..., 217 | trace = trace, 218 | trace_step = trace_step, 219 | skip_null = FALSE) 220 | ) 221 | ) 222 | for(i in seq_along(res)){ 223 | if(!is.null(res[[i]]) && !is.list(res[[i]])){ 224 | res[[i]] = as.data.table(as.list(res[[i]])) 225 | } 226 | } 227 | idvalue_expr = substitute(idvalue) 228 | if(!is.null(idvalue_expr)){ 229 | idvalue = eval.parent(substitute(maditr::to_list(data, expr = (idvalue), skip_null = FALSE))) 230 | for(i in seq_along(data)){ 231 | if(!is.null(res[[i]]) && !is.null(idvalue[[i]])){ 232 | res[[i]][[idname]] = idvalue[[i]] 233 | } 234 | } 235 | } 236 | rbindlist(res, use.names=TRUE, fill=TRUE) 237 | } 238 | 239 | #' @rdname to_list 240 | #' @export 241 | to_dfr = to_df 242 | 243 | #' @rdname to_list 244 | #' @export 245 | to_dfc = function(data, 246 | expr = NULL, 247 | ..., 248 | trace = FALSE, 249 | trace_step = 1){ 250 | res = eval.parent( 251 | substitute( 252 | maditr::to_list(data, 253 | expr, 254 | ..., 255 | trace = trace, 256 | trace_step = trace_step) 257 | ) 258 | ) 259 | as.data.table(res) 260 | } 261 | 262 | 263 | 264 | -------------------------------------------------------------------------------- /R/maditr-package.R: -------------------------------------------------------------------------------- 1 | #' maditr: Pipe-Style Interface for 'data.table' 2 | #' 3 | #' Package provides pipe-style interface for `data.table`. It preserves 4 | #' all data.table features without significant impact on performance. '`let`' 5 | #' and '`take`' functions are simplified interfaces for most common data 6 | #' manipulation tasks. 7 | #' 8 | #' - To select rows from data: `rows(mtcars, am==0)` 9 | #' - To select columns from data: `columns(mtcars, mpg, vs:carb)` 10 | #' - To aggregate data: `take(mtcars, mean_mpg = mean(mpg), by = am)` 11 | #' - To aggregate all non-grouping columns: `take_all(mtcars, mean, by = am)` 12 | #' - To aggregate several columns with one summary: `take(mtcars, mpg, hp, fun = mean, by = am)` 13 | #' - To get total summary skip `by` argument: `take_all(mtcars, mean)` 14 | #' - Use magrittr pipe '%>%' to chain several operations: 15 | #' ``` 16 | #' mtcars %>% 17 | #' let(mpg_hp = mpg/hp) %>% 18 | #' take(mean(mpg_hp), by = am) 19 | #' ``` 20 | #' - To modify variables or add new variables: 21 | #' ``` 22 | #' mtcars %>% 23 | #' let(new_var = 42, 24 | #' new_var2 = new_var*hp) %>% 25 | #' head() 26 | #' ``` 27 | #' - To modify all non-grouping variables: 28 | #' ``` 29 | #' iris %>% 30 | #' let_all( 31 | #' scaled = (.x - mean(.x))/sd(.x), 32 | #' by = Species) %>% 33 | #' head() 34 | #' ``` 35 | #' - To drop variable assign NULL: `let(mtcars, am = NULL) %>% head()` 36 | #' - To aggregate all variables conditionally on name: 37 | #' ``` 38 | #' iris %>% 39 | #' take_all( 40 | #' mean = if(startsWith(.name, "Sepal")) mean(.x), 41 | #' median = if(startsWith(.name, "Petal")) median(.x), 42 | #' by = Species 43 | #' ) 44 | #' ``` 45 | #' - For parametric assignment use ':=': 46 | #' ``` 47 | #' new_var = "my_var" 48 | #' old_var = "mpg" 49 | #' mtcars %>% 50 | #' let((new_var) := get(old_var)*2) %>% 51 | #' head() 52 | #' ``` 53 | #' - For more sophisticated operations see 'query'/'query_if': these 54 | #' functions translates its arguments one-to-one to '`[.data.table`' 55 | #' method. Additionally there are some conveniences such as automatic 56 | #' 'data.frame' conversion to 'data.table'. 57 | #' 58 | #' @examples 59 | #' # examples form 'dplyr' package 60 | #' data(mtcars) 61 | #' \donttest{ 62 | #' # Newly created variables are available immediately 63 | #' mtcars %>% 64 | #' let( 65 | #' cyl2 = cyl * 2, 66 | #' cyl4 = cyl2 * 2 67 | #' ) %>% 68 | #' head() 69 | #' 70 | #' # You can also use let() to remove variables and 71 | #' # modify existing variables 72 | #' mtcars %>% 73 | #' let( 74 | #' mpg = NULL, 75 | #' disp = disp * 0.0163871 # convert to litres 76 | #' ) %>% 77 | #' head() 78 | #' 79 | #' 80 | #' # window functions are useful for grouped computations 81 | #' mtcars %>% 82 | #' let(rank = rank(-mpg, ties.method = "min"), 83 | #' by = cyl) %>% 84 | #' head() 85 | #' 86 | #' # You can drop variables by setting them to NULL 87 | #' mtcars %>% let(cyl = NULL) %>% head() 88 | #' 89 | #' # keeps all existing variables 90 | #' mtcars %>% 91 | #' let(displ_l = disp / 61.0237) %>% 92 | #' head() 93 | #' 94 | #' # keeps only the variables you create 95 | #' mtcars %>% 96 | #' take(displ_l = disp / 61.0237) 97 | #' 98 | #' 99 | #' # can refer to both contextual variables and variable names: 100 | #' var = 100 101 | #' mtcars %>% 102 | #' let(cyl = cyl * var) %>% 103 | #' head() 104 | #' 105 | #' # select rows 106 | #' mtcars %>% 107 | #' rows(am==0) %>% 108 | #' head() 109 | #' 110 | #' # select rows with compound condition 111 | #' mtcars %>% 112 | #' rows(am==0 & mpg>mean(mpg)) 113 | #' 114 | #' # select columns 115 | #' mtcars %>% 116 | #' columns(vs:carb, cyl) 117 | #' 118 | #' mtcars %>% 119 | #' columns(-am, -cyl) 120 | #' 121 | #' # regular expression pattern 122 | #' columns(iris, "^Petal") # variables which start from 'Petal' 123 | #' columns(iris, "Width$") # variables which end with 'Width' 124 | #' 125 | #' # move Species variable to the front 126 | #' # pattern "^." matches all variables 127 | #' columns(iris, Species, "^.") 128 | #' 129 | #' # pattern "^.*al" means "contains 'al'" 130 | #' columns(iris, "^.*al") 131 | #' 132 | #' # numeric indexing - all variables except Species 133 | #' columns(iris, 1:4) 134 | #' 135 | #' # A 'take' with summary functions applied without 'by' argument returns an aggregated data 136 | #' mtcars %>% 137 | #' take(mean = mean(disp), n = .N) 138 | #' 139 | #' # Usually, you'll want to group first 140 | #' mtcars %>% 141 | #' take(mean = mean(disp), n = .N, by = cyl) 142 | #' 143 | #' # You can group by expressions: 144 | #' mtcars %>% 145 | #' take_all(mean, by = list(vsam = vs + am)) 146 | #' 147 | #' # modify all non-grouping variables in-place 148 | #' mtcars %>% 149 | #' let_all((.x - mean(.x))/sd(.x), by = am) %>% 150 | #' head() 151 | #' 152 | #' # modify all non-grouping variables to new variables 153 | #' mtcars %>% 154 | #' let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>% 155 | #' head() 156 | #' 157 | #' # conditionally modify all variables 158 | #' iris %>% 159 | #' let_all(mean = if(is.numeric(.x)) mean(.x)) %>% 160 | #' head() 161 | #' 162 | #' # modify all variables conditionally on name 163 | #' iris %>% 164 | #' let_all( 165 | #' mean = if(startsWith(.name, "Sepal")) mean(.x), 166 | #' median = if(startsWith(.name, "Petal")) median(.x), 167 | #' by = Species 168 | #' ) %>% 169 | #' head() 170 | #' 171 | #' # aggregation with 'take_all' 172 | #' mtcars %>% 173 | #' take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am) 174 | #' 175 | #' # conditionally aggregate all variables 176 | #' iris %>% 177 | #' take_all(mean = if(is.numeric(.x)) mean(.x)) 178 | #' 179 | #' # aggregate all variables conditionally on name 180 | #' iris %>% 181 | #' take_all( 182 | #' mean = if(startsWith(.name, "Sepal")) mean(.x), 183 | #' median = if(startsWith(.name, "Petal")) median(.x), 184 | #' by = Species 185 | #' ) 186 | #' 187 | #' # parametric evaluation: 188 | #' var = quote(mean(cyl)) 189 | #' mtcars %>% 190 | #' let(mean_cyl = eval(var)) %>% 191 | #' head() 192 | #' take(mtcars, eval(var)) 193 | #' 194 | #' # all together 195 | #' new_var = "mean_cyl" 196 | #' mtcars %>% 197 | #' let((new_var) := eval(var)) %>% 198 | #' head() 199 | #' take(mtcars, (new_var) := eval(var)) 200 | #' 201 | #' ######################################## 202 | #' # variable selection 203 | #' 204 | #' # range selection 205 | #' iris %>% 206 | #' let( 207 | #' avg = rowMeans(Sepal.Length %to% Petal.Width) 208 | #' ) %>% 209 | #' head() 210 | #' 211 | #' # multiassignment 212 | #' iris %>% 213 | #' let( 214 | #' # starts with Sepal or Petal 215 | #' multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2 216 | #' ) %>% 217 | #' head() 218 | #' 219 | #' 220 | #' mtcars %>% 221 | #' let( 222 | #' # text expansion 223 | #' cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale) 224 | #' ) %>% 225 | #' head() 226 | #' 227 | #' # range selection in 'by' 228 | #' # range selection + additional column 229 | #' mtcars %>% 230 | #' take( 231 | #' res = sum(cols(mpg, disp %to% drat)), 232 | #' by = vs %to% gear 233 | #' ) 234 | #' } 235 | "_PACKAGE" 236 | 237 | #' @import magrittr 238 | #' @import data.table 239 | 240 | 241 | #' @export 242 | magrittr::`%>%` 243 | 244 | #' @export 245 | magrittr::`%<>%` 246 | 247 | #' @export 248 | magrittr::`%$%` 249 | 250 | #' @export 251 | magrittr::`%T>%` 252 | 253 | #' @export 254 | data.table::data.table 255 | 256 | 257 | #' @export 258 | data.table::as.data.table 259 | #' @export 260 | data.table::is.data.table 261 | #' @export 262 | data.table::last 263 | #' @export 264 | data.table::first 265 | 266 | #' @export 267 | data.table::`%like%` 268 | #' @export 269 | data.table::between 270 | #' @export 271 | data.table::`%between%` 272 | #' @export 273 | data.table::inrange 274 | #' @export 275 | data.table::`%inrange%` 276 | #' @export 277 | data.table::`:=` 278 | #' @export 279 | data.table::setnames 280 | #' @export 281 | data.table::setcolorder 282 | #' @export 283 | data.table::set 284 | #' @export 285 | data.table::setDT 286 | #' @export 287 | data.table::setDF 288 | #' @export 289 | data.table::setorder 290 | #' @export 291 | data.table::setorderv 292 | #' @export 293 | data.table::setkey 294 | #' @export 295 | data.table::setkeyv 296 | #' @export 297 | data.table::setindex 298 | #' @export 299 | data.table::setindexv 300 | #' @export 301 | data.table::chmatch 302 | #' @export 303 | data.table::`%chin%` 304 | #' @export 305 | data.table::rbindlist 306 | #' @export 307 | data.table::fread 308 | #' @export 309 | data.table::fwrite 310 | #' @export 311 | data.table::foverlaps 312 | #' @export 313 | data.table::shift 314 | #' @export 315 | data.table::transpose 316 | #' @export 317 | data.table::tstrsplit 318 | #' @export 319 | data.table::frank 320 | #' @export 321 | data.table::frankv 322 | #' @export 323 | data.table::.SD 324 | #' @export 325 | data.table::.N 326 | #' @export 327 | data.table::.I 328 | #' @export 329 | data.table::.GRP 330 | #' @export 331 | data.table::.BY 332 | #' @export 333 | data.table::rleid 334 | #' @export 335 | data.table::rleidv 336 | #' @export 337 | data.table::rowid 338 | #' @export 339 | data.table::rowidv 340 | #' @export 341 | data.table::uniqueN 342 | #' @export 343 | data.table::setDTthreads 344 | #' @export 345 | data.table::getDTthreads 346 | #' @export 347 | data.table::fintersect 348 | #' @export 349 | data.table::fsetdiff 350 | #' @export 351 | data.table::funion 352 | #' @export 353 | data.table::fsetequal 354 | #' @export 355 | data.table::shouldPrint 356 | #' @export 357 | data.table::fsort 358 | #' @export 359 | data.table::groupingsets 360 | #' @export 361 | data.table::cube 362 | #' @export 363 | data.table::rollup 364 | #' @export 365 | data.table::nafill 366 | #' @export 367 | data.table::setnafill 368 | #' @export 369 | data.table::frollmean 370 | #' @export 371 | data.table::frollsum 372 | #' @export 373 | data.table::fcoalesce 374 | #' @export 375 | data.table::fifelse 376 | #' @export 377 | data.table::fcase 378 | #' @export 379 | data.table::frollapply 380 | -------------------------------------------------------------------------------- /R/let_all.R: -------------------------------------------------------------------------------- 1 | ###################### let_all ################# 2 | 3 | #' @export 4 | #' @rdname let_if 5 | let_all = function(data, 6 | ..., 7 | by, 8 | keyby, 9 | .SDcols, 10 | suffix = TRUE, 11 | sep = "_", 12 | i 13 | ){ 14 | UseMethod("let_all") 15 | } 16 | 17 | #' @export 18 | let_all.etable = function(data, 19 | ..., 20 | by, 21 | keyby, 22 | .SDcols, 23 | suffix = TRUE, 24 | sep = "_", 25 | i 26 | ){ 27 | data_class = class(data) 28 | data = as.data.table(data) 29 | res = eval.parent( 30 | substitute(maditr::let_all(data, 31 | ..., 32 | by = by, 33 | keyby = keyby, 34 | .SDcols = .SDcols, 35 | suffix = suffix, 36 | sep = sep, 37 | i = i 38 | ) 39 | ) 40 | ) 41 | setDF(res) 42 | class(res) = data_class 43 | res 44 | } 45 | 46 | #' @export 47 | let_all.data.frame = function(data, 48 | ..., 49 | by, 50 | keyby, 51 | .SDcols, 52 | suffix = TRUE, 53 | sep = "_", 54 | i 55 | ){ 56 | 57 | # if data is expression we want to calculate it only once 58 | data = force(data) 59 | 60 | j_expr = substitute(list(...)) 61 | 62 | parent_frame = parent.frame() 63 | 64 | j_expr = as.list(j_expr)[-1] 65 | (length(j_expr) == 0) && stop("'let_all' - missing expressions. You should provide at least one expression.") 66 | 67 | 68 | j_expr = add_names_from_walrus_assignement(j_expr, envir = parent.frame()) 69 | j_expr = add_names_from_single_symbol(j_expr) 70 | 71 | ################# 72 | ## naming 73 | ## suffix = FALSE - prefix, if TRUE it will be suffix 74 | ## no names 75 | ## if single symbol expr all names will be left as is 76 | ## if multiple symbol expr names will be prefixed/suffixed with this symbol "_" 77 | ## if complex expr and there is no names then original names will be left as is 78 | ## duplicated names will be made unique 79 | # 80 | # we need to know resulting names 81 | # this is simplest method to escape complexities with by, keyby and SDCols interaction 82 | one_row = as.data.table(data[1,, drop = FALSE]) 83 | ._orig_names = eval.parent(substitute(maditr::query(one_row, list(._res_names = names(.SD)), 84 | by = by, 85 | keyby = keyby, 86 | .SDcols = .SDcols 87 | )))[["._res_names"]] 88 | j_expr_names = names(j_expr) 89 | j_expr_names[!(j_expr_names %in% "")] = make.unique(j_expr_names[!(j_expr_names %in% "")]) 90 | ._all_names = lapply(j_expr_names, function(curr_name){ 91 | if(curr_name == "") return(._orig_names) 92 | if(suffix){ 93 | paste(._orig_names, curr_name, sep = sep) 94 | } else { 95 | paste(curr_name, ._orig_names, sep = sep) 96 | } 97 | }) 98 | 99 | 100 | to_drop = new.env() 101 | ._data_names = names(data) 102 | names(j_expr) = NULL 103 | ### 104 | for(j in seq_along(j_expr)){ 105 | expr = j_expr[[j]] 106 | 107 | 108 | if((is.symbol(expr) && !identical(expr, quote(.name)) && 109 | !identical(expr, quote(.index)) && 110 | !identical(expr, quote(.x)) && 111 | !identical(expr, quote(.value)) && 112 | !identical(expr, quote(.N)) && 113 | !identical(expr, quote(.GRP))) || 114 | (length(expr)>1 && as.character(expr[[1]]) == "function")){ 115 | # let_all(data, scale) 116 | # let_all(data, function(x) scale(x)) 117 | expr = substitute(lapply(.SD, expr)) 118 | } else { 119 | # let_all(data, scale(.x)) 120 | expr = substitute({ 121 | .data_names = ._data_names 122 | lapply(names(.SD), function(.name) { 123 | .value = get(.name) 124 | .x = get(.name) 125 | .index = match(.name, .data_names) 126 | expr 127 | }) 128 | }) 129 | } 130 | 131 | if(identical(._all_names[[j]], ._orig_names)){ 132 | j_expr[[j]] = substitute( 133 | { 134 | 135 | res = expr 136 | # if expression returns NULL we leave this variable unchanged 137 | empty = which(vapply(res, is.null, FUN.VALUE = logical(1))) 138 | if(length(empty)>0) { 139 | res[empty] = .SD[ ,empty, with = FALSE] 140 | } 141 | res 142 | } 143 | ) 144 | next() 145 | } 146 | 147 | ################## 148 | ._new_names = ._all_names[[j]] 149 | # let_all(data, my_new_name = scale(.x)) 150 | j_expr[[j]] = substitute( 151 | { 152 | res = expr 153 | # if expression returns NULL we should skip this variable 154 | empty = which(vapply(res, is.null, FUN.VALUE = logical(1))) 155 | if(length(empty)>0) { 156 | res[empty] = NA 157 | lapply(._new_names[empty], assign, value = NA, envir = to_drop) 158 | } 159 | res 160 | } 161 | ) 162 | 163 | } 164 | 165 | #### 166 | if(length(j_expr)>1){ 167 | j_expr = as.call(c(list(quote(c)), j_expr)) 168 | } else { 169 | j_expr = j_expr[[1]] 170 | } 171 | #### 172 | ._all_names = unlist(._all_names, recursive = TRUE, use.names = FALSE) 173 | 174 | # NULL is just a placeholder 175 | expr = substitute(NULL[i, 176 | (._all_names) := j_expr, 177 | by = by, 178 | keyby = keyby, 179 | .SDcols = .SDcols 180 | ] 181 | ) 182 | 183 | res = eval_in_parent_frame(data, expr, frame = parent_frame) 184 | if(length(to_drop)>0){ 185 | res[,(names(to_drop)):=NULL] 186 | } 187 | res 188 | } 189 | 190 | 191 | 192 | ###################### take_all ################# 193 | 194 | #' @export 195 | #' @rdname let_if 196 | take_all = function(data, 197 | ..., 198 | by, 199 | keyby, 200 | .SDcols, 201 | suffix = TRUE, 202 | sep = "_", 203 | i 204 | ){ 205 | UseMethod("take_all") 206 | } 207 | 208 | #' @export 209 | take_all.data.frame = function(data, 210 | ..., 211 | by, 212 | keyby, 213 | .SDcols, 214 | suffix = TRUE, 215 | sep = "_", 216 | i 217 | ){ 218 | 219 | j_expr = substitute(list(...)) 220 | 221 | parent_frame = parent.frame() 222 | 223 | j_expr = as.list(j_expr)[-1] 224 | (length(j_expr) == 0) && stop("'take_all' - missing expressions. You should provide at least one expression.") 225 | 226 | 227 | j_expr = add_names_from_walrus_assignement(j_expr, envir = parent.frame()) 228 | j_expr = add_names_from_single_symbol(j_expr) 229 | 230 | 231 | ################# 232 | ## naming 233 | ## suffix = FALSE - prefix, if TRUE it will be suffix 234 | ## no names 235 | ## if single symbol expr all names will be left as is 236 | ## if multiple symbol expr names will be prefixed/suffixed with this symbol "_" 237 | ## if complex expr and there is no names then original names will be left as is 238 | ## duplicated names will be made unique (??) 239 | 240 | # if data is expression we want to calculate it only once 241 | 242 | 243 | ._data_names = names(data) 244 | j_names = names(j_expr) 245 | names(j_expr) = NULL 246 | for(j in seq_along(j_expr)){ 247 | expr = j_expr[[j]] 248 | expr = substitute_symbols(expr, list( 249 | '.value' = quote(.SD[[.name]]), 250 | '.x' = quote(.SD[[.name]]), 251 | '.index' = substitute(match(.name, ._data_names)) 252 | )) 253 | 254 | if((is.symbol(expr) && !identical(expr, quote(.name)) && 255 | !identical(expr, quote(.N)) && 256 | !identical(expr, quote(.GRP))) || 257 | (length(expr)>1 && as.character(expr[[1]]) == "function")){ 258 | # take_all(data, mean) 259 | # take_all(data, function(x) mean(x, na.rm = TRUE)) 260 | expr = substitute(lapply(.SD, expr)) 261 | } else { 262 | # take_all(data, mean(.x)) 263 | expr = substitute(lapply(names(.SD), function(.name) expr)) 264 | } 265 | curr_name = j_names[j] 266 | if(curr_name != ""){ 267 | if(suffix){ 268 | name_expr = substitute(paste(names(.SD), curr_name, sep = sep)) 269 | } else { 270 | name_expr = substitute(paste(curr_name, names(.SD), sep = sep)) 271 | } 272 | } else { 273 | name_expr = substitute(names(.SD)) 274 | } 275 | 276 | ########################## 277 | j_expr[[j]] = substitute({ 278 | res = expr 279 | names(res) = name_expr 280 | not_empty = which(!vapply(res, is.null, FUN.VALUE = logical(1))) 281 | res[not_empty] 282 | }) 283 | 284 | } 285 | if(length(j_expr)>1){ 286 | j_expr = as.call(c(list(quote(c)), j_expr)) 287 | } else { 288 | j_expr = j_expr[[1]] 289 | } 290 | #### 291 | # NULL is just a placeholder 292 | expr = substitute(NULL[i, j_expr, 293 | by = by, 294 | keyby = keyby, 295 | .SDcols = .SDcols]) 296 | 297 | res = eval_in_parent_frame(data, expr, frame = parent_frame) 298 | setnames(res, make.unique(names(res))) 299 | res 300 | } 301 | 302 | 303 | 304 | -------------------------------------------------------------------------------- /R/to_wide.R: -------------------------------------------------------------------------------- 1 | variable = NULL # to pass CRAN check 2 | value = NULL # to pass CRAN check 3 | 4 | 5 | #' Convert data to long or to wide form 6 | #' 7 | #' `to_long` increases number of rows in the dataset and reduce number of 8 | #' columns. `to_wide` makes invert transformation. You can use [cols] for 9 | #' selecting variables in the arguments. See examples. 10 | #' 11 | #' @param data A data.frame to convert 12 | #' @param columns unquoted names of variables for stacking. When missing, we 13 | #' will stack all columns outside `keep` columns. 14 | #' @param keep unquoted names of columns which will be kept as is, e. g. only 15 | #' recycled or deduplicated. If missing, it is all columns except stacked or 16 | #' unstacked. If `FALSE` then nothing will be kept. 17 | #' @param names_in name of the stacked variable names column. The default name 18 | #' is 'variable'. It is quoted in the `to_long` and unquoted in `to_wide`. If 19 | #' `FALSE` in the `to_wide` than nothing will be widening. 20 | #' @param values_in name(-s) of the stacked data values column(s). The default 21 | #' name is 'value'. Multiple names can be provided here for the case when 22 | #' `columns` is a list, though note well that the names provided in 23 | #' `columns` take precedence. It is quoted in the `to_long` and unqoted in `to_wide` 24 | #' @param drop_na If TRUE, NA values will be removed from the stacked data. 25 | #' @param names_factor If TRUE, the column with names will be converted to 26 | #' factor, else it will be a character column. TRUE by default. 27 | #' @param value_factor If TRUE, the value column will be converted to factor, 28 | #' else the stacked values type is left unchanged. FALSE by default. 29 | #' @param fun Should the data be aggregated before casting? By default, it is 30 | #' `identity` - no aggregation. To use multiple aggregation functions, pass a 31 | #' list; see Examples. 32 | #' @param sep Character vector of length 1, indicating the separating character 33 | #' in variable names generated during casting. Default is "_". 34 | #' @param fill Value with which to fill missing cells. `NA` by default. If `fun` is 35 | #' present, takes the value by applying the function on a 0-length vector. 36 | #' @param missing_comb One of "none" (the default), "rows" - include missing 37 | #' combinations in rows, "columns" - include missing combinations in columns, 38 | #' and "all" include all missing combinations. 39 | #' @param ... other arguments passed to `data.table::melt`/`data.table::dcast` 40 | #' @return data.table in the wide or long form. 41 | #' @export 42 | #' 43 | #' @examples 44 | #' data(iris) 45 | #' 46 | #' # 'to_long' 47 | #' 48 | #' long_iris = iris %>% 49 | #' to_long(keep = Species) 50 | #' 51 | #' long_iris 52 | #' 53 | #' iris_with_stat = long_iris %>% 54 | #' take(mean = mean(value), 55 | #' sd = sd(value), 56 | #' n = .N*1.0, 57 | #' by = .(Species, variable) 58 | #' ) %>% 59 | #' to_long(columns = c(mean, sd, n), names_in = "stat") 60 | #' 61 | #' # 'to_wide' - table with multiple stats 62 | #' iris_with_stat %>% 63 | #' to_wide() 64 | #' 65 | #' 66 | #' iris_with_stat %>% 67 | #' to_wide(names_in = c(variable, stat)) 68 | #' 69 | #' iris_with_stat %>% 70 | #' to_wide(names_in = c(variable, Species)) 71 | #' 72 | #' # 'to_wide' - aggregation function 73 | #' long_iris %>% 74 | #' to_wide(fun = list(Mean = mean, SD = sd, N = length)) 75 | #' 76 | #' 77 | #' # '%to%' selector - example from tidyr::pivot_longer 78 | #' 79 | #' data(anscombe) 80 | #' anscombe %>% 81 | #' to_long( 82 | #' list(x = x1 %to% x4, y = y1 %to% y4), 83 | #' names_in = "set" 84 | #' ) 85 | #' 86 | #' ###################################### 87 | #' ## Examples from data.table melt/dcast 88 | #' ###################################### 89 | #' 90 | #' set.seed(45) 91 | #' DT = data.table( 92 | #' i_1 = c(1:5, NA)*1.0, 93 | #' i_2 = c(NA,6,7,8,9,10)*1.0, 94 | #' f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)), 95 | #' f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE), 96 | #' c_1 = sample(c(letters[1:3], NA), 6, TRUE), 97 | #' d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"), 98 | #' d_2 = as.Date(6:1, origin="2012-01-01") 99 | #' ) 100 | #' 101 | #' # id, values as character/integer/numeric vectors 102 | #' 103 | #' to_long(DT, f_1, keep = 1:2) 104 | #' to_long(DT, f_1, keep = c(i_1, i_2)) 105 | #' to_long(DT, f_1, keep = i_1 %to% i_2) 106 | #' to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE) 107 | #' to_long(DT, f_1, keep = cols("i_{1:2}")) 108 | #' to_long(DT, f_1, keep = cols("^i_")) 109 | #' to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val") 110 | #' 111 | #' col_var = "^i_" 112 | #' to_long(DT, 3, keep = cols(col_var)) 113 | #' 114 | #' to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE) 115 | #' 116 | #' to_long(mtcars) 117 | #' to_long(mtcars, keep = am) 118 | #' to_long(mtcars, columns = c(am, vs, mpg)) 119 | #' to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE) 120 | #' to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE) 121 | #' to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE) 122 | #' 123 | #' data("ChickWeight") 124 | #' names(ChickWeight) = tolower(names(ChickWeight)) 125 | #' DT = to_long(ChickWeight, keep=2:4) 126 | #' 127 | #' to_wide(DT, keep = time, fun = mean) 128 | #' to_wide(DT, keep = FALSE, fun = mean) 129 | #' to_wide(DT, keep = diet, fun = mean) 130 | #' to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all") 131 | #' to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0) 132 | #' to_wide(DT, chick, time, fun = mean) 133 | #' 134 | #' 135 | #' 136 | #' # using FALSE 137 | #' DT = data.table(v1 = rep(1:2, each = 6), 138 | #' v2 = rep(rep(1:3, 2), each = 2), 139 | #' v3 = rep(1:2, 6), 140 | #' v4 = rnorm(6)) 141 | #' 142 | #' ## for each combination of (v1, v2), add up all values of v4 143 | #' to_wide(DT, 144 | #' cols("^v(1|2)"), 145 | #' names_in = FALSE, 146 | #' values_in = v4, 147 | #' fun = sum 148 | #' ) 149 | #' 150 | #' # multiple values_in and multiple fun 151 | #' DT = data.table(x=sample(5,20,TRUE), 152 | #' y=sample(2,20,TRUE), 153 | #' z=sample(letters[1:2], 20,TRUE), 154 | #' d1 = runif(20), 155 | #' d2=1L) 156 | #' 157 | #' # multiple values_in 158 | #' to_wide(DT, 159 | #' keep = c(x, y), 160 | #' names_in = z, 161 | #' values_in = c(d1, d2), 162 | #' fun = sum, 163 | #' fill = 0) 164 | #' 165 | #' # multiple funs 166 | #' to_wide(DT, 167 | #' keep = c(x, y), 168 | #' names_in = z, 169 | #' values_in = d1, 170 | #' fun = list(sum = sum, mean = mean), 171 | #' fill = NULL) 172 | #' 173 | #' # multiple fun and values_in (all combinations) 174 | #' to_wide(DT, 175 | #' keep = c(x, y), 176 | #' names_in = z, 177 | #' values_in = c(d1, d2), 178 | #' fun = list(sum = sum, mean = mean) 179 | #' ) 180 | #' 181 | #' # multiple fun and values_in (one-to-one) 182 | #' to_wide(DT, 183 | #' keep = c(x, y), 184 | #' names_in = z, 185 | #' values_in = list(d1, d2), 186 | #' fun = list(sum = sum, mean = mean) 187 | #' ) 188 | to_long = function(data, 189 | columns = NULL, 190 | keep = NULL, 191 | names_in = "variable", 192 | values_in = "value", 193 | drop_na = FALSE, 194 | names_factor = TRUE, 195 | value_factor = FALSE, 196 | ...){ 197 | parent_frame = parent.frame() 198 | data_names = names(data) 199 | data_names_list = create_list_with_names(data_names) 200 | columns_expr = substitute(columns) 201 | columns_expr = replace_column_expr(columns_expr, data_names, parent_frame, type = "names") 202 | columns = eval(columns_expr, data_names_list, enclos = parent_frame) 203 | 204 | keep_expr = substitute(keep) 205 | keep_expr = replace_column_expr(keep_expr, data_names, parent_frame, type = "names") 206 | keep = eval(keep_expr, data_names_list, enclos = parent_frame) 207 | if(isFALSE(keep)) keep = integer(0)# no keep 208 | 209 | if(is.null(columns) && is.null(keep)) columns = data_names 210 | maditr::melt(data = data, 211 | id.vars = keep, 212 | measure.vars = columns, 213 | variable.name = names_in, 214 | value.name = values_in, 215 | na.rm = drop_na, 216 | variable.factor = names_factor, 217 | value.factor = value_factor, 218 | ... 219 | ) 220 | } 221 | 222 | 223 | #' @export 224 | #' @rdname to_long 225 | to_wide = function(data, 226 | keep = NULL, 227 | names_in = variable, 228 | values_in = value, 229 | fun = identity, 230 | sep = "_", 231 | fill = NA, 232 | missing_comb = c("none", "rows", "columns", "all"), 233 | ... 234 | ){ 235 | parent_frame = parent.frame() 236 | data_names = names(data) 237 | data_names_list = create_list_with_names(data_names) 238 | names_in_expr = substitute(names_in) 239 | names_in_expr = replace_column_expr(names_in_expr, data_names, parent_frame, type = "names") 240 | names_in = eval(names_in_expr, data_names_list, enclos = parent_frame) 241 | if(isFALSE(names_in)) names_in = "." # no widening 242 | 243 | values_in_expr = substitute(values_in) 244 | values_in_expr = replace_column_expr(values_in_expr, data_names, parent_frame, type = "names") 245 | values_in = eval(values_in_expr, data_names_list, enclos = parent_frame) 246 | 247 | keep_expr = substitute(keep) 248 | keep_expr = replace_column_expr(keep_expr, data_names, parent_frame, type = "names") 249 | keep = eval(keep_expr, data_names_list, enclos = parent_frame) 250 | if(isFALSE(keep)) keep = "." # no keep 251 | if(is.null(keep)) keep = "..." # keep all 252 | 253 | 254 | 255 | formula = stats::as.formula(paste(paste(keep, collapse = "+"), "~", paste(names_in, collapse = "+"))) 256 | missing_comb = match.arg(missing_comb) 257 | drop = switch(missing_comb, 258 | "none" = TRUE, 259 | "rows" = c(FALSE, TRUE), 260 | "columns" = c(TRUE, FALSE), 261 | "all" = c(FALSE, FALSE) 262 | ) 263 | maditr::dcast(data = data, 264 | formula, 265 | fun.aggregate = fun, 266 | sep = sep, 267 | fill = fill, 268 | drop = drop, 269 | value.var = values_in, 270 | ... 271 | ) 272 | } 273 | 274 | 275 | -------------------------------------------------------------------------------- /vignettes/Introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction" 3 | output: 4 | html_document: 5 | toc: true 6 | vignette: > 7 | %\VignetteIndexEntry{maditr: Introduction} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{utf8} 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = TRUE) 14 | data.table::setDTthreads(2) 15 | ``` 16 | 17 | ## Links 18 | 19 | - [maditr on CRAN](https://cran.r-project.org/package=maditr) 20 | - [maditr on Github](https://github.com/gdemin/maditr) 21 | - [Issues](https://github.com/gdemin/maditr/issues) 22 | 23 | ## Overview 24 | 25 | Package provides pipe-style interface for [data.table](https://cran.r-project.org/package=data.table) package. It preserves all data.table features without significant impact on performance. `let` and `take` functions are simplified interfaces for most common data manipulation tasks. 26 | 27 | - To select rows from data: `rows(mtcars, am==0)` 28 | - To select columns from data: `columns(mtcars, mpg, vs:carb)` 29 | - To aggregate data: `take(mtcars, mean_mpg = mean(mpg), by = am)` 30 | - To aggregate all non-grouping columns: `take_all(mtcars, mean, by = am)` 31 | - To aggregate several columns with one summary: `take(mtcars, mpg, hp, fun = mean, by = am)` 32 | - To get total summary skip `by` argument: `take_all(mtcars, mean)` 33 | - Use magrittr pipe `%>%` to chain several operations: 34 | ```{r, eval=FALSE} 35 | mtcars %>% 36 | let(mpg_hp = mpg/hp) %>% 37 | take(mean(mpg_hp), by = am) 38 | ``` 39 | - To modify variables or add new variables: 40 | ```{r, eval=FALSE} 41 | mtcars %>% 42 | let(new_var = 42, 43 | new_var2 = new_var*hp) %>% 44 | head() 45 | ``` 46 | - To drop variable assign NULL: `let(mtcars, am = NULL) %>% head()` 47 | - To modify all non-grouping variables: 48 | ```{r, eval=FALSE} 49 | iris %>% 50 | let_all( 51 | scaled = (.x - mean(.x))/sd(.x), 52 | by = Species) %>% 53 | head() 54 | ``` 55 | - To aggregate all variables conditionally on name: 56 | ```{r, eval=FALSE} 57 | iris %>% 58 | take_all( 59 | mean = if(startsWith(.name, "Sepal")) mean(.x), 60 | median = if(startsWith(.name, "Petal")) median(.x), 61 | by = Species 62 | ) 63 | ``` 64 | - For parametric assignment use `:=`: 65 | ```{r, eval=FALSE} 66 | new_var = "my_var" 67 | old_var = "mpg" 68 | mtcars %>% 69 | let((new_var) := get(old_var)*2) %>% 70 | head() 71 | 72 | # or, 73 | expr = quote(mean(cyl)) 74 | mtcars %>% 75 | let((new_var) := eval(expr)) %>% 76 | head() 77 | 78 | # the same with `take` 79 | by_var = "vs,am" 80 | take(mtcars, (new_var) := eval(expr), by = by_var) 81 | ``` 82 | 83 | `query_if` function translates its arguments one-to-one to `[.data.table` method. Additionally there are some conveniences such as automatic `data.frame` conversion to `data.table`. 84 | 85 | ## vlookup & xlookup 86 | 87 | Let's make datasets for lookups: 88 | ```{r include=FALSE} 89 | library(maditr) 90 | ``` 91 | 92 | ```{r} 93 | 94 | workers = fread(" 95 | name company 96 | Nick Acme 97 | John Ajax 98 | Daniela Ajax 99 | ") 100 | 101 | positions = fread(" 102 | name position 103 | John designer 104 | Daniela engineer 105 | Cathie manager 106 | ") 107 | 108 | # xlookup 109 | workers = let(workers, 110 | position = xlookup(name, positions$name, positions$position) 111 | ) 112 | 113 | # vlookup 114 | # by default we search in the first column and return values from second column 115 | workers = let(workers, 116 | position = vlookup(name, positions, no_match = "Not found") 117 | ) 118 | 119 | # the same 120 | workers = let(workers, 121 | position = vlookup(name, positions, 122 | result_column = "position", 123 | no_match = "Not found") # or, result_column = 2 124 | ) 125 | 126 | head(workers) 127 | ``` 128 | 129 | ## More examples 130 | 131 | We will use for demonstartion well-known `mtcars` dataset and some examples from `dplyr` package. 132 | 133 | ```{r} 134 | library(maditr) 135 | data(mtcars) 136 | 137 | # Newly created variables are available immediately 138 | mtcars %>% 139 | let( 140 | cyl2 = cyl * 2, 141 | cyl4 = cyl2 * 2 142 | ) %>% head() 143 | 144 | # You can also use let() to remove variables and 145 | # modify existing variables 146 | mtcars %>% 147 | let( 148 | mpg = NULL, 149 | disp = disp * 0.0163871 # convert to litres 150 | ) %>% head() 151 | 152 | 153 | # window functions are useful for grouped computations 154 | mtcars %>% 155 | let(rank = rank(-mpg, ties.method = "min"), 156 | by = cyl) %>% 157 | head() 158 | 159 | # You can drop variables by setting them to NULL 160 | mtcars %>% 161 | let(cyl = NULL) %>% 162 | head() 163 | 164 | # keeps all existing variables 165 | mtcars %>% 166 | let(displ_l = disp / 61.0237) %>% 167 | head() 168 | 169 | # keeps only the variables you create 170 | mtcars %>% 171 | take(displ_l = disp / 61.0237) %>% 172 | head() 173 | 174 | 175 | # can refer to both contextual variables and variable names: 176 | var = 100 177 | mtcars %>% 178 | let(cyl = cyl * var) %>% 179 | head() 180 | 181 | # select rows 182 | mtcars %>% 183 | rows(am==0) %>% 184 | head() 185 | 186 | # select rows with compound condition 187 | mtcars %>% 188 | rows(am==0 & mpg>mean(mpg)) 189 | 190 | # select columns 191 | mtcars %>% 192 | columns(vs:carb, cyl) 193 | 194 | mtcars %>% 195 | columns(-am, -cyl) 196 | 197 | # regular expression pattern 198 | columns(iris, "^Petal") %>% head() # variables which start from 'Petal' 199 | columns(iris, "Width$") %>% head() # variables which end with 'Width' 200 | 201 | # move Species variable to the front 202 | # pattern "^." matches all variables 203 | columns(iris, Species, "^.") %>% head() 204 | 205 | # pattern "^.*al" means "contains 'al'" 206 | columns(iris, "^.*al") %>% head() 207 | 208 | # numeric indexing - all variables except Species 209 | columns(iris, 1:4) %>% head() 210 | 211 | # A 'take' with summary functions applied without 'by' argument returns an aggregated data 212 | mtcars %>% 213 | take(mean = mean(disp), n = .N) 214 | 215 | # Usually, you'll want to group first 216 | mtcars %>% 217 | take(mean = mean(disp), n = .N, by = am) 218 | 219 | # grouping by multiple variables 220 | mtcars %>% 221 | take(mean = mean(disp), n = .N, by = list(am, vs)) 222 | 223 | # You can group by expressions: 224 | mtcars %>% 225 | take_all( 226 | mean, 227 | by = list(vsam = vs + am) 228 | ) 229 | 230 | # modify all non-grouping variables in-place 231 | mtcars %>% 232 | let_all((.x - mean(.x))/sd(.x), by = am) %>% 233 | head() 234 | 235 | # modify all non-grouping variables to new variables 236 | mtcars %>% 237 | let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>% 238 | head() 239 | 240 | # conditionally modify all variables 241 | iris %>% 242 | let_all(mean = if(is.numeric(.x)) mean(.x)) %>% 243 | head() 244 | 245 | # modify all variables conditionally on name 246 | iris %>% 247 | let_all( 248 | mean = if(startsWith(.name, "Sepal")) mean(.x), 249 | median = if(startsWith(.name, "Petal")) median(.x), 250 | by = Species 251 | ) %>% 252 | head() 253 | 254 | # aggregation with 'take_all' 255 | mtcars %>% 256 | take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am) 257 | 258 | # conditionally aggregate all variables 259 | iris %>% 260 | take_all(mean = if(is.numeric(.x)) mean(.x)) 261 | 262 | # aggregate all variables conditionally on name 263 | iris %>% 264 | take_all( 265 | mean = if(startsWith(.name, "Sepal")) mean(.x), 266 | median = if(startsWith(.name, "Petal")) median(.x), 267 | by = Species 268 | ) 269 | 270 | # parametric evaluation: 271 | var = quote(mean(cyl)) 272 | mtcars %>% 273 | let(mean_cyl = eval(var)) %>% 274 | head() 275 | take(mtcars, eval(var)) 276 | 277 | # all together 278 | new_var = "mean_cyl" 279 | mtcars %>% 280 | let((new_var) := eval(var)) %>% 281 | head() 282 | take(mtcars, (new_var) := eval(var)) 283 | 284 | 285 | ``` 286 | 287 | ## Variable selection in the expressions 288 | 289 | You can use 'columns' inside expression in the 'take'/'let'. 'columns' will 290 | be replaced with data.table with selected columns. In 'let' in the 291 | expressions with ':=', 'cols' or '%to%' can be placed in the left part of the 292 | expression. It is usefull for multiple assignment. 293 | There are four ways of column selection: 294 | 295 | 1. Simply by column names 296 | 2. By variable ranges, e. g. vs:carb. Alternatively, you can use '%to%' 297 | instead of colon: 'vs %to% carb'. 298 | 3. With regular expressions. Characters which start with '^' or end with $ 299 | considered as Perl-style regular expression patterns. For example, '^Petal' 300 | returns all variables started with 'Petal'. 'Width$' returns all variables 301 | which end with 'Width'. Pattern '^.' matches all variables and pattern 302 | '^.*my_str' is equivalent to contains "my_str"'. 303 | 4. By character variables with interpolated parts. Expression in the curly 304 | brackets inside characters will be evaluated in the parent frame with 305 | 'text_expand' function. For example, `a{1:3}` will be transformed to the names 'a1', 306 | 'a2', 'a3'. 'cols' is just a shortcut for 'columns'. 307 | 308 | ```{r} 309 | # range selection 310 | iris %>% 311 | let( 312 | avg = rowMeans(Sepal.Length %to% Petal.Width) 313 | ) %>% 314 | head() 315 | 316 | # multiassignment 317 | iris %>% 318 | let( 319 | # starts with Sepal or Petal 320 | multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2 321 | ) %>% 322 | head() 323 | 324 | 325 | mtcars %>% 326 | let( 327 | # text expansion 328 | cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale) 329 | ) %>% 330 | head() 331 | 332 | # range selection in 'by' 333 | # selection of range + additional column 334 | mtcars %>% 335 | take( 336 | res = sum(cols(mpg, disp %to% drat)), 337 | by = vs %to% gear 338 | ) 339 | ``` 340 | 341 | ## Joins 342 | 343 | Here we use the same datasets as with lookups: 344 | 345 | ```{r} 346 | workers = fread(" 347 | name company 348 | Nick Acme 349 | John Ajax 350 | Daniela Ajax 351 | ") 352 | 353 | positions = fread(" 354 | name position 355 | John designer 356 | Daniela engineer 357 | Cathie manager 358 | ") 359 | 360 | workers 361 | positions 362 | ``` 363 | 364 | Different kinds of joins: 365 | 366 | ```{r} 367 | workers %>% dt_inner_join(positions) 368 | workers %>% dt_left_join(positions) 369 | workers %>% dt_right_join(positions) 370 | workers %>% dt_full_join(positions) 371 | 372 | # filtering joins 373 | workers %>% dt_anti_join(positions) 374 | workers %>% dt_semi_join(positions) 375 | ``` 376 | 377 | To suppress the message, supply `by` argument: 378 | ```{r, eval=FALSE} 379 | workers %>% dt_left_join(positions, by = "name") 380 | ``` 381 | 382 | Use a named `by` if the join variables have different names: 383 | ```{r, eval=FALSE} 384 | positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions' 385 | workers %>% dt_inner_join(positions2, by = c("name" = "worker")) 386 | ``` 387 | 388 | 389 | 390 | 391 | 392 | 393 | --------------------------------------------------------------------------------