├── codecov.yml
├── .gitignore
├── .travis.yml
├── .Rbuildignore
├── tests
    └── tinytest.R
├── inst
    └── tinytest
    │   ├── test_coalesce.R
    │   ├── test_cube.R
    │   ├── test_text_expand.R
    │   ├── test_columns.R
    │   ├── test_utils.R
    │   ├── test_dcast_melt.R
    │   ├── test_dplyr_verbs.R
    │   ├── test_take_all.R
    │   ├── test_vlookup.R
    │   ├── test_to_list.R
    │   ├── test_let_all.R
    │   ├── test_join.R
    │   ├── test_to_wide.R
    │   └── test_verbs.R
├── R
    ├── cube.R
    ├── coalesce.R
    ├── on_attach.R
    ├── text_expand.R
    ├── dt_utils.R
    ├── utils.R
    ├── verbs.R
    ├── dcast.R
    ├── join.R
    ├── vlookup.R
    ├── to_list.R
    ├── maditr-package.R
    ├── let_all.R
    └── to_wide.R
├── maditr.Rproj
├── man
    ├── copy.Rd
    ├── coalesce.Rd
    ├── text_expand.Rd
    ├── dt_count.Rd
    ├── dt_left_join.Rd
    ├── columns.Rd
    ├── reexports.Rd
    ├── to_list.Rd
    ├── dcast.Rd
    ├── dt_mutate.Rd
    ├── vlookup.Rd
    ├── to_long.Rd
    └── maditr-package.Rd
├── DESCRIPTION
├── NEWS
├── NAMESPACE
└── vignettes
    ├── Introduction.R
    └── Introduction.Rmd


/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 | language: R
3 | sudo: false
4 | cache: packages
5 | after_success:
6 | - Rscript -e 'covr::codecov()'


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | Meta
 2 | doc
 3 | .Rproj.user
 4 | .Rhistory
 5 | .RData
 6 | .Ruserdata
 7 | SupplementaryMaterials
 8 | 
 9 | /doc/
10 | /Meta/
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 | 
3 | language: R
4 | sudo: false
5 | cache: packages
6 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^Meta$
 2 | ^doc$
 3 | ^.*\.Rproj$
 4 | ^\.Rproj\.user$
 5 | SupplementaryMaterials
 6 | \.todo
 7 | ^codecov\.yml$
 8 | ^\.travis\.yml$
 9 | README\.MD
10 | 
11 | 


--------------------------------------------------------------------------------
/tests/tinytest.R:
--------------------------------------------------------------------------------
1 | if(capabilities('long.double') && requireNamespace("tinytest", quietly=TRUE)){
2 |     library(tinytest)
3 |     options(covr = FALSE)
4 |     data.table::setDTthreads(2)
5 |     test_package("maditr", remove_side_effects=FALSE)
6 |     data.table::setDTthreads(NULL)
7 | }
8 | 
9 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_coalesce.R:
--------------------------------------------------------------------------------
 1 | cat("\nContext:","coalesce", "\n")
 2 | suppressPackageStartupMessages(library(maditr))
 3 | x = c(1, NA, 2, NA)
 4 | res = coalesce(x, 0)
 5 | expect_equal( res, c(1, 0, 2, 0))
 6 | expect_error(coalesce(x, 1:2))
 7 | 
 8 | y = c(1, 2, NA, NA, 5)
 9 | z = c(NA, NA, 3, 4, 5)
10 | expect_equal(coalesce(y, z), 1:5)
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/R/cube.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | rollup.data.frame = function(x, ...){
 3 |     eval.parent(substitute(data.table::rollup(as.data.table(x), ...)))
 4 | }
 5 | 
 6 | #' @export
 7 | cube.data.frame = function(x, ...){
 8 |     eval.parent(substitute(data.table::cube(as.data.table(x), ...)))
 9 | }
10 | 
11 | #' @export
12 | groupingsets.data.frame = function(x, ...){
13 |     eval.parent(substitute(data.table::groupingsets(as.data.table(x), ...)))
14 | }
15 | 
16 | 


--------------------------------------------------------------------------------
/maditr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 4
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/R/coalesce.R:
--------------------------------------------------------------------------------
 1 | #' Return first non-missing element
 2 | #'
 3 | #' It is an alias for data.table `fcoalesce`. For details see [fcoalesce][data.table::fcoalesce]
 4 | #' @param ... vectors
 5 | #'
 6 | #' @return A vector the same length as the first ... argument with NA
 7 | #'   values replaced by the first non-missing value.
 8 | #'
 9 | #' @examples
10 | #' # examples from dplyr
11 | #' x = sample(c(1:5, NA, NA, NA))
12 | #' coalesce(x, 0L)
13 | #'
14 | #' y = c(1, 2, NA, NA, 5)
15 | #' z = c(NA, NA, 3, 4, 5)
16 | #' coalesce(y, z)
17 | #' @export
18 | coalesce = function(...)data.table::fcoalesce(...)
19 | 


--------------------------------------------------------------------------------
/man/copy.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/text_expand.R
 3 | \name{copy}
 4 | \alias{copy}
 5 | \title{Copy an entire object}
 6 | \usage{
 7 | copy(x)
 8 | }
 9 | \arguments{
10 | \item{x}{object}
11 | }
12 | \value{
13 | copy of the object 'x'
14 | }
15 | \description{
16 | Mainly intended to copy data.table objects because by default they are modified by reference. See example.
17 | }
18 | \examples{
19 | data(mtcars)
20 | dt_mtcars = as.data.table(mtcars)
21 | dt_mtcars2 = dt_mtcars
22 | dt_mtcars3 = copy(dt_mtcars)
23 | let(dt_mtcars, new = 1)
24 | 
25 | head(dt_mtcars2) # we see 'new' column
26 | head(dt_mtcars3) # no 'new' column
27 | }
28 | 


--------------------------------------------------------------------------------
/man/coalesce.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/coalesce.R
 3 | \name{coalesce}
 4 | \alias{coalesce}
 5 | \title{Return first non-missing element}
 6 | \usage{
 7 | coalesce(...)
 8 | }
 9 | \arguments{
10 | \item{...}{vectors}
11 | }
12 | \value{
13 | A vector the same length as the first ... argument with NA
14 | values replaced by the first non-missing value.
15 | }
16 | \description{
17 | It is an alias for data.table \code{fcoalesce}. For details see \link[data.table:coalesce]{fcoalesce}
18 | }
19 | \examples{
20 | # examples from dplyr
21 | x = sample(c(1:5, NA, NA, NA))
22 | coalesce(x, 0L)
23 | 
24 | y = c(1, 2, NA, NA, 5)
25 | z = c(NA, NA, 3, 4, 5)
26 | coalesce(y, z)
27 | }
28 | 


--------------------------------------------------------------------------------
/man/text_expand.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/text_expand.R
 3 | \name{text_expand}
 4 | \alias{text_expand}
 5 | \title{Evaluate expressions in curly brackets inside strings}
 6 | \usage{
 7 | text_expand(..., delim = c("\\\\{", "\\\\}"))
 8 | }
 9 | \arguments{
10 | \item{...}{character vectors}
11 | 
12 | \item{delim}{character vector of length 2 - pair of opening and closing
13 | delimiters for the templating tags. By default it is curly brackets. Note
14 | that \code{delim} will be used in the perl-style regular expression so you
15 | need to escape special characters, e. g. use "\\\\\{" instead of
16 | "\{".}
17 | }
18 | \value{
19 | Vector of characters
20 | }
21 | \description{
22 | \code{text_expand} is simple string interpolation function. It searches in its
23 | arguments expressions in curly brackets \code{{expr}}, evaluate them and substitute with
24 | the result of evaluation. See examples.
25 | }
26 | \examples{
27 | i = 1:5
28 | text_expand("q{i}")
29 | 
30 | i = 1:3
31 | j = 1:3
32 | text_expand("q1_{i}_{j}")
33 | 
34 | data(iris)
35 | text_expand("'iris' has {nrow(iris)} rows.")
36 | }
37 | 


--------------------------------------------------------------------------------
/R/on_attach.R:
--------------------------------------------------------------------------------
 1 | .onAttach = function(...) {
 2 |     hints = c(
 3 |         'To select rows from data: rows(mtcars, am==0)',
 4 |         'To select columns from data: columns(mtcars, mpg, vs:carb)',
 5 |         'To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)',
 6 |         'To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)',
 7 |         'To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)',
 8 |         "To get total summary skip 'by' argument: take_all(mtcars, mean)",
 9 |         "Use magrittr pipe '%>%' to chain several operations:
10 |              mtcars %>%
11 |                  let(mpg_hp = mpg/hp) %>%
12 |                  take(mean(mpg_hp), by = am)
13 |         ",
14 |         'To modify variables or add new variables:
15 |              let(mtcars, new_var = 42, new_var2 = new_var*hp) %>% head()',
16 |         'To drop variable use NULL: let(mtcars, am = NULL) %>% head()'
17 |     )
18 |     # we don't touch random number generator
19 |     curr_hint = hints[round(as.numeric(Sys.time())) %% length(hints) + 1]
20 |     packageStartupMessage(paste0("\n", curr_hint, "\n"))
21 | }
22 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_cube.R:
--------------------------------------------------------------------------------
 1 | cat("\nContext:","cube, rollup and etc", "\n")
 2 | n = 24L
 3 | set.seed(25)
 4 | DT = data.table(
 5 |     color = sample(c("green","yellow","red"), n, TRUE),
 6 |     year = as.Date(sample(paste0(2011:2015,"-01-01"), n, TRUE)),
 7 |     status = as.factor(sample(c("removed","active","inactive","archived"), n, TRUE)),
 8 |     amount = sample(1:5, n, TRUE),
 9 |     value = sample(c(3, 3.5, 2.5, 2), n, TRUE)
10 | )
11 | 
12 | DF = as.data.frame(DT)
13 | # rollup
14 | expect_equal(
15 | rollup(DT, j = sum(value), by = c("color","year","status")),
16 | rollup(DF, j = sum(value), by = c("color","year","status"))
17 | )
18 | 
19 | 
20 | # cube
21 | expect_equal(
22 | cube(DT, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols="value"),
23 | cube(DF, j = lapply(.SD, sum), by = c("color","year","status"), id=TRUE, .SDcols="value")
24 | )
25 | # groupingsets
26 | expect_equal(
27 | groupingsets(DT, j = c(list(count=.N), lapply(.SD, sum)), by = c("color","year","status"),
28 |              sets = list("color", c("year","status"), character()), id=TRUE),
29 | groupingsets(DF, j = c(list(count=.N), lapply(.SD, sum)), by = c("color","year","status"),
30 |              sets = list("color", c("year","status"), character()), id=TRUE)
31 | )
32 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: maditr
 2 | Type: Package
 3 | Title: Fast Data Aggregation, Modification, and Filtering with Pipes and 'data.table'
 4 | Version: 0.8.6
 5 | Maintainer: Gregory Demin <gdemin@gmail.com>
 6 | Authors@R: person("Gregory", "Demin", email = "gdemin@gmail.com", role = c("aut", "cre"))
 7 | Depends: R (>= 3.4.0)
 8 | Imports: data.table (>= 1.12.6), magrittr (>= 1.5)
 9 | Suggests: knitr, tinytest, utils, rmarkdown, stats
10 | Description: Provides pipe-style interface for 'data.table'. Package preserves all 'data.table' features without
11 |               significant impact on performance. 'let' and 'take' functions are simplified interfaces for most common data
12 |               manipulation tasks. For example, you can write 'take(mtcars, mean(mpg), by = am)' for aggregation or 
13 |               'let(mtcars, hp_wt = hp/wt, hp_wt_mpg = hp_wt/mpg)' for modification. Use 'take_if/let_if' for conditional
14 |               aggregation/modification. Additionally there are some conveniences such as automatic 'data.frame' 
15 |               conversion to 'data.table'.
16 | License: GPL-2
17 | URL: https://github.com/gdemin/maditr
18 | BugReports: https://github.com/gdemin/maditr/issues
19 | VignetteBuilder: knitr
20 | Encoding: UTF-8
21 | RoxygenNote: 7.3.2
22 | Roxygen: list(markdown = TRUE)
23 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_text_expand.R:
--------------------------------------------------------------------------------
 1 | cat("\nContext: text_expand\n")
 2 | 
 3 | i = 1:2
 4 | # expect_error(text_expand(i))
 5 | expect_identical(text_expand("{i}% of sum"), c("1% of sum", "2% of sum"))
 6 | expect_identical(text_expand("q"), "q")
 7 | expect_identical(text_expand("q{i}"), c("q1", "q2"))
 8 | expect_identical(text_expand("q{1:2}"), c("q1", "q2"))
 9 | 
10 | expect_identical(text_expand("q{i}_{i}"), c("q1_1", "q2_2"))
11 | expect_identical(text_expand("q`i`_`i`", delim = c("`", "`")), c("q1_1", "q2_2"))
12 | expect_identical(text_expand("q/i}_/i}", delim = c("/", "}")), c("q1_1", "q2_2"))
13 | expect_identical(text_expand("q{i}_{i}", "q{i}"), c("q1_1", "q2_2", "q1", "q2"))
14 | expect_identical(text_expand("q{sum(i)}"), c("q3"))
15 | j = 1:2
16 | expect_identical(text_expand("q{i}_{j}"), c("q1_1", "q1_2", "q2_1", "q2_2"))
17 | k = 1:2
18 | expect_identical(text_expand("q{i}_{j}_{k}"),
19 |                  c("q1_1_1", "q1_1_2", "q1_2_1", "q1_2_2", "q2_1_1", "q2_1_2", "q2_2_1", "q2_2_2"))
20 | 
21 | expect_error(text_expand("{ffgfg}"))
22 | 
23 | 
24 | test_text_expand = function(x){
25 |     i = 45
26 |     text_expand(x)
27 | }
28 | 
29 | expect_identical(test_text_expand("{i}"),'45')
30 | k = 42
31 | expect_identical(test_text_expand("{k}"),'42')
32 | 
33 | dfs = data.frame( zzz = 67:68)
34 | 
35 | expect_identical(with(dfs, text_expand("{zzz}")), as.character(67:68))
36 | expect_identical(with(dfs, text_expand("{k}")), '42')
37 | 
38 | 
39 | 


--------------------------------------------------------------------------------
/man/dt_count.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dt_utils.R
 3 | \name{dt_count}
 4 | \alias{dt_count}
 5 | \alias{dt_add_count}
 6 | \alias{dt_top_n}
 7 | \title{Additional useful functions}
 8 | \usage{
 9 | dt_count(data, ..., weight = NULL, sort = FALSE, name = "n")
10 | 
11 | dt_add_count(data, ..., weight = NULL, sort = FALSE, name = "n")
12 | 
13 | dt_top_n(data, n, by, order_by = NULL)
14 | }
15 | \arguments{
16 | \item{data}{data.table/data.frame data.frame will be automatically converted
17 | to data.table.}
18 | 
19 | \item{...}{variables to group by.}
20 | 
21 | \item{weight}{optional. Unquoted variable name. If provided result will be the sum of this variable by groups.}
22 | 
23 | \item{sort}{logical. If TRUE result will be sorted in desending order by resulting variable.}
24 | 
25 | \item{name}{character. Name of resulting variable.}
26 | 
27 | \item{n}{numeric. number of top cases. If n is negative then bottom values will be returned.}
28 | 
29 | \item{by}{list or vector of grouping variables}
30 | 
31 | \item{order_by}{unquoted variable name by which result will be sorted. If not
32 | specified, defaults to the last variable in the dataset.}
33 | }
34 | \value{
35 | data.table
36 | }
37 | \description{
38 | \itemize{
39 | \item \code{dt_count} calculates number of cases by groups, possibly
40 | weighted. \code{dt_add_count} adds number of cases to existing dataset.
41 | \item \code{dt_top_n} returns top n rows from each group.
42 | }
43 | }
44 | \examples{
45 | 
46 | data(mtcars)
47 | 
48 | # dt_count
49 | dt_count(mtcars, am, vs)
50 | dt_add_count(mtcars, am, vs, name = "am_vs")[] # [] for autoprinting
51 | 
52 | # dt_top_n
53 | dt_top_n(mtcars, 2, by  = list(am, vs))
54 | dt_top_n(mtcars, 2, order_by = mpg, by  = list(am, vs))
55 | }
56 | 


--------------------------------------------------------------------------------
/man/dt_left_join.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/join.R
 3 | \name{dt_left_join}
 4 | \alias{dt_left_join}
 5 | \alias{dt_right_join}
 6 | \alias{dt_inner_join}
 7 | \alias{dt_full_join}
 8 | \alias{dt_semi_join}
 9 | \alias{dt_anti_join}
10 | \title{Join two data.frames by common columns.}
11 | \usage{
12 | dt_left_join(x, y, by = NULL, suffix = c(".x", ".y"))
13 | 
14 | dt_right_join(x, y, by = NULL, suffix = c(".x", ".y"))
15 | 
16 | dt_inner_join(x, y, by = NULL, suffix = c(".x", ".y"))
17 | 
18 | dt_full_join(x, y, by = NULL, suffix = c(".x", ".y"))
19 | 
20 | dt_semi_join(x, y, by = NULL)
21 | 
22 | dt_anti_join(x, y, by = NULL)
23 | }
24 | \arguments{
25 | \item{x}{data.frame or data.table}
26 | 
27 | \item{y}{data.frame or data.table}
28 | 
29 | \item{by}{a character vector of variables to join by. If NULL, the default,
30 | *_join() will do a natural join, using all variables with common names
31 | across the two tables. A message lists the variables so that you can check
32 | they're right (to suppress the message, simply explicitly list the
33 | variables that you want to join). To join by different variables on x and y
34 | use a named vector. For example, \code{by = c("a" = "b")} will match x.a to y.b.}
35 | 
36 | \item{suffix}{If there are non-joined duplicate variables in x and y, these
37 | suffixes will be added to the output to disambiguate them. Should be a
38 | character vector of length 2.}
39 | }
40 | \value{
41 | data.table
42 | }
43 | \description{
44 | Do different versions of SQL join operations. See examples.
45 | }
46 | \examples{
47 | workers = fread("
48 |     name company
49 |     Nick Acme
50 |     John Ajax
51 |     Daniela Ajax
52 | ")
53 | 
54 | positions = fread("
55 |     name position
56 |     John designer
57 |     Daniela engineer
58 |     Cathie manager
59 | ")
60 | 
61 | workers \%>\% dt_inner_join(positions)
62 | workers \%>\% dt_left_join(positions)
63 | workers \%>\% dt_right_join(positions)
64 | workers \%>\% dt_full_join(positions)
65 | 
66 | # filtering joins
67 | workers \%>\% dt_anti_join(positions)
68 | workers \%>\% dt_semi_join(positions)
69 | 
70 | # To suppress the message, supply 'by' argument
71 | workers \%>\% dt_left_join(positions, by = "name")
72 | 
73 | # Use a named 'by' if the join variables have different names
74 | positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
75 | workers \%>\% dt_inner_join(positions2, by = c("name" = "worker"))
76 | }
77 | 


--------------------------------------------------------------------------------
/man/columns.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/columns.R
 3 | \name{columns}
 4 | \alias{columns}
 5 | \alias{cols}
 6 | \alias{rows}
 7 | \title{Selects columns or rows from the data set}
 8 | \usage{
 9 | columns(data, ...)
10 | 
11 | cols(data, ...)
12 | 
13 | rows(data, ...)
14 | }
15 | \arguments{
16 | \item{data}{data.table/data.frame}
17 | 
18 | \item{...}{unquoted or quoted column names, regex selectors or variable
19 | ranges for 'columns' and logical conditions for 'rows'.}
20 | }
21 | \value{
22 | data.frame/data.table
23 | }
24 | \description{
25 | \itemize{
26 | \item \code{columns}: select columns from dataset. There are four ways of column selection:
27 | }
28 | \enumerate{
29 | \item Simply by column names
30 | \item By variable ranges, e. g. vs:carb. Alternatively, you can use '\%to\%'
31 | instead of colon: 'vs \%to\% carb'.
32 | \item With regular expressions. Characters which start with '^' or end with '$'
33 | considered as Perl-style regular expression patterns. For example, '^Petal'
34 | returns all variables started with 'Petal'. 'Width$' returns all variables
35 | which end with 'Width'. Pattern '^.' matches all variables and pattern
36 | '^.*my_str' is equivalent to contains "my_str"'.
37 | \item By character variables with interpolated parts. Expression in the curly
38 | brackets inside characters will be evaluated in the parent frame with
39 | \link{text_expand}. For example, \verb{a\{1:3\}} will be transformed to the names 'a1',
40 | 'a2', 'a3'. 'cols' is just a shortcut for 'columns'. See examples.
41 | }
42 | 
43 | \if{html}{\out{<div class="sourceCode">}}\preformatted{}\if{html}{\out{</div>}}
44 | \itemize{
45 | \item \code{rows}: select rows from dataset by logical conditions.
46 | }
47 | }
48 | \examples{
49 | 
50 | ## columns
51 | mtcars \%>\%
52 |     columns(vs:carb, cyl)
53 | mtcars \%>\%
54 |     columns(-am, -cyl)
55 | 
56 | # regular expression pattern
57 | columns(iris, "^Petal") \%>\% head() # variables which start from 'Petal'
58 | columns(iris, "Width$") \%>\% head() # variables which end with 'Width'
59 | # move Species variable to the front.
60 | # pattern "^." matches all variables
61 | columns(iris, Species, "^.") \%>\% head()
62 | # pattern "^.*i" means "contains 'i'"
63 | columns(iris, "^.*i") \%>\% head()
64 | # numeric indexing - all variables except Species
65 | columns(iris, 1:4) \%>\% head()
66 | 
67 | # variable expansion
68 | dims = c("Width", "Length")
69 | columns(iris, "Petal.{dims}") \%>\% head()
70 | 
71 | # rows
72 | 
73 | mtcars \%>\%
74 |     rows(am==0) \%>\%
75 |     head()
76 | 
77 | # select rows with compound condition
78 | mtcars \%>\%
79 |     rows(am==0 & mpg>mean(mpg))
80 | 
81 | }
82 | 


--------------------------------------------------------------------------------
/R/text_expand.R:
--------------------------------------------------------------------------------
 1 | #' Evaluate expressions in curly brackets inside strings
 2 | #'
 3 | #' `text_expand` is simple string interpolation function. It searches in its
 4 | #' arguments expressions in curly brackets `{expr}`, evaluate them and substitute with
 5 | #' the result of evaluation. See examples.
 6 | #'
 7 | #' @param ... character vectors
 8 | #' @param delim character vector of length 2 - pair of opening and closing
 9 | #'   delimiters for the templating tags. By default it is curly brackets. Note
10 | #'   that `delim` will be used in the perl-style regular expression so you
11 | #'   need to escape special characters, e. g. use "\\\\\{" instead of
12 | #'   "\{".
13 | #' @return Vector of characters
14 | #' @examples
15 | #' i = 1:5
16 | #' text_expand("q{i}")
17 | #'
18 | #' i = 1:3
19 | #' j = 1:3
20 | #' text_expand("q1_{i}_{j}")
21 | #'
22 | #' data(iris)
23 | #' text_expand("'iris' has {nrow(iris)} rows.")
24 | #' @export
25 | text_expand = function(..., delim = c("\\{", "\\}")){
26 |     length(delim)!=2 && stop("'text_expand': 'delim' should be vector of length two.")
27 |     left = delim[[1]]
28 |     right = delim[[2]]
29 |     pattern = paste0(left, "(.+?)", right)
30 |     all_vars= c(list(...), recursive = TRUE)
31 |     res = vector(mode = "list", length = length(all_vars))
32 |     for(each_var in seq_along(all_vars)){
33 |         x = all_vars[each_var]
34 |         if(any(grepl(pattern, x, perl = TRUE))){
35 |             positions = gregexpr(pattern, x, perl = TRUE)
36 |             matches = rev(unique(unlist(regmatches(x, positions))))
37 |             var_names = gsub(right, "", gsub(left, "", matches, perl = TRUE), perl = TRUE)
38 | 
39 |             for(i in seq_along(var_names)){
40 |                 evaluated_item = eval(parse(text = var_names[i]),
41 |                                       envir = parent.frame(),
42 |                                       enclos = baseenv())
43 |                 x = unlist(lapply(evaluated_item, function(item){
44 |                     gsub(matches[i], item, x, fixed = TRUE)
45 | 
46 |                 }))
47 | 
48 |             }
49 | 
50 |         }
51 |         res[[each_var]] = x
52 |     }
53 |     c(res, recursive = TRUE)
54 | }
55 | 
56 | 
57 | 
58 | #' Copy an entire object
59 | #'
60 | #' Mainly intended to copy data.table objects because by default they are modified by reference. See example.
61 | #'
62 | #' @param x object
63 | #'
64 | #' @return copy of the object 'x'
65 | #' @export
66 | #'
67 | #' @examples
68 | #' data(mtcars)
69 | #' dt_mtcars = as.data.table(mtcars)
70 | #' dt_mtcars2 = dt_mtcars
71 | #' dt_mtcars3 = copy(dt_mtcars)
72 | #' let(dt_mtcars, new = 1)
73 | #'
74 | #' head(dt_mtcars2) # we see 'new' column
75 | #' head(dt_mtcars3) # no 'new' column
76 | copy = function(x) {
77 |     if(missing(x)) return(maditr::copy)
78 |     if(is.list(x)){
79 |         data.table::copy(x)
80 |     } else {
81 |         x
82 |     }
83 | 
84 | }
85 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | 0.8.6 (2025-09-01)
 2 | ================
 3 | * remove `sort_by` method in favor to `data.table::sort_by`
 4 | * due to removing `sort_by` we can decrease minimum R version to 3.6 in the requirements
 5 | 
 6 | 0.8.5 (2024-11-10)
 7 | ================
 8 | * fixes for compatibility with the new versions of R and data.table
 9 | * remove generic `sort_by` in favour of base R `sort_by`. Now the syntax for sorting data.frame and data.table is different
10 | * remove `dt_arrange`
11 | * minor fixes
12 | 
13 | 0.8.4 (2024-01-11)
14 | ================
15 | * very minor fixes
16 | 
17 | 0.8.3 (2022-04-02)
18 | ================
19 | * now 'rows' respects 'etable' class
20 | 
21 | 0.8.2 (2021-05-25)
22 | ================
23 | * fix serious bug with '%to%' in the multiassignment expression in the 'let'
24 | 
25 | 0.8.1 (2021-05-18)
26 | ================
27 | * new function 'rows' for selecting rows/filtering dataset
28 | * new function 'columns' for selecting columns
29 | * 'columns' can be used inside expressions to select range of variables
30 | * new functions 'to_long' and 'to_wide' for converting dataset to long form and vice versa
31 | * remove `.res` variable from progress bar indicator in 'to_list'
32 | * bugfixes
33 | 
34 | 0.7.4 (2020-07-29)
35 | ================
36 | * fix tests for R before 4.0
37 | * convert documentation to markdown
38 | 
39 | 0.7.3 (2020-07-22)
40 | ================
41 | * fixes for data.table 1.13
42 | * add new functions: vlookup and xlookup
43 | 
44 | 0.7.1 (2020-04-27)
45 | ================
46 | * 'dt_select' now can match variables by Perl-style regular expression, e. g. '^Petal' returns all variables which start with 'Petal'.
47 | * new functions 'to_list', 'to_vec', 'to_df', 'to_dfc' for convenient lists processing. They have a little pythonic style interface: 'to_list(1:10, if(.x %% 2 == 0) .x)' will return only even numbers. For details see '?to_list'.
48 | * new functions 'take_all' and 'let_all' for summarizing and aggregating multiple variables.
49 | * bugfixes
50 | 
51 | 0.6.3 (2019-12-03)
52 | ================
53 | * 'let' and 'take' now generics (issue #8)
54 | * add functions 'dt_count', 'dt_add_count', 'dt_top_n' (issue #5)
55 | * bumped requried data.table version
56 | * move tests from testthat to tinytest
57 | * bugfixes
58 | 
59 | 
60 | 0.6.2 (2019-06-09)
61 | ================
62 | * add 'sort_by'
63 | * minor bugfixes
64 | 
65 | 0.6.1 (2019-01-03)
66 | ================
67 | * fix tests
68 | * add 'coalesce' function
69 | * add 'data.frame' method for 'rollup', 'cube' and 'groupingsets'
70 | * minor fixes
71 | 
72 | 0.6.0 (2018-08-12)
73 | ================
74 | * add joins: 'dt_left_join', 'dt_right_join', 'dt_inner_join', 'dt_full_join', 'dt_anti_join', 'dt_semi_join'
75 | 
76 | 0.5.2 (2018-07-18)
77 | ================
78 | * add 'dcast' and 'melt' methods for data.frame
79 | * add ':=' operator for 'take'/'take_if'/'summarize' for parametric evaluation
80 | 
81 | 0.5.1 (2018-05-09)
82 | ================
83 | * fix stupid bug with incorrect minimal version of data.table in DESCRIPTION
84 | * add 'dplyr'-like interface.
85 | 
86 | 0.5.0 (2018-05-05)
87 | ================
88 | * initial release
89 | 
90 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_columns.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext: columns\n")
  2 | 
  3 | mt_dt = as.data.table(mtcars)
  4 | res1 = columns(mt_dt, -cyl, -am)
  5 | res3 = mt_dt[, -c(2,9), with = FALSE]
  6 | expect_identical(res1, res3)
  7 | 
  8 | res1 = columns(mtcars, cyl:wt, am)
  9 | res3 = mtcars[, c(2:6, 9)]
 10 | expect_identical(res1, res3)
 11 | 
 12 | mt_dt = as.data.table(mtcars)
 13 | res1 = columns(mt_dt, -(cyl:wt), -am)
 14 | res3 = mt_dt[, -c(2:6, 9), with = FALSE]
 15 | expect_identical(res1, res3)
 16 | 
 17 | data(iris)
 18 | dt_iris = as.data.table(iris)
 19 | res1 = columns(dt_iris, Species, "^Sepal")
 20 | expect_identical(res1, dt_iris[,.(Species, Sepal.Length, Sepal.Width)])
 21 | my_sepal = "^Sepal"
 22 | res1 = columns(dt_iris, Species, my_sepal)
 23 | expect_identical(res1, dt_iris[,.(Species, Sepal.Length, Sepal.Width)])
 24 | 
 25 | res1 = columns(dt_iris, Species, "^.")
 26 | expect_identical(res1, dt_iris[,c(5, 1:4)])
 27 | 
 28 | res1 = columns(dt_iris, Species, "^Sepal", "Width$")
 29 | expect_identical(res1, dt_iris[,c(5, 1:2, 4)])
 30 | 
 31 | res1 = columns(dt_iris, -"^Sepal", -"Width$")
 32 | expect_identical(res1, dt_iris[,c(3,5)])
 33 | 
 34 | res1 = columns(dt_iris,  "^.+\\.")
 35 | expect_identical(res1, dt_iris[,c(1:4)])
 36 | 
 37 | res1 = columns(dt_iris,  Species, Species)
 38 | expect_identical(res1, dt_iris[,c(5)])
 39 | 
 40 | res1 = columns(dt_iris,  "Species")
 41 | expect_identical(res1, dt_iris[,c(5)])
 42 | 
 43 | etab = data.frame(a = 1:2, b = 3:4)
 44 | class(etab) = c("etable", class(etab))
 45 | res = etab[,2, drop = FALSE]
 46 | expect_identical(res, columns(etab, "b"))
 47 | 
 48 | mt_dt = as.data.table(mtcars)
 49 | 
 50 | expect_identical(
 51 |     columns(mt_dt, mpg:hp, carb),
 52 |     columns(mt_dt, mpg %to% hp, carb)
 53 | )
 54 | 
 55 | 
 56 | expect_identical(
 57 |     columns(mtcars, mpg:hp, carb),
 58 |     columns(mtcars, mpg %to% hp, carb)
 59 | )
 60 | ###
 61 | dims = c("Width", "Length")
 62 | expect_identical(
 63 |     columns(iris, "Petal.{dims}"),
 64 |     iris[, paste0("Petal.",dims)]
 65 | )
 66 | 
 67 | 
 68 | expect_identical(
 69 |     columns(iris, -"Petal.{dims}"),
 70 |     iris[, -(3:4)]
 71 | )
 72 | ###
 73 | my_columns = function(my_dims){
 74 |     columns(iris, "Petal.{my_dims}")
 75 | }
 76 | 
 77 | expect_identical(
 78 |     my_columns(dims),
 79 |     iris[, paste0("Petal.",dims)]
 80 | )
 81 | 
 82 | ###
 83 | my_columns = function(my_dims){
 84 |     columns(iris, my_dims)
 85 | }
 86 | 
 87 | expect_identical(
 88 |     my_columns("Petal.{dims}"),
 89 |     iris[, (4:3)]
 90 | )
 91 | 
 92 | ###
 93 | 
 94 | my_columns = function(my_dims){
 95 |     columns(iris, -my_dims)
 96 | }
 97 | 
 98 | expect_identical(
 99 |     my_columns("Petal.{dims}"),
100 |     iris[, -(4:3)]
101 | )
102 | 
103 | ###
104 | 
105 | my_columns = function(my_dims){
106 |     columns(iris, -"Petal.{my_dims}")
107 | }
108 | 
109 | expect_identical(
110 |     my_columns(dims),
111 |     iris[, -(3:4)]
112 | )
113 | 
114 | 
115 | my_columns = function(my_dims){
116 |     columns(iris, "Petal.{dims}")
117 | }
118 | 
119 | expect_identical(
120 |     my_columns(NULL),
121 |     iris[, paste0("Petal.",dims)]
122 | )
123 | 
124 | 
125 | my_vars = c("Petal.Length", "Petal.Width")
126 | expect_identical(
127 |     columns(iris, my_vars, Species),
128 |     iris[,3:5]
129 | )
130 | 
131 | expect_identical(
132 |     columns(iris, -my_vars, -Species),
133 |     iris[,-(3:5)]
134 | )
135 | 
136 | expect_identical(
137 |     columns(iris, 5-1),
138 |     iris[,4, drop = FALSE]
139 | )
140 | 
141 | 
142 | expect_error(
143 |     columns(iris, -my_vars, Species)
144 | )
145 | 


--------------------------------------------------------------------------------
/R/dt_utils.R:
--------------------------------------------------------------------------------
  1 | #' Additional useful functions
  2 | #'
  3 | #'
  4 | #' - `dt_count` calculates number of cases by groups, possibly
  5 | #' weighted. `dt_add_count` adds number of cases to existing dataset.
  6 | #' - `dt_top_n` returns top n rows from each group.
  7 | #'
  8 | #' @param data data.table/data.frame data.frame will be automatically converted
  9 | #'   to data.table.
 10 | #' @param ... variables to group by.
 11 | #' @param weight optional. Unquoted variable name. If provided result will be the sum of this variable by groups.
 12 | #' @param sort logical. If TRUE result will be sorted in desending order by resulting variable.
 13 | #' @param name character. Name of resulting variable.
 14 | #' @param n numeric. number of top cases. If n is negative then bottom values will be returned.
 15 | #' @param by list or vector of grouping variables
 16 | #' @param order_by unquoted variable name by which result will be sorted. If not
 17 | #'   specified, defaults to the last variable in the dataset.
 18 | #' @return data.table
 19 | #' @export
 20 | #'
 21 | #' @examples
 22 | #'
 23 | #' data(mtcars)
 24 | #'
 25 | #' # dt_count
 26 | #' dt_count(mtcars, am, vs)
 27 | #' dt_add_count(mtcars, am, vs, name = "am_vs")[] # [] for autoprinting
 28 | #'
 29 | #' # dt_top_n
 30 | #' dt_top_n(mtcars, 2, by  = list(am, vs))
 31 | #' dt_top_n(mtcars, 2, order_by = mpg, by  = list(am, vs))
 32 | dt_count = function(data, ..., weight = NULL, sort = FALSE, name = "n"){
 33 |     name = as.symbol(name)
 34 |     weight_expr = substitute(weight)
 35 |     if(is.null(weight_expr)){
 36 |         res = eval.parent(substitute(maditr::take(data, name := .N, by = cols(...))))
 37 |     } else {
 38 |         res = eval.parent(substitute(maditr::take(data, name := sum(weight, na.rm = TRUE), by = cols(...))))
 39 |     }
 40 |     if(sort) {
 41 |         res = eval(substitute(data.table::setorder(res, -name), list(name = name)))
 42 | 
 43 |     }
 44 |     res
 45 | }
 46 | 
 47 | #' @export
 48 | #' @rdname dt_count
 49 | dt_add_count = function(data, ..., weight = NULL, sort = FALSE, name = "n"){
 50 |     name = as.symbol(name)
 51 |     weight_expr = substitute(weight)
 52 |     if(is.null(weight_expr)){
 53 |         res = eval.parent(substitute(maditr::let(data, name := .N, by = cols(...))))
 54 |     } else {
 55 |         res = eval.parent(substitute(maditr::let(data, name := sum(weight, na.rm = TRUE), by = cols(...))))
 56 |     }
 57 |     if(sort) {
 58 |         res = eval(substitute(data.table::setorder(res, -name), list(name = name)))
 59 | 
 60 |     }
 61 |     res
 62 | }
 63 | 
 64 | #' @export
 65 | #' @rdname dt_count
 66 | dt_top_n = function(data, n, by, order_by = NULL){
 67 |     order_by_expr = substitute(order_by)
 68 |     if(is.null(order_by_expr)){
 69 |         order_by = as.symbol(names(data)[ncol(data)])
 70 |     }
 71 |     if(n>0){
 72 |         eval.parent(substitute({
 73 |             maditr::query(data.table::setorder(data, -order_by), head(.SD, n), by = by)
 74 |         }))
 75 |     } else {
 76 |         eval.parent(substitute({
 77 |             maditr::query(data.table::setorder(data, -order_by), tail(.SD, -n), by = by)
 78 |         }))
 79 |     }
 80 | 
 81 | }
 82 | 
 83 | # @export
 84 | # @rdname dt_count
 85 | # dt_sample_n = function(data, n, by, replace = FALSE, weight = NULL){
 86 | #     sort_by_expr = substitute(sort_by)
 87 | #     if(is.null(sort_by_expr)){
 88 | #         sort_by = as.symbol(names(data)[ncol(data)])
 89 | #     }
 90 | #     if(n>0){
 91 | #         eval.parent(substitute({
 92 | #             query(sort_by(data, -sort_by), head(.SD, n), by = by)
 93 | #         }))
 94 | #     } else {
 95 | #         eval.parent(substitute({
 96 | #             query(sort_by(data, -sort_by), tail(.SD, -n), by = by)
 97 | #         }))
 98 | #     }
 99 | #
100 | # }
101 | 
102 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | # generate missing argument
  2 | missing_arg = function(){
  3 |     quote(expr=)
  4 | }
  5 | 
  6 | safe_deparse = function(expr){
  7 |     res = deparse(expr, width.cutoff = 500)
  8 |     do.call(paste0, as.list(res))
  9 | }
 10 | 
 11 | add_names_to_quoted_list = function(expr_list){
 12 |     all_names = names(expr_list)
 13 |     if(is.null(all_names)){
 14 |         names(expr_list) = c("", vapply(expr_list[-1], safe_deparse, FUN.VALUE = character(1)))
 15 |         return(expr_list)
 16 |     }
 17 |     for(i in seq_along(expr_list)){
 18 |         if(all_names[i]==""){
 19 |             names(expr_list)[i] = safe_deparse(expr_list[[i]])
 20 |         }
 21 |     }
 22 | 
 23 |     expr_list
 24 | }
 25 | 
 26 | 
 27 | # expr - expression as after 'substitute'
 28 | # symbols - named list  - names will be substituted with values
 29 | substitute_symbols = function(substitute_result, symbols) {
 30 |     eval(bquote(substitute(.(substitute_result), symbols)))
 31 | }
 32 | 
 33 | 
 34 | 
 35 | 
 36 | # 'expr' is result of substitute with assumption
 37 | # that first argument is data.frame.
 38 | # When we have huge constant instead of
 39 | # variable in the expression, then, in the case of the error
 40 | # we have long lag to print this error in the console.
 41 | # This function is workaround for this issue.
 42 | eval_in_parent_frame = function(data, expr, frame, need_expansion = TRUE){
 43 |     `._***data***` = NULL # to pass CRAN check
 44 |     if(!is.data.table(data)){
 45 |         data = as.data.table(data)
 46 |     }
 47 |     if(need_expansion){
 48 |         data_names = names(data)
 49 |         # i
 50 |         if(length(expr)>2) {
 51 |             expr[[3]] = replace_column_expr(expr[[3]], data_names = data_names, frame = frame, type = "data.table")
 52 |         }
 53 |         # j
 54 |         if(length(expr)>3) {
 55 |             expr[[4]] = replace_column_expr(expr[[4]], data_names = data_names, frame = frame, type = "data.table")
 56 |         }
 57 |         if("by" %in% names(expr)){
 58 |             curr_by = expr[["by"]]
 59 |             if(!missing(curr_by) && is_columns(curr_by)){
 60 |                 expr[["by"]] = replace_column_expr(curr_by, data_names = data_names, frame = frame, type = "names")
 61 |             }
 62 |         }
 63 |     }
 64 |     assign("._***data***", data, envir = frame)
 65 |     on.exit({
 66 |         rm(`._***data***`, envir = frame)
 67 |     })
 68 |     expr[[2]] = quote(`._***data***`)
 69 |     eval(expr, envir = frame)
 70 | }
 71 | 
 72 | # j_expr - list from j-expression
 73 | # envir - environement where we will evaluate lhs of :=
 74 | add_names_from_walrus_assignement = function(j_expr, envir){
 75 |     j_length = length(j_expr)
 76 |     if(is.null(names(j_expr))){
 77 |         names(j_expr) = rep("", j_length)
 78 |     }
 79 |     # parse walrus (a := b) assignement
 80 |     for(k in seq_len(j_length)){
 81 |         if(is.call(j_expr[[k]]) && identical(j_expr[[k]][[1]], as.symbol(":="))){
 82 |             name_expr = j_expr[[k]][[2]]
 83 |             j_expr[[k]] = j_expr[[k]][[3]]
 84 |             if(is.call(name_expr)){
 85 |                 names(j_expr)[k] = eval(name_expr, envir = envir)
 86 |             } else {
 87 |                 if(is.character(name_expr)){
 88 |                     names(j_expr)[k] = name_expr
 89 |                 } else {
 90 |                     names(j_expr)[k] = safe_deparse(name_expr)
 91 |                 }
 92 |             }
 93 |         }
 94 |     }
 95 | 
 96 |     j_expr
 97 | }
 98 | 
 99 | add_names_from_single_symbol = function(j_expr){
100 |     if(length(j_expr)>1){
101 |         for(k in seq_along(j_expr)){
102 |             if(names(j_expr)[k]=="" && is.symbol(j_expr[[k]])){
103 |                 names(j_expr)[k] = as.character(j_expr[[k]])
104 |             }
105 |         }
106 |     }
107 |     j_expr
108 | }
109 | 
110 | 
111 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_utils.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext:","dt_count", "\n")
  2 | 
  3 | data(mtcars)
  4 | 
  5 | expect_equal(
  6 |     dt_count(mtcars, am, vs),
  7 |     take(mtcars, n = .N, by =.(am, vs))
  8 | )
  9 | 
 10 | expect_equal(
 11 |     dt_count(mtcars, am, vs, sort = TRUE),
 12 |     take(mtcars, n = .N, by =.(am, vs)) %>% setorder(-n)
 13 | )
 14 | 
 15 | expect_equal(
 16 |     dt_count(mtcars, am, vs, name = "total"),
 17 |     take(mtcars, total = .N, by =.(am, vs))
 18 | )
 19 | 
 20 | expect_equal(
 21 |     dt_count(mtcars, "^(am|vs)", name = "total"),
 22 |     take(mtcars, total = .N, by =.(vs, am))
 23 | )
 24 | 
 25 | expect_equal(
 26 |     dt_count(mtcars, vs %to% gear, name = "total"),
 27 |     take(mtcars, total = .N, by =.(vs, am, gear))
 28 | )
 29 | 
 30 | expect_equal(
 31 |     dt_count(mtcars, am, vs, sort = TRUE, name = "total"),
 32 |     take(mtcars, total = .N, by =.(am, vs)) %>% setorder(-total)
 33 | )
 34 | 
 35 | mtcars2 = mtcars
 36 | 
 37 | mtcars2$mpg[1:4] = NA
 38 | 
 39 | expect_equal(
 40 |     dt_count(mtcars2, am, vs, weight = mpg, name = "total"),
 41 |     take(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs))
 42 | )
 43 | 
 44 | expect_equal(
 45 |     dt_count(mtcars2,weight = mpg, sort = TRUE, name = "total"),
 46 |     take(mtcars2, total = sum(mpg, na.rm = TRUE)) %>% setorder(-total)
 47 | )
 48 | 
 49 | expect_equal(
 50 |     dt_count(mtcars2, am, vs, weight = mpg, sort = TRUE, name = "total"),
 51 |     take(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs)) %>% setorder(-total)
 52 | )
 53 | 
 54 | ############
 55 | cat("\nContext:","dt_add_count", "\n")
 56 | 
 57 | data(mtcars)
 58 | 
 59 | expect_equal(
 60 |     dt_add_count(mtcars, am, vs),
 61 |     let(mtcars, n = .N, by =.(am, vs))
 62 | )
 63 | 
 64 | expect_equal(
 65 |     dt_add_count(mtcars, am, vs, sort = TRUE),
 66 |     let(mtcars, n = .N, by =.(am, vs)) %>% setorder(-n)
 67 | )
 68 | 
 69 | expect_equal(
 70 |     dt_add_count(mtcars, am, vs, name = "total"),
 71 |     let(mtcars, total = .N, by =.(am, vs))
 72 | )
 73 | 
 74 | expect_equal(
 75 |     dt_add_count(mtcars, am, vs, sort = TRUE, name = "total"),
 76 |     let(mtcars, total = .N, by =.(am, vs)) %>% setorder(-total)
 77 | )
 78 | 
 79 | 
 80 | data(mtcars)
 81 | mtcars2 = mtcars
 82 | 
 83 | mtcars2$mpg[1:4] = NA
 84 | 
 85 | expect_equal(
 86 |     dt_add_count(mtcars2, am, vs, weight = mpg, name = "total"),
 87 |     let(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs))
 88 | )
 89 | 
 90 | expect_equal(
 91 |     dt_add_count(mtcars2, am, vs, weight = mpg, sort = TRUE, name = "total"),
 92 |     let(mtcars2, total = sum(mpg, na.rm = TRUE), by =.(am, vs)) %>% setorder(-total)
 93 | )
 94 | 
 95 | expect_equal(
 96 |     dt_add_count(mtcars2,  weight = mpg, sort = TRUE, name = "total"),
 97 |     let(mtcars2, total = sum(mpg, na.rm = TRUE)) %>% setorder(-total)
 98 | )
 99 | 
100 | cat("\nContext:","dt_top_n", "\n")
101 | 
102 | data(mtcars)
103 | 
104 | expect_equal(
105 |     dt_top_n(mtcars, 2),
106 |     head(take_if(mtcars, order(-carb)), 2)
107 | )
108 | 
109 | 
110 | expect_equal(
111 |     dt_top_n(mtcars, -2),
112 |     tail(take_if(mtcars, order(-carb)), 2)
113 | )
114 | 
115 | 
116 | expect_equal(
117 |     dt_top_n(mtcars, 2, order_by = mpg, by = list(am, vs)),
118 |     query_if(mtcars, order(-mpg), head(.SD, 2), by = list(am, vs))
119 | )
120 | 
121 | indirect = c('am', 'vs')
122 | expect_equal(
123 |     dt_top_n(mtcars, 2, order_by = mpg, by = cols("{indirect}")),
124 |     query_if(mtcars, order(-mpg), head(.SD, 2), by = list(am, vs))
125 | )
126 | 
127 | expect_equal(
128 |     dt_top_n(mtcars, -2, order_by = mpg, by = list(am, vs)),
129 |     query_if(mtcars, order(-mpg), tail(.SD, 2), by = list(am, vs))
130 | )
131 | 
132 | 
133 | cat("\nContext:","copy", "\n")
134 | 
135 | expect_identical(
136 |     copy(), maditr::copy
137 | )
138 | 
139 | data(iris)
140 | expect_identical(
141 |     copy(iris), iris
142 | )
143 | 
144 | 
145 | expect_identical(
146 |     copy(1), 1
147 | )
148 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/maditr-package.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{\%>\%}
 7 | \alias{\%<>\%}
 8 | \alias{\%$\%}
 9 | \alias{\%T>\%}
10 | \alias{data.table}
11 | \alias{as.data.table}
12 | \alias{is.data.table}
13 | \alias{last}
14 | \alias{first}
15 | \alias{\%like\%}
16 | \alias{between}
17 | \alias{\%between\%}
18 | \alias{inrange}
19 | \alias{\%inrange\%}
20 | \alias{:=}
21 | \alias{setnames}
22 | \alias{setcolorder}
23 | \alias{set}
24 | \alias{setDT}
25 | \alias{setDF}
26 | \alias{setorder}
27 | \alias{setorderv}
28 | \alias{setkey}
29 | \alias{setkeyv}
30 | \alias{setindex}
31 | \alias{setindexv}
32 | \alias{chmatch}
33 | \alias{\%chin\%}
34 | \alias{rbindlist}
35 | \alias{fread}
36 | \alias{fwrite}
37 | \alias{foverlaps}
38 | \alias{shift}
39 | \alias{transpose}
40 | \alias{tstrsplit}
41 | \alias{frank}
42 | \alias{frankv}
43 | \alias{.SD}
44 | \alias{.N}
45 | \alias{.I}
46 | \alias{.GRP}
47 | \alias{.BY}
48 | \alias{rleid}
49 | \alias{rleidv}
50 | \alias{rowid}
51 | \alias{rowidv}
52 | \alias{uniqueN}
53 | \alias{setDTthreads}
54 | \alias{getDTthreads}
55 | \alias{fintersect}
56 | \alias{fsetdiff}
57 | \alias{funion}
58 | \alias{fsetequal}
59 | \alias{shouldPrint}
60 | \alias{fsort}
61 | \alias{groupingsets}
62 | \alias{cube}
63 | \alias{rollup}
64 | \alias{nafill}
65 | \alias{setnafill}
66 | \alias{frollmean}
67 | \alias{frollsum}
68 | \alias{fcoalesce}
69 | \alias{fifelse}
70 | \alias{fcase}
71 | \alias{frollapply}
72 | \title{Objects exported from other packages}
73 | \keyword{internal}
74 | \description{
75 | These objects are imported from other packages. Follow the links
76 | below to see their documentation.
77 | 
78 | \describe{
79 |   \item{data.table}{\code{\link[data.table:between]{\%between\%}}, \code{\link[data.table:chmatch]{\%chin\%}}, \code{\link[data.table:between]{\%inrange\%}}, \code{\link[data.table:like]{\%like\%}}, \code{\link[data.table:special-symbols]{.BY}}, \code{\link[data.table:special-symbols]{.GRP}}, \code{\link[data.table:special-symbols]{.I}}, \code{\link[data.table:special-symbols]{.N}}, \code{\link[data.table:special-symbols]{.SD}}, \code{\link[data.table:assign]{:=}}, \code{\link[data.table]{as.data.table}}, \code{\link[data.table]{between}}, \code{\link[data.table]{chmatch}}, \code{\link[data.table:groupingsets]{cube}}, \code{\link[data.table]{data.table}}, \code{\link[data.table]{fcase}}, \code{\link[data.table:coalesce]{fcoalesce}}, \code{\link[data.table]{fifelse}}, \code{\link[data.table:setops]{fintersect}}, \code{\link[data.table:last]{first}}, \code{\link[data.table]{foverlaps}}, \code{\link[data.table]{frank}}, \code{\link[data.table:frank]{frankv}}, \code{\link[data.table]{fread}}, \code{\link[data.table:froll]{frollapply}}, \code{\link[data.table:froll]{frollmean}}, \code{\link[data.table:froll]{frollsum}}, \code{\link[data.table:setops]{fsetdiff}}, \code{\link[data.table:setops]{fsetequal}}, \code{\link[data.table]{fsort}}, \code{\link[data.table:setops]{funion}}, \code{\link[data.table]{fwrite}}, \code{\link[data.table:openmp-utils]{getDTthreads}}, \code{\link[data.table]{groupingsets}}, \code{\link[data.table:between]{inrange}}, \code{\link[data.table:as.data.table]{is.data.table}}, \code{\link[data.table]{last}}, \code{\link[data.table]{nafill}}, \code{\link[data.table]{rbindlist}}, \code{\link[data.table]{rleid}}, \code{\link[data.table:rleid]{rleidv}}, \code{\link[data.table:groupingsets]{rollup}}, \code{\link[data.table]{rowid}}, \code{\link[data.table:rowid]{rowidv}}, \code{\link[data.table:assign]{set}}, \code{\link[data.table]{setDF}}, \code{\link[data.table]{setDT}}, \code{\link[data.table:openmp-utils]{setDTthreads}}, \code{\link[data.table]{setcolorder}}, \code{\link[data.table:setkey]{setindex}}, \code{\link[data.table:setkey]{setindexv}}, \code{\link[data.table]{setkey}}, \code{\link[data.table:setkey]{setkeyv}}, \code{\link[data.table:nafill]{setnafill}}, \code{\link[data.table:setattr]{setnames}}, \code{\link[data.table]{setorder}}, \code{\link[data.table:setorder]{setorderv}}, \code{\link[data.table]{shift}}, \code{\link[data.table]{shouldPrint}}, \code{\link[data.table]{transpose}}, \code{\link[data.table]{tstrsplit}}, \code{\link[data.table:duplicated]{uniqueN}}}
80 | 
81 |   \item{magrittr}{\code{\link[magrittr:exposition]{\%$\%}}, \code{\link[magrittr:compound]{\%<>\%}}, \code{\link[magrittr:pipe]{\%>\%}}, \code{\link[magrittr:tee]{\%T>\%}}}
82 | }}
83 | 
84 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_dcast_melt.R:
--------------------------------------------------------------------------------
 1 | cat("\nContext:","dcast/melt", "\n")
 2 | # examples are borrowed from 'tidyr' package
 3 | set.seed(123)
 4 | stocks = data.frame(
 5 |     time = as.Date('2009-01-01') + 0:9,
 6 |     X = rnorm(10, 0, 1),
 7 |     Y = rnorm(10, 0, 2),
 8 |     Z = rnorm(10, 0, 4)
 9 | )
10 | 
11 | res = structure(list(time = structure(c(14245, 14246, 14247, 14248,
12 | 14249, 14250, 14251, 14252, 14253, 14254, 14245, 14246, 14247,
13 | 14248, 14249, 14250, 14251, 14252, 14253, 14254, 14245, 14246,
14 | 14247, 14248, 14249, 14250, 14251, 14252, 14253, 14254), class = "Date"),
15 | stock = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
16 | 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
17 | 3L, 3L, 3L, 3L, 3L), class = "factor", .Label = c("X", "Y",
18 | "Z")), price = c(-0.560475646552213, -0.23017748948328, 1.55870831414912,
19 | 0.070508391424576, 0.129287735160946, 1.71506498688328, 0.460916205989202,
20 | -1.26506123460653, -0.686852851893526, -0.445661970099958,
21 | 2.44816359487892, 0.719627654114728, 0.801542901188104, 0.221365431890239,
22 | -1.11168226950815, 3.57382627360616, 0.995700956458479, -3.93323431325928,
23 | 1.40271180312737, -0.945582815455868, -4.27129482394738,
24 | -0.871899658633181, -4.10401779322896, -2.91556491716456,
25 | -2.50015707139703, -6.74677324296965, 3.3511481779781, 0.613492471346061,
26 | -4.55254774804779, 5.01525968427971)), row.names = c(NA,
27 | -30L), class = c("data.table", "data.frame"))
28 | 
29 | stocksm = stocks %>% melt(id.vars = "time", variable.name = "stock", value.name = "price")
30 | expect_equal(stocksm, res)
31 | 
32 | stocksm = stocks %>% melt(id.vars = "time", variable.name = "stock",
33 |                           measure.vars = patterns("X|Y|Z"),
34 |                           value.name = "price")
35 | expect_equal(stocksm, res)
36 | 
37 | stocksm_dt = as.data.table(stocks) %>% melt(id.vars = "time", variable.name = "stock",
38 |                                             value.name = "price")
39 | expect_equal(stocksm_dt, res)
40 | 
41 | stocksm_dt = as.data.table(stocks) %>% melt(id.vars = "time", variable.name = "stock",
42 |                                             measure.vars = patterns("X|Y|Z"),
43 |                                             value.name = "price")
44 | expect_equal(stocksm_dt, res)
45 | 
46 | res2 = structure(list(time = structure(c(14245, 14246, 14247, 14248,
47 | 14249, 14250, 14251, 14252, 14253, 14254), class = "Date"), X = c(-0.560475646552213,
48 | -0.23017748948328, 1.55870831414912, 0.070508391424576, 0.129287735160946,
49 | 1.71506498688328, 0.460916205989202, -1.26506123460653, -0.686852851893526,
50 | -0.445661970099958), Y = c(2.44816359487892, 0.719627654114728,
51 | 0.801542901188104, 0.221365431890239, -1.11168226950815, 3.57382627360616,
52 | 0.995700956458479, -3.93323431325928, 1.40271180312737, -0.945582815455868
53 | ), Z = c(-4.27129482394738, -0.871899658633181, -4.10401779322896,
54 | -2.91556491716456, -2.50015707139703, -6.74677324296965, 3.3511481779781,
55 | 0.613492471346061, -4.55254774804779, 5.01525968427971)), row.names = c(NA,
56 | -10L), class = c("data.table", "data.frame"), sorted = "time")
57 | stock_wide = as.data.frame(stocksm) %>% dcast(time ~ stock)
58 | expect_equal(res2, stock_wide)
59 | stock_wide_dt = stocksm %>% dcast(time ~ stock)
60 | expect_equal(res2, stock_wide_dt)
61 | 
62 | res3 = structure(list(stock = structure(1:3, class = "factor", .Label = c("X",
63 | "Y", "Z")), `2009-01-01` = c(-0.560475646552213, 2.44816359487892,
64 | -4.27129482394738), `2009-01-02` = c(-0.23017748948328, 0.719627654114728,
65 | -0.871899658633181), `2009-01-03` = c(1.55870831414912, 0.801542901188104,
66 | -4.10401779322896), `2009-01-04` = c(0.070508391424576, 0.221365431890239,
67 | -2.91556491716456), `2009-01-05` = c(0.129287735160946, -1.11168226950815,
68 | -2.50015707139703), `2009-01-06` = c(1.71506498688328, 3.57382627360616,
69 | -6.74677324296965), `2009-01-07` = c(0.460916205989202, 0.995700956458479,
70 | 3.3511481779781), `2009-01-08` = c(-1.26506123460653, -3.93323431325928,
71 | 0.613492471346061), `2009-01-09` = c(-0.686852851893526, 1.40271180312737,
72 | -4.55254774804779), `2009-01-10` = c(-0.445661970099958, -0.945582815455868,
73 | 5.01525968427971)), row.names = c(NA, -3L), class = c("data.table",
74 | "data.frame"), sorted = "stock")
75 | 
76 | stock_wide2 = as.data.frame(stocksm) %>% dcast(stock ~ time)
77 | expect_equal(res3, stock_wide2)
78 | stock_wide2_dt = stocksm %>% dcast(stock ~ time)
79 | expect_equal(res3, stock_wide2_dt)
80 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_dplyr_verbs.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | data(mtcars)
  4 | 
  5 | cat("\nContext:", "verbs errors", "\n")
  6 | expect_error(dt_select(1:5, am))
  7 | expect_error(dt_select(iris, "^aaa"))
  8 | expect_error(dt_mutate(1:5, new := 1))
  9 | expect_error(dt_summarise(1:5, new = mean(mpg)))
 10 | expect_error(dt_summarise_all(1:5, by = cyl))
 11 | expect_error(dt_summarise_all(mtcars, by = cyl))
 12 | expect_error(dt_filter(1:5, cyl==1))
 13 | expect_error(dt_filter(mtcars, cyl==2, by = am))
 14 | expect_error(dt_filter(mtcars, cyl=1))
 15 | expect_error(dt_filter(mtcars, c(TRUE, FALSE)))
 16 | 
 17 | ################
 18 | cat("\nContext:", "dt_mutate", "\n")
 19 | mt_dt = as.data.table(mtcars)
 20 | mt_dt2 = data.table::copy(mt_dt)
 21 | new_dt = dt_mutate(mt_dt, mpg_hp = mpg/hp, new = mpg_hp*2)
 22 | new_dt2 = dt_mutate(mtcars, mpg_hp = mpg/hp, new = mpg_hp*2)
 23 | mt_dt2[, mpg_hp := mpg/hp][, new := mpg_hp*2]
 24 | expect_identical(new_dt, mt_dt)
 25 | expect_identical(new_dt, mt_dt2)
 26 | expect_identical(new_dt2, mt_dt2)
 27 | ###############
 28 | 
 29 | ###############
 30 | cat("\nContext:", "summarize/summarize_all", "\n")
 31 | mt_dt = as.data.table(mtcars)
 32 | res = dt_summarize(mtcars, fun = mean, by = am)
 33 | res2 = dt_summarize(mt_dt, fun = mean, by = am)
 34 | res3 = mt_dt[, lapply(.SD, mean), by = am]
 35 | expect_identical(res3, res)
 36 | expect_identical(res3, res2)
 37 | ###########
 38 | res = dt_summarize_all(mtcars, mean, by = am)
 39 | res2 = dt_summarize_all(mt_dt, mean, by = am)
 40 | res3 = mt_dt[, lapply(.SD, mean), by = am]
 41 | expect_identical(res3, res)
 42 | expect_identical(res3, res2)
 43 | ###############
 44 | mt_dt = as.data.table(mtcars)
 45 | res = dt_summarize(mtcars, mpg, hp, fun = mean, by = am)
 46 | res2 = dt_summarize(mt_dt, mpg, hp, fun = mean, by = am)
 47 | res3 = mt_dt[, lapply(list(mpg = mpg, hp = hp), mean), by = am]
 48 | expect_identical(res3, res)
 49 | expect_identical(res3, res2)
 50 | 
 51 | ###############
 52 | mt_dt = as.data.table(mtcars)
 53 | res = dt_summarize(mtcars, mpg, just_wow = hp, fun = mean, by = am)
 54 | res2 = dt_summarize(mt_dt, mpg, just_wow = hp, fun = mean, by = am)
 55 | res3 = mt_dt[, lapply(list(mpg = mpg, just_wow = hp), mean), by = am]
 56 | expect_identical(res3, res)
 57 | expect_identical(res3, res2)
 58 | ###################
 59 | #############
 60 | mt_dt = as.data.table(mtcars)
 61 | res = dt_summarize(mtcars, agg = mean(mpg), agg2 = mean(hp), by = am)
 62 | res2 = dt_summarize(mt_dt, agg = mean(mpg), agg2 = mean(hp), by = am)
 63 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
 64 | expect_identical(res3, res)
 65 | expect_identical(res3, res2)
 66 | #############
 67 | mt_dt = as.data.table(mtcars)
 68 | res = dt_summarize(mtcars, agg := mean(mpg), agg2 := mean(hp), by = am)
 69 | res2 = dt_summarize(mt_dt, agg := mean(mpg), agg2 := mean(hp), by = am)
 70 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
 71 | expect_identical(res3, res)
 72 | expect_identical(res3, res2)
 73 | 
 74 | new_var1 = "agg"
 75 | new_var2 = "agg2"
 76 | expr1 = quote(mean(mpg))
 77 | expr2 = quote(mean(hp))
 78 | mt_dt = as.data.table(mtcars)
 79 | res = dt_summarize(mtcars, (new_var1) := eval(expr1), (new_var2) := eval(expr2), by = am)
 80 | res2 = dt_summarize(mt_dt, (new_var1) := eval(expr1), (new_var2) := eval(expr2), by = am)
 81 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
 82 | expect_equal(res3, res)
 83 | expect_equal(res3, res2)
 84 | 
 85 | 
 86 | 
 87 | ##########################
 88 | mt_dt = as.data.table(mtcars)
 89 | res = dt_summarize(mtcars, mean(mpg), mean(hp), by = am)
 90 | res2 = dt_summarize(mt_dt, mean(mpg), mean(hp), by = am)
 91 | res3 = mt_dt[, list("mean(mpg)" = mean(mpg), "mean(hp)" = mean(hp)), by = am]
 92 | expect_identical(res3, res)
 93 | expect_identical(res3, res2)
 94 | 
 95 | ##########################
 96 | cat("\nContext:", "dt_filter", "\n")
 97 | res = dt_filter(mtcars, vs==0, am==0)
 98 | res2 = dt_filter(mt_dt, vs==0, am==0)
 99 | res3 = mt_dt[vs==0 & am==0, ]
100 | expect_identical(res3, res)
101 | expect_identical(res3, res2)
102 | 
103 | data(mtcars)
104 | dt_mt = as.data.table(mtcars)
105 | expect_identical(
106 |     rows(mtcars, rowSums(vs %to% am)>0),
107 |     dt_mt[vs>0 | am>0, ]
108 | )
109 | 
110 | etab = data.frame(a = 1:2, b = 3:4)
111 | class(etab) = c("etable", class(etab))
112 | res = etab[2,, drop = FALSE]
113 | rownames(res) = NULL
114 | expect_equal(res, rows(etab, 2))
115 | 
116 | etab = data.frame(a = 1:2, b = 3:4)
117 | class(etab) = c("etable", class(etab))
118 | res = etab[1,, drop = FALSE]
119 | rownames(res) = NULL
120 | expect_equal(res, rows(etab, b<4))
121 | ##########################
122 | 
123 | 
124 | 
125 | cat("\nContext:", "dt_select", "\n")
126 | mt_dt = as.data.table(mtcars)
127 | res1 = dt_select(mt_dt, -cyl, -am)
128 | res3 = mt_dt[, -c(2,9), with = FALSE]
129 | expect_identical(res1, res3)
130 | 
131 | 


--------------------------------------------------------------------------------
/man/to_list.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/to_list.R
  3 | \name{to_list}
  4 | \alias{to_list}
  5 | \alias{to_vec}
  6 | \alias{to_df}
  7 | \alias{to_dfr}
  8 | \alias{to_dfc}
  9 | \title{Apply an expression to each element of a list or vector}
 10 | \usage{
 11 | to_list(
 12 |   data,
 13 |   expr = NULL,
 14 |   ...,
 15 |   skip_null = TRUE,
 16 |   trace = FALSE,
 17 |   trace_step = 1L
 18 | )
 19 | 
 20 | to_vec(
 21 |   data,
 22 |   expr = NULL,
 23 |   ...,
 24 |   skip_null = TRUE,
 25 |   trace = FALSE,
 26 |   trace_step = 1L,
 27 |   recursive = TRUE,
 28 |   use.names = TRUE
 29 | )
 30 | 
 31 | to_df(
 32 |   data,
 33 |   expr = NULL,
 34 |   ...,
 35 |   trace = FALSE,
 36 |   trace_step = 1L,
 37 |   idvalue = NULL,
 38 |   idname = "item_id"
 39 | )
 40 | 
 41 | to_dfr(
 42 |   data,
 43 |   expr = NULL,
 44 |   ...,
 45 |   trace = FALSE,
 46 |   trace_step = 1L,
 47 |   idvalue = NULL,
 48 |   idname = "item_id"
 49 | )
 50 | 
 51 | to_dfc(data, expr = NULL, ..., trace = FALSE, trace_step = 1)
 52 | }
 53 | \arguments{
 54 | \item{data}{data.frame/list/vector}
 55 | 
 56 | \item{expr}{expression or function. Expression can use predefined variables:
 57 | '.x' is a value of current list element, '.name' is a name of the element
 58 | and '.index' is sequential number of the element.}
 59 | 
 60 | \item{...}{further arguments provided if 'expr' is function.}
 61 | 
 62 | \item{skip_null}{logical Should we skip NULL's from result? Default is TRUE}
 63 | 
 64 | \item{trace}{FALSE by default. Should we report progress during execution?
 65 | Possible values are TRUE, FALSE, "pb" (progress bar) or custom expression in 'quote', e. g. 'quote(print(.x))'.
 66 | Expression can contain '.x', '.name', and '.index' variables.}
 67 | 
 68 | \item{trace_step}{integer. 1 by default.  Step for reporting progress. Ignored if 'trace' argument is equal to FALSE.}
 69 | 
 70 | \item{recursive}{logical. Should unlisting be applied to list components of x? For details see \link[base:unlist]{unlist}.}
 71 | 
 72 | \item{use.names}{logical. TRUE by default. Should names of source list be
 73 | preserved? Setting it to FALSE in some cases can greatly increase
 74 | performance. For details see \link[base:unlist]{unlist}.}
 75 | 
 76 | \item{idvalue}{expression for calculation id column. Usually it is just
 77 | unquoted symbols: one of the '.name', '.index' or '.x'.}
 78 | 
 79 | \item{idname}{character, 'item_id' by default. Name for the id column.}
 80 | }
 81 | \value{
 82 | 'to_list' returns list, 'to_vec' tries to return vector and other functions return data.table
 83 | }
 84 | \description{
 85 | \itemize{
 86 | \item \code{to_list} always returns a list, each element of which is the
 87 | result of expression \code{expr} on the elements of data. By
 88 | default, NULL's will be removed from the result. You can change this behavior
 89 | with \code{skip_null} argument.
 90 | \item \code{to_vec} is the same as \code{to_list} but tries to convert its result
 91 | to vector via \link[base:unlist]{unlist}.
 92 | \item \code{to_df} and \code{to_dfr} try to combine its results to data.table by rows.
 93 | \item \code{to_dfc} tries to combine its result to data.table by columns.
 94 | }
 95 | 
 96 | \if{html}{\out{<div class="sourceCode">}}\preformatted{}\if{html}{\out{</div>}}
 97 | 
 98 | Expression can use predefined variables: '.x' is a value of current list
 99 | element, '.name' is a name of the element and '.index' is sequential number
100 | of the element.
101 | }
102 | \examples{
103 | 1:5 \%>\%
104 |     to_list(rnorm(n = 3, .x))
105 | 
106 | # or in 'lapply' style
107 | 1:5 \%>\%
108 |     to_list(rnorm, n = 3) \%>\%
109 |     to_vec(mean)
110 | 
111 | # or use an anonymous function
112 | 1:5 \%>\%
113 |     to_list(function(x) rnorm(3, x))
114 | 
115 | # Use to_vec() to reduce output to a vector instead
116 | # of a list:
117 | # filtering - return only even numbers
118 | to_vec(1:10, if(.x \%\% 2 == 0) .x)
119 | 
120 | # filtering - calculate mean only on the numeric columns
121 | to_vec(iris, if(is.numeric(.x)) mean(.x))
122 | 
123 | # mean for numerics, number of distincts for others
124 | to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x))
125 | 
126 | # means for Sepal
127 | to_vec(iris, if(startsWith(.name, "Sepal")) mean(.x))
128 | 
129 | # A more realistic example: split a data frame into pieces, fit a
130 | # model to each piece, summarise and extract R^2
131 | mtcars \%>\%
132 |     split(.$cyl) \%>\%
133 |     to_list(summary(lm(mpg ~ wt, data = .x))) \%>\%
134 |     to_vec(.x$r.squared)
135 | 
136 | # If each element of the output is a data frame, use
137 | # to_df to row-bind them together:
138 | mtcars \%>\%
139 |     split(.$cyl) \%>\%
140 |     to_list(lm(mpg ~ wt, data = .x)) \%>\%
141 |     to_df(c(cyl = .name, coef(.x)))
142 | 
143 | \dontrun{
144 | # read all csv files in "data" to data.frame
145 | all_files = dir("data", pattern = "csv$", full.names = TRUE) \%>\%
146 |     to_df(fread,
147 |           idvalue = basename(.x),
148 |           idname = "filename",
149 |           trace = "pb"
150 |           )
151 | }
152 | }
153 | 


--------------------------------------------------------------------------------
/man/dcast.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/dcast.R
  3 | \name{dcast}
  4 | \alias{dcast}
  5 | \alias{melt}
  6 | \alias{guess}
  7 | \title{Convert data between wide and long forms.}
  8 | \usage{
  9 | dcast(
 10 |   data,
 11 |   formula,
 12 |   fun.aggregate = NULL,
 13 |   sep = "_",
 14 |   ...,
 15 |   margins = NULL,
 16 |   subset = NULL,
 17 |   fill = NULL,
 18 |   drop = TRUE,
 19 |   value.var = guess(data),
 20 |   verbose = getOption("datatable.verbose")
 21 | )
 22 | 
 23 | melt(
 24 |   data,
 25 |   id.vars,
 26 |   measure.vars,
 27 |   variable.name = "variable",
 28 |   value.name = "value",
 29 |   ...,
 30 |   na.rm = FALSE,
 31 |   variable.factor = TRUE,
 32 |   value.factor = FALSE,
 33 |   verbose = getOption("datatable.verbose")
 34 | )
 35 | 
 36 | guess(data)
 37 | }
 38 | \arguments{
 39 | \item{data}{A data.table/data.frame. \code{data.frame} will be automatically
 40 | converted to data.table.}
 41 | 
 42 | \item{formula}{A formula of the form LHS ~ RHS to cast. For details see \link[data.table:dcast.data.table]{dcast}.}
 43 | 
 44 | \item{fun.aggregate}{Should the data be aggregated before casting? If the
 45 | formula doesn't identify a single observation for each cell, then
 46 | aggregation defaults to length with a message.}
 47 | 
 48 | \item{sep}{Character vector of length 1, indicating the separating character
 49 | in variable names generated during casting. Default is _ for backwards
 50 | compatibility.}
 51 | 
 52 | \item{...}{Any other arguments that may be passed to the aggregating function.}
 53 | 
 54 | \item{margins}{For details see \link[data.table:dcast.data.table]{dcast}.}
 55 | 
 56 | \item{subset}{Specified if casting should be done on a subset of the data.}
 57 | 
 58 | \item{fill}{Value with which to fill missing cells. If fun.aggregate is
 59 | present, takes the value by applying the function on a 0-length vector.}
 60 | 
 61 | \item{drop}{FALSE will cast by including all missing combinations. c(FALSE,
 62 | TRUE) will only include all missing combinations of formula LHS. And
 63 | c(TRUE, FALSE) will only include all missing combinations of formula RHS.}
 64 | 
 65 | \item{value.var}{Name of the column whose values will be filled to cast.
 66 | Function 'guess()' tries to, well, guess this column automatically, if none
 67 | is provided. It is possible to cast multiple `value.var`` columns
 68 | simultaneously. For details see \link[data.table:dcast.data.table]{dcast}.}
 69 | 
 70 | \item{verbose}{For details see \link[data.table:dcast.data.table]{dcast}.}
 71 | 
 72 | \item{id.vars}{vector of id variables. Can be integer (corresponding id
 73 | column numbers) or character (id column names) vector. If missing, all
 74 | non-measure columns will be assigned to it. If integer, must be positive;
 75 | see Details.}
 76 | 
 77 | \item{measure.vars}{Measure variables for melting. Can be missing, vector,
 78 | list, or pattern-based. For details see \link[data.table:dcast.data.table]{dcast}.}
 79 | 
 80 | \item{variable.name}{name for the measured variable names column. The default name is 'variable'.}
 81 | 
 82 | \item{value.name}{name for the molten data values column(s). The default name
 83 | is 'value'. Multiple names can be provided here for the case when
 84 | measure.vars is a list, though note well that the names provided in
 85 | measure.vars take precedence.}
 86 | 
 87 | \item{na.rm}{If TRUE, NA values will be removed from the molten data.}
 88 | 
 89 | \item{variable.factor}{If TRUE, the variable column will be converted to
 90 | factor, else it will be a character column.}
 91 | 
 92 | \item{value.factor}{If TRUE, the value column will be converted to factor,
 93 | else the molten value type is left unchanged.}
 94 | }
 95 | \value{
 96 | data.table
 97 | }
 98 | \description{
 99 | The \code{dcast} formula takes the form \code{LHS ~ RHS}, ex: \code{var1 + var2 ~ var3}. The
100 | order of entries in the formula is essential. There are two special
101 | variables: \code{.} and \code{...}. \code{.} represents no variable; \code{...} represents all
102 | variables not otherwise mentioned in formula. LHS variable values will be in
103 | rows. RHS variables values will become column names.
104 | \code{fun.aggregate(value.var)} will be cell values. For details see
105 | \link[data.table:dcast.data.table]{dcast} and \link[data.table:melt.data.table]{melt}.
106 | }
107 | \examples{
108 | # examples from 'tidyr' package
109 | stocks = data.frame(
110 |     time = as.Date('2009-01-01') + 0:9,
111 |     X = rnorm(10, 0, 1),
112 |     Y = rnorm(10, 0, 2),
113 |     Z = rnorm(10, 0, 4)
114 | )
115 | stocksm = stocks \%>\%
116 |     melt(id.vars = "time", variable.name = "stock", value.name = "price")
117 | stocksm \%>\% dcast(time ~ stock)
118 | stocksm \%>\% dcast(stock ~ time)
119 | 
120 | # dcast and melt are complements
121 | df = data.frame(x = c("a", "b"), y = c(3, 4), z = c(5, 6))
122 | df \%>\%
123 |     dcast(z ~ x, value.var = "y") \%>\%
124 |     melt(id.vars = "z", variable.name = "x", value.name = "y", na.rm = TRUE)
125 | }
126 | \author{
127 | Matt Dowle \href{mailto:mattjdowle@gmail.com}{mattjdowle@gmail.com}
128 | }
129 | \keyword{internal}
130 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_take_all.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext:","take_all", "\n")
  2 | 
  3 | 
  4 | data(iris)
  5 | data(mtcars)
  6 | dt_iris = as.data.table(iris)
  7 | expect_equal(
  8 |     take_all(iris, if(is.numeric(.x)) mean(.x)),
  9 |     dt_iris[,lapply(.SD, mean), .SDcols = -5]
 10 | )
 11 | 
 12 | expect_equal(
 13 |     take_all(iris, if(.index<5) mean(.value)),
 14 |     dt_iris[,lapply(.SD, mean), .SDcols = -5]
 15 | )
 16 | expect_equal(
 17 |     take_all(iris, mean, by = Species),
 18 |     dt_iris[,lapply(.SD, mean), by = Species]
 19 | )
 20 | 
 21 | expect_equal(
 22 |     take_all(iris, if(is.numeric(.x)) scale(.x)),
 23 |     dt_iris[,lapply(.SD, scale), .SDcols = -5]
 24 | )
 25 | 
 26 | expect_equal(
 27 |     take_all(dt_iris, mean, sd, by = Species, suffix = FALSE),
 28 |     dt_iris[,c(
 29 |         setNames(
 30 |         lapply(.SD, mean),
 31 |         paste0("mean_", colnames(.SD))
 32 |         ),
 33 |         setNames(
 34 |             lapply(.SD, sd),
 35 |             paste0("sd_", colnames(.SD))
 36 |         )), by = Species]
 37 | )
 38 | 
 39 | 
 40 | expect_equal(
 41 |     take_all(dt_iris, mean, sd, by = Species),
 42 |     dt_iris[,c(
 43 |         setNames(
 44 |             lapply(.SD, mean),
 45 |             paste0(colnames(.SD), "_mean")
 46 |         ),
 47 |         setNames(
 48 |             lapply(.SD, sd),
 49 |             paste0(colnames(.SD), "_sd")
 50 |         )), by = Species]
 51 | )
 52 | 
 53 | 
 54 | expect_equal(
 55 |     take_all(dt_iris, mean, sd, by = Species, sep = "."),
 56 |     dt_iris[,c(
 57 |         setNames(
 58 |             lapply(.SD, mean),
 59 |             paste0(colnames(.SD), ".mean")
 60 |         ),
 61 |         setNames(
 62 |             lapply(.SD, sd),
 63 |             paste0(colnames(.SD), ".sd")
 64 |         )), by = Species]
 65 | )
 66 | 
 67 | expect_equal(
 68 |     take_all(dt_iris, mean, sd, by = Species, sep = "", suffix = FALSE),
 69 |     dt_iris[,c(
 70 |         setNames(
 71 |             lapply(.SD, mean),
 72 |             paste0("mean", colnames(.SD))
 73 |         ),
 74 |         setNames(
 75 |             lapply(.SD, sd),
 76 |             paste0("sd", colnames(.SD))
 77 |         )), by = Species]
 78 | )
 79 | 
 80 | expect_equal(
 81 |     take_all(iris, stat = mean, by = Species, suffix = FALSE),
 82 |     dt_iris[,
 83 |         setNames(lapply(.SD, mean), paste0("stat_", colnames(.SD))),
 84 |         by = Species]
 85 | )
 86 | 
 87 | expect_equal(
 88 |     take_all(iris, stat = function(x) 2*mean(x), by = Species, suffix = FALSE),
 89 |     dt_iris[,
 90 |         setNames(lapply(.SD, function(x) 2*mean(x)), paste0("stat_", colnames(.SD))),
 91 |         by = Species]
 92 | )
 93 | 
 94 | 
 95 | expect_equal(
 96 |     take_all(iris, mean = mean, by = Species, i = Species!="setosa", suffix = TRUE),
 97 |     dt_iris[Species!="setosa",
 98 |       setNames(lapply(.SD, mean), paste0(colnames(.SD), "_mean")),
 99 |       by = Species]
100 | )
101 | 
102 | expect_equal(
103 |     take_all(iris, "mean" = mean, by = Species, suffix = FALSE, .SDcols = -(1:2)),
104 |     dt_iris[,
105 |         setNames(lapply(.SD, mean), paste0("mean_", colnames(.SD))),
106 |         by = Species,
107 |         .SDcols = -(1:2)]
108 | )
109 | 
110 | expect_equal(
111 |     take_all(iris, "mean" = mean, keyby = Species, suffix = FALSE, .SDcols = -(1:2)),
112 |     dt_iris[,
113 |         setNames(lapply(.SD, mean), paste0("mean_", colnames(.SD))),
114 |         keyby = Species,
115 |         .SDcols = -(1:2)]
116 | )
117 | 
118 | 
119 | expect_equal(
120 |     take_all(iris,
121 |              if(startsWith(.name, "Sepal")) mean(.value),
122 |              if(startsWith(.name, "Petal")) uniqueN(.value),
123 |              by = Species),
124 |     {
125 |         dt_iris[,c(
126 |                 lapply(.SD[,.(Sepal.Length, Sepal.Width)], mean),
127 |                 lapply(.SD[,.(Petal.Length, Petal.Width)], uniqueN)
128 |         ),
129 |                  by = Species]
130 |     }
131 | )
132 | 
133 | expect_equal(
134 |     take_all(iris,
135 |              if(.index %in% 1:2) mean(.x),
136 |              if(.index %in% 3:4) uniqueN(.x),
137 |              by = Species),
138 |     {
139 |         dt_iris[,c(
140 |             lapply(.SD[,.(Sepal.Length, Sepal.Width)], mean),
141 |             lapply(.SD[,.(Petal.Length, Petal.Width)], uniqueN)
142 |         ),
143 |         by = Species]
144 |     }
145 | )
146 | 
147 | data(iris)
148 | dt_iris = as.data.table(iris)
149 | my_fun = function(x){
150 |     if(is.numeric(x)){
151 |         mean(x)
152 |     } else {
153 |         NULL
154 |     }
155 | }
156 | 
157 | expect_equal(
158 |     take_all(iris, mean = my_fun),
159 |     dt_iris[,setNames(lapply(.SD, mean), paste0(names(dt_iris)[-5], "_mean")), .SDcols = -"Species"]
160 | )
161 | 
162 | dt_iris = as.data.table(iris)
163 | expect_equal(
164 |     take_all(iris, mean = function(x){
165 |         if(is.numeric(x)){
166 |             mean(x)
167 |         } else {
168 |             NULL
169 |         }
170 |     }),
171 |     dt_iris[,setNames(lapply(.SD, mean), paste0(names(dt_iris)[-5], "_mean")), .SDcols = -"Species"]
172 | )
173 | 
174 | dt_iris = as.data.table(iris)
175 | expect_equal(
176 |     take_all(iris, mean = mean, .SDcols = -"Species"),
177 |     dt_iris[,setNames(lapply(.SD, mean), paste0(names(dt_iris)[-5], "_mean")), .SDcols = -"Species"]
178 | )
179 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_vlookup.R:
--------------------------------------------------------------------------------
  1 | cat("vlookup data.frame\n")
  2 | 
  3 | dict = data.frame(num=1:26,small=letters,cap=LETTERS, stringsAsFactors = FALSE)
  4 | 
  5 | expect_identical(vlookup(1:3,dict), dict[1:3,2])
  6 | expect_identical(xlookup(1:3,dict$num, dict$small), dict[1:3,2])
  7 | expect_identical(vlookup(1:3,dict, result_column = names(dict)), dict[1:3,])
  8 | expect_identical(vlookup(1:3,as.data.table(dict), result_column = names(dict)), as.data.table(dict[1:3,]))
  9 | 
 10 | dict2 = data.frame(num = c(1:26, 1:26), small = c(letters, LETTERS), stringsAsFactors = FALSE)
 11 | expect_identical(vlookup(1:3,dict2), dict[1:3,2])
 12 | expect_error(xlookup(1:3,dict2$num, dict$small))
 13 | expect_identical(xlookup(1:3,dict2$num, dict2$small), dict[1:3,2])
 14 | expect_identical(vlookup(1:3,dict2, result_column = names(dict2)), dict[1:3,1:2])
 15 | expect_identical(vlookup(1:3,as.data.table(dict2), result_column = names(dict2)), as.data.table(dict[1:3,1:2]))
 16 | 
 17 | 
 18 | 
 19 | 
 20 | 
 21 | expect_identical(vlookup(c(45,1:3,58,NA),dict,result_column='cap'),
 22 |                  c(NA,"A", "B", "C", NA,NA))
 23 | expect_identical(vlookup(c(45,1:3,58,NA),dict,result_column='cap', no_match = "Not found"),
 24 |                  c("Not found","A", "B", "C", "Not found","Not found"))
 25 | 
 26 | expect_identical(vlookup(c(45,1:3,58,NA),dict,result_column=c("small", "cap"), no_match = "Not found"),
 27 |  structure(list(small = c("Not found", "a", "b", "c", "Not found",
 28 |                           "Not found"), cap = c("Not found", "A", "B", "C", "Not found",
 29 |                                                 "Not found")), row.names = c("NA", "1", "2", "3", "NA.1", "NA.2"
 30 |                                                 ), class = "data.frame"))
 31 | 
 32 | expect_identical(vlookup(c('z','d','f','d'),dict,lookup_column = 'small', result_column = names(dict)),
 33 |                  dict[c(26,4,6,4),])
 34 | 
 35 | 
 36 | expect_error(vlookup(iris, mtcars))
 37 | 
 38 | 
 39 | cat("vlookup vector\n")
 40 | # with vector
 41 | dict=seq_along(letters)
 42 | names(dict) = letters
 43 | 
 44 | expect_identical(xlookup(c(6, 4, 2), dict, names(dict)), c("f","d","b"))
 45 | expect_identical(xlookup(c("f","d","b"), names(dict), dict), dict[c(6L, 4L, 2L)])
 46 | 
 47 | 
 48 | expect_identical(
 49 |     vlookup(c(1, NA, 2), dict = data.frame(a = c(2, NA), b= c(3, 2))),
 50 |     c(NA, 2, 3)
 51 | )
 52 | expect_identical(
 53 |     vlookup(c(1, NA, 2), dict = data.table(a = c(2, NA), b= c(3, 2))),
 54 |     c(NA, 2, 3)
 55 | )
 56 | 
 57 | cat("vlookup excel examples ex2\n")
 58 | 
 59 | # Just for fun. Examples borrowed from Microsoft Excel.
 60 | # It is not the R way of doing things.
 61 | 
 62 | # Example 2
 63 | 
 64 | ex2 = fread("
 65 |     Item_ID Item Cost Markup
 66 |     ST-340 Stroller 145.67  0.30
 67 |     BI-567 Bib 3.56  0.40
 68 |     DI-328 Diapers  21.45  0.35
 69 |     WI-989 Wipes  5.12  0.40
 70 |     AS-469 Aspirator 2.56  0.45
 71 | ")
 72 | 
 73 | # Calculates the retail price of diapers by adding the markup percentage to the cost.
 74 | expect_identical(vlookup("DI-328", ex2, 3) * (1 + vlookup("DI-328", ex2, 4)) ,28.9575)
 75 | 
 76 | # Calculates the sale price of wipes by subtracting a specified discount from
 77 | # the retail price.
 78 | expect_identical((vlookup("WI-989", ex2, "Cost") * (1 + vlookup("WI-989", ex2, "Markup"))) * (1 - 0.2), 5.7344)
 79 | 
 80 | A2 = ex2[[1]][1]
 81 | A3 = ex2[[1]][2]
 82 | 
 83 | # If the cost of an item is greater than or equal to $20.00, displays the string
 84 | # "Markup is nn%"; otherwise, displays the string "Cost is under $20.00".
 85 | expect_identical(ifelse(vlookup(A2, ex2, "Cost") >= 20,
 86 |        paste0("Markup is " , 100 * vlookup(A2, ex2, "Markup"),"%"),
 87 |        "Cost is under $20.00"), 'Markup is 30%')
 88 | 
 89 | 
 90 | # If the cost of an item is greater than or equal to $20.00, displays the string
 91 | # Markup is nn%"; otherwise, displays the string "Cost is $n.nn".
 92 | expect_identical(ifelse(vlookup(A3, ex2, "Cost") >= 20,
 93 |        paste0("Markup is: " , 100 * vlookup(A3, ex2, "Markup") , "%"),
 94 |        paste0("Cost is $", vlookup(A3, ex2, "Cost"))), 'Cost is $3.56')
 95 | 
 96 | 
 97 | # Example 3
 98 | cat("vlookup excel examples ex3\n")
 99 | 
100 | ex3 = fread('
101 |     ID  Last_name  First_name  Title Birth_date
102 |     1 Davis Sara "Sales Rep."  12/8/1968
103 |     2 Fontana Olivier "V.P. of Sales" 2/19/1952
104 |     3 Leal Karina "Sales Rep." 8/30/1963
105 |     4 Patten Michael "Sales Rep." 9/19/1958
106 |     5 Burke Brian "Sales Mgr." 3/4/1955
107 |     6 Sousa Luis "Sales Rep."  7/2/1963
108 | ')
109 | 
110 | # If there is an employee with an ID of 5, displays the employee's last name;
111 | # otherwise, displays the message "Employee not found".
112 | expect_identical(ifelse(is.na(vlookup(5,ex3,"Last_name")),
113 |        "Employee not found",
114 |        vlookup(5,ex3,"Last_name")), 'Burke')
115 | expect_identical(ifelse(is.na(vlookup(15,ex3,"Last_name")),
116 |        "Employee not found",
117 |        vlookup(15,ex3,"Last_name")), 'Employee not found')
118 | 
119 | # For the employee with an ID of 4, concatenates the values of three cells into
120 | # a complete sentence.
121 | expect_identical(paste0(vlookup(4,ex3,"First_name"), " ",
122 |        vlookup(4,ex3,"Last_name"), " is a ",
123 |        vlookup(4,ex3,"Title")), 'Michael Patten is a Sales Rep.')
124 | 
125 | 


--------------------------------------------------------------------------------
/man/dt_mutate.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/verbs.R
  3 | \name{dt_mutate}
  4 | \alias{dt_mutate}
  5 | \alias{dt_summarize}
  6 | \alias{dt_summarize_all}
  7 | \alias{dt_summarise}
  8 | \alias{dt_summarise_all}
  9 | \alias{dt_select}
 10 | \alias{dt_filter}
 11 | \title{'dplyr'-like interface for data.table.}
 12 | \usage{
 13 | dt_mutate(data, ..., by)
 14 | 
 15 | dt_summarize(data, ..., by, keyby, fun = NULL)
 16 | 
 17 | dt_summarize_all(data, fun, by, keyby)
 18 | 
 19 | dt_summarise(data, ..., by, keyby, fun = NULL)
 20 | 
 21 | dt_summarise_all(data, fun, by, keyby)
 22 | 
 23 | dt_select(data, ...)
 24 | 
 25 | dt_filter(data, ...)
 26 | }
 27 | \arguments{
 28 | \item{data}{data.table/data.frame data.frame will be automatically converted
 29 | to data.table. \code{dt_mutate} modify data.table object in-place.}
 30 | 
 31 | \item{...}{List of variables or name-value pairs of summary/modifications
 32 | functions. The name will be the name of the variable in the result. In the
 33 | \code{mutate} function we can use \code{a = b} or \code{a := b} notation.
 34 | Advantages of \verb{:=} are multiassignment (\code{c("a", "b") := list(1,2)})
 35 | and parametric assignment (\code{(a) := 2}).}
 36 | 
 37 | \item{by}{unquoted name of grouping variable of list of unquoted names of
 38 | grouping variables. For details see \link[data.table:data.table]{data.table}}
 39 | 
 40 | \item{keyby}{Same as \code{by}, but with an additional \code{setkey()} run on the by
 41 | columns of the result, for convenience. It is common practice to use
 42 | 'keyby=' routinely when you wish the result to be sorted. For details see
 43 | \link[data.table:data.table]{data.table}.}
 44 | 
 45 | \item{fun}{function which will be applied to all variables in
 46 | \code{dt_summarize} and \code{dt_summarize_all}.}
 47 | }
 48 | \value{
 49 | data.table
 50 | }
 51 | \description{
 52 | Subset of 'dplyr' verbs to work with data.table. Note that there is no
 53 | \code{group_by} verb - use \code{by} or \code{keyby} argument when needed.
 54 | \itemize{
 55 | \item \code{dt_mutate} adds new variables or modify existing variables. If
 56 | \code{data} is data.table then it modifies in-place.
 57 | \item \code{dt_summarize} computes summary statistics. Splits the data into
 58 | subsets, computes summary statistics for each, and returns the result in the
 59 | "data.table" form.
 60 | \item \code{dt_summarize_all} is the same as \code{dt_summarize} but work over all non-grouping variables.
 61 | \item \code{dt_filter} selects rows/cases where conditions are true. Rows
 62 | where the condition evaluates to NA are dropped.
 63 | \item \code{dt_select} selects column/variables from the data set. Range of
 64 | variables are supported, e. g. vs:carb. Characters which start with '^' or
 65 | end with '$' considered as Perl-style regular expression patterns. For
 66 | example, '^Petal' returns all variables started with 'Petal'. 'Width$'
 67 | returns all variables which end with 'Width'. Pattern '^.' matches all
 68 | variables and pattern '^.*my_str' is equivalent to \verb{contains "my_str"}. See
 69 | examples.
 70 | }
 71 | }
 72 | \examples{
 73 | # examples from 'dplyr'
 74 | # newly created variables are available immediately
 75 | mtcars  \%>\%
 76 |     dt_mutate(
 77 |         cyl2 = cyl * 2,
 78 |         cyl4 = cyl2 * 2
 79 |     ) \%>\%
 80 |     head()
 81 | 
 82 | 
 83 | # you can also use dt_mutate() to remove variables and
 84 | # modify existing variables
 85 | mtcars \%>\%
 86 |     dt_mutate(
 87 |         mpg = NULL,
 88 |         disp = disp * 0.0163871 # convert to litres
 89 |     ) \%>\%
 90 |     head()
 91 | 
 92 | 
 93 | # window functions are useful for grouped mutates
 94 | mtcars \%>\%
 95 |     dt_mutate(
 96 |         rank = rank(-mpg, ties.method = "min"),
 97 |         keyby = cyl) \%>\%
 98 |     print()
 99 | 
100 | 
101 | # You can drop variables by setting them to NULL
102 | mtcars \%>\% dt_mutate(cyl = NULL) \%>\% head()
103 | 
104 | # A summary applied without by returns a single row
105 | mtcars \%>\%
106 |     dt_summarise(mean = mean(disp), n = .N)
107 | 
108 | # Usually, you'll want to group first
109 | mtcars \%>\%
110 |     dt_summarise(mean = mean(disp), n = .N, by = cyl)
111 | 
112 | 
113 | # Multiple 'by' - variables
114 | mtcars \%>\%
115 |     dt_summarise(cyl_n = .N, by = list(cyl, vs))
116 | 
117 | # Newly created summaries immediately
118 | # doesn't overwrite existing variables
119 | mtcars \%>\%
120 |     dt_summarise(disp = mean(disp),
121 |                   sd = sd(disp),
122 |                   by = cyl)
123 | 
124 | # You can group by expressions:
125 | mtcars \%>\%
126 |     dt_summarise_all(mean, by = list(vsam = vs + am))
127 | 
128 | # filter by condition
129 | mtcars \%>\%
130 |     dt_filter(am==0)
131 | 
132 | # filter by compound condition
133 | mtcars \%>\%
134 |     dt_filter(am==0,  mpg>mean(mpg))
135 | 
136 | 
137 | # select
138 | mtcars \%>\% dt_select(vs:carb, cyl)
139 | mtcars \%>\% dt_select(-am, -cyl)
140 | 
141 | # regular expression pattern
142 | dt_select(iris, "^Petal") # variables which start from 'Petal'
143 | dt_select(iris, "Width$") # variables which end with 'Width'
144 | # move Species variable to the front.
145 | # pattern "^." matches all variables
146 | dt_select(iris, Species, "^.")
147 | # pattern "^.*i" means "contains 'i'"
148 | dt_select(iris, "^.*i")
149 | dt_select(iris, 1:4) # numeric indexing - all variables except Species
150 | }
151 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(columns,data.frame)
  4 | S3method(cube,data.frame)
  5 | S3method(dcast,data.frame)
  6 | S3method(groupingsets,data.frame)
  7 | S3method(let,data.frame)
  8 | S3method(let,etable)
  9 | S3method(let_all,data.frame)
 10 | S3method(let_all,etable)
 11 | S3method(let_if,data.frame)
 12 | S3method(let_if,etable)
 13 | S3method(melt,data.frame)
 14 | S3method(query,data.frame)
 15 | S3method(query_if,data.frame)
 16 | S3method(rollup,data.frame)
 17 | S3method(rows,data.frame)
 18 | S3method(rows,etable)
 19 | S3method(take,data.frame)
 20 | S3method(take_all,data.frame)
 21 | S3method(take_if,data.frame)
 22 | export("%$%")
 23 | export("%<>%")
 24 | export("%>%")
 25 | export("%T>%")
 26 | export("%between%")
 27 | export("%chin%")
 28 | export("%inrange%")
 29 | export("%like%")
 30 | export(":=")
 31 | export(.BY)
 32 | export(.GRP)
 33 | export(.I)
 34 | export(.N)
 35 | export(.SD)
 36 | export(as.data.table)
 37 | export(between)
 38 | export(chmatch)
 39 | export(coalesce)
 40 | export(cols)
 41 | export(columns)
 42 | export(copy)
 43 | export(cube)
 44 | export(data.table)
 45 | export(dcast)
 46 | export(dt_add_count)
 47 | export(dt_anti_join)
 48 | export(dt_count)
 49 | export(dt_filter)
 50 | export(dt_full_join)
 51 | export(dt_inner_join)
 52 | export(dt_left_join)
 53 | export(dt_mutate)
 54 | export(dt_right_join)
 55 | export(dt_select)
 56 | export(dt_semi_join)
 57 | export(dt_summarise)
 58 | export(dt_summarise_all)
 59 | export(dt_summarize)
 60 | export(dt_summarize_all)
 61 | export(dt_top_n)
 62 | export(fcase)
 63 | export(fcoalesce)
 64 | export(fifelse)
 65 | export(fintersect)
 66 | export(first)
 67 | export(foverlaps)
 68 | export(frank)
 69 | export(frankv)
 70 | export(fread)
 71 | export(frollapply)
 72 | export(frollmean)
 73 | export(frollsum)
 74 | export(fsetdiff)
 75 | export(fsetequal)
 76 | export(fsort)
 77 | export(funion)
 78 | export(fwrite)
 79 | export(getDTthreads)
 80 | export(groupingsets)
 81 | export(guess)
 82 | export(inrange)
 83 | export(is.data.table)
 84 | export(last)
 85 | export(let)
 86 | export(let_all)
 87 | export(let_if)
 88 | export(melt)
 89 | export(nafill)
 90 | export(query)
 91 | export(query_if)
 92 | export(rbindlist)
 93 | export(rleid)
 94 | export(rleidv)
 95 | export(rollup)
 96 | export(rowid)
 97 | export(rowidv)
 98 | export(rows)
 99 | export(set)
100 | export(setDF)
101 | export(setDT)
102 | export(setDTthreads)
103 | export(setcolorder)
104 | export(setindex)
105 | export(setindexv)
106 | export(setkey)
107 | export(setkeyv)
108 | export(setnafill)
109 | export(setnames)
110 | export(setorder)
111 | export(setorderv)
112 | export(shift)
113 | export(shouldPrint)
114 | export(take)
115 | export(take_all)
116 | export(take_if)
117 | export(text_expand)
118 | export(to_df)
119 | export(to_dfc)
120 | export(to_dfr)
121 | export(to_list)
122 | export(to_long)
123 | export(to_vec)
124 | export(to_wide)
125 | export(transpose)
126 | export(tstrsplit)
127 | export(uniqueN)
128 | export(vlookup)
129 | export(xlookup)
130 | import(data.table)
131 | import(magrittr)
132 | importFrom(data.table,"%between%")
133 | importFrom(data.table,"%chin%")
134 | importFrom(data.table,"%inrange%")
135 | importFrom(data.table,"%like%")
136 | importFrom(data.table,":=")
137 | importFrom(data.table,.BY)
138 | importFrom(data.table,.GRP)
139 | importFrom(data.table,.I)
140 | importFrom(data.table,.N)
141 | importFrom(data.table,.SD)
142 | importFrom(data.table,as.data.table)
143 | importFrom(data.table,between)
144 | importFrom(data.table,chmatch)
145 | importFrom(data.table,cube)
146 | importFrom(data.table,data.table)
147 | importFrom(data.table,fcase)
148 | importFrom(data.table,fcoalesce)
149 | importFrom(data.table,fifelse)
150 | importFrom(data.table,fintersect)
151 | importFrom(data.table,first)
152 | importFrom(data.table,foverlaps)
153 | importFrom(data.table,frank)
154 | importFrom(data.table,frankv)
155 | importFrom(data.table,fread)
156 | importFrom(data.table,frollapply)
157 | importFrom(data.table,frollmean)
158 | importFrom(data.table,frollsum)
159 | importFrom(data.table,fsetdiff)
160 | importFrom(data.table,fsetequal)
161 | importFrom(data.table,fsort)
162 | importFrom(data.table,funion)
163 | importFrom(data.table,fwrite)
164 | importFrom(data.table,getDTthreads)
165 | importFrom(data.table,groupingsets)
166 | importFrom(data.table,inrange)
167 | importFrom(data.table,is.data.table)
168 | importFrom(data.table,last)
169 | importFrom(data.table,nafill)
170 | importFrom(data.table,rbindlist)
171 | importFrom(data.table,rleid)
172 | importFrom(data.table,rleidv)
173 | importFrom(data.table,rollup)
174 | importFrom(data.table,rowid)
175 | importFrom(data.table,rowidv)
176 | importFrom(data.table,set)
177 | importFrom(data.table,setDF)
178 | importFrom(data.table,setDT)
179 | importFrom(data.table,setDTthreads)
180 | importFrom(data.table,setcolorder)
181 | importFrom(data.table,setindex)
182 | importFrom(data.table,setindexv)
183 | importFrom(data.table,setkey)
184 | importFrom(data.table,setkeyv)
185 | importFrom(data.table,setnafill)
186 | importFrom(data.table,setnames)
187 | importFrom(data.table,setorder)
188 | importFrom(data.table,setorderv)
189 | importFrom(data.table,shift)
190 | importFrom(data.table,shouldPrint)
191 | importFrom(data.table,transpose)
192 | importFrom(data.table,tstrsplit)
193 | importFrom(data.table,uniqueN)
194 | importFrom(magrittr,"%$%")
195 | importFrom(magrittr,"%<>%")
196 | importFrom(magrittr,"%>%")
197 | importFrom(magrittr,"%T>%")
198 | 


--------------------------------------------------------------------------------
/man/vlookup.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/vlookup.R
  3 | \name{vlookup}
  4 | \alias{vlookup}
  5 | \alias{xlookup}
  6 | \title{Look up values in dictionary.}
  7 | \usage{
  8 | vlookup(
  9 |   lookup_value,
 10 |   dict,
 11 |   result_column = 2,
 12 |   lookup_column = 1,
 13 |   no_match = NA
 14 | )
 15 | 
 16 | xlookup(lookup_value, lookup_vector, result_vector, no_match = NA)
 17 | }
 18 | \arguments{
 19 | \item{lookup_value}{Vector of looked up values}
 20 | 
 21 | \item{dict}{data.frame. Dictionary.}
 22 | 
 23 | \item{result_column}{numeric or character. Resulting columns in the
 24 | \code{dict}. Default  value for \code{result_column} is 2 - for frequent
 25 | case of dictionary with keys in the first column and results in the second
 26 | column.}
 27 | 
 28 | \item{lookup_column}{Column of \code{dict} in which lookup value will be
 29 | searched. By default, it is the first column of the \code{dict}.}
 30 | 
 31 | \item{no_match}{vector of length one. NA by default. Where a valid match is
 32 | not found, return the 'no_match' value you supply.}
 33 | 
 34 | \item{lookup_vector}{vector in which 'lookup_value' will be searched during 'xlookup'.}
 35 | 
 36 | \item{result_vector}{vector with resulting values for 'xlookup'.}
 37 | }
 38 | \value{
 39 | \code{xlookup} always return vector, \code{vlookup} returns vector if
 40 | the \code{result_column} is single value. In the opposite case data.frame will
 41 | be returned.
 42 | }
 43 | \description{
 44 | \code{vlookup} function is inspired by VLOOKUP spreadsheet
 45 | function. It looks for a \code{lookup_value} in the \code{lookup_column} of
 46 | the \code{dict}, and then returns values in the same rows from
 47 | \code{result_column}.
 48 | \code{xlookup} is simplified version of \code{vlookup}. It searches for a
 49 | \code{lookup_value} in the \code{lookup_vector} and return values in the same
 50 | position from the \code{result_vector}.
 51 | }
 52 | \examples{
 53 | # with data.frame
 54 | dict = data.frame(num=1:26, small=letters, cap=LETTERS)
 55 | vlookup(1:3, dict)
 56 | vlookup(c(45,1:3,58), dict, result_column='cap')
 57 | vlookup(c(45,1:3,58), dict, result_column='cap', no_match = "Not found")
 58 | 
 59 | # the same with xlookup
 60 | xlookup(1:3, dict$num, dict$small)
 61 | xlookup(c(45,1:3,58), dict$num, dict$cap)
 62 | xlookup(c(45,1:3,58), dict$num, dict$cap, no_match = "Not found")
 63 | 
 64 | 
 65 | # example from base 'merge'
 66 | authors = data.table(
 67 |     surname = c("Tukey", "Venables", "Tierney", "Ripley", "McNeil"),
 68 |     nationality = c("US", "Australia", "US", "UK", "Australia"),
 69 |     deceased = c("yes", rep("no", 4))
 70 | )
 71 | 
 72 | books = data.table(
 73 |     surname = c("Tukey", "Venables", "Tierney",
 74 |                 "Ripley", "Ripley", "McNeil", "R Core"),
 75 |     title = c("Exploratory Data Analysis",
 76 |               "Modern Applied Statistics ...",
 77 |               "LISP-STAT",
 78 |               "Spatial Statistics", "Stochastic Simulation",
 79 |               "Interactive Data Analysis",
 80 |               "An Introduction to R")
 81 | )
 82 | 
 83 | let(books,
 84 |      c("author_nationality", "author_deceased") := vlookup(surname,
 85 |              dict = authors,
 86 |              result_column = 2:3
 87 |          )
 88 | )[]
 89 | 
 90 | # Just for fun. Examples borrowed from Microsoft Excel.
 91 | # It is not the R way of doing things.
 92 | 
 93 | # Example 2
 94 | 
 95 | ex2 = fread("
 96 |     Item_ID Item Cost Markup
 97 |     ST-340 Stroller 145.67  0.30
 98 |     BI-567 Bib 3.56  0.40
 99 |     DI-328 Diapers  21.45  0.35
100 |     WI-989 Wipes  5.12  0.40
101 |     AS-469 Aspirator 2.56  0.45
102 | ")
103 | 
104 | # Calculates the retail price of diapers by adding the markup percentage to the cost.
105 | vlookup("DI-328", ex2, 3) * (1 + vlookup("DI-328", ex2, 4)) # 28.9575
106 | 
107 | # Calculates the sale price of wipes by subtracting a specified discount from
108 | # the retail price.
109 | (vlookup("WI-989", ex2, "Cost") * (1 + vlookup("WI-989", ex2, "Markup"))) * (1 - 0.2)  # 5.7344
110 | 
111 | A2 = ex2[["Item_ID"]][1]
112 | A3 = ex2[["Item_ID"]][2]
113 | 
114 | # If the cost of an item is greater than or equal to $20.00, displays the string
115 | # "Markup is nn\%"; otherwise, displays the string "Cost is under $20.00".
116 | ifelse(vlookup(A2, ex2, "Cost") >= 20,
117 |        paste0("Markup is " , 100 * vlookup(A2, ex2, "Markup"),"\%"),
118 |        "Cost is under $20.00") # Markup is 30\%
119 | 
120 | 
121 | # If the cost of an item is greater than or equal to $20.00, displays the string
122 | # Markup is nn\%"; otherwise, displays the string "Cost is $n.nn".
123 | ifelse(vlookup(A3, ex2, "Cost") >= 20,
124 |        paste0("Markup is: " , 100 * vlookup(A3, ex2, "Markup") , "\%"),
125 |        paste0("Cost is $", vlookup(A3, ex2, "Cost"))) #Cost is $3.56
126 | 
127 | 
128 | # Example 3
129 | 
130 | ex3 = fread('
131 |     ID  Last_name  First_name  Title Birth_date
132 |     1 Davis Sara "Sales Rep."  12/8/1968
133 |     2 Fontana Olivier "V.P. of Sales" 2/19/1952
134 |     3 Leal Karina "Sales Rep." 8/30/1963
135 |     4 Patten Michael "Sales Rep." 9/19/1958
136 |     5 Burke Brian "Sales Mgr." 3/4/1955
137 |     6 Sousa Luis "Sales Rep."  7/2/1963
138 | ')
139 | 
140 | # If there is an employee with an ID of 5, displays the employee's last name;
141 | # otherwise, displays the message "Employee not found".
142 | vlookup(5, ex3, "Last_name", no_match = "Employee not found") # Burke
143 | 
144 | # Many employees
145 | vlookup(1:10, ex3, "Last_name", no_match =  "Employee not found")
146 | 
147 | # For the employee with an ID of 4, concatenates the values of three cells into
148 | # a complete sentence.
149 | paste0(vlookup(4, ex3, "First_name"), " ",
150 |        vlookup(4, ex3, "Last_name"), " is a ",
151 |        vlookup(4, ex3, "Title")) # Michael Patten is a Sales Rep.
152 | }
153 | 


--------------------------------------------------------------------------------
/R/verbs.R:
--------------------------------------------------------------------------------
  1 | #' 'dplyr'-like interface for data.table.
  2 | #'
  3 | #' Subset of 'dplyr' verbs to work with data.table. Note that there is no
  4 | #' `group_by` verb - use `by` or `keyby` argument when needed.
  5 | #' - `dt_mutate` adds new variables or modify existing variables. If
  6 | #' `data` is data.table then it modifies in-place.
  7 | #' - `dt_summarize` computes summary statistics. Splits the data into
  8 | #' subsets, computes summary statistics for each, and returns the result in the
  9 | #' "data.table" form.
 10 | #' - `dt_summarize_all` is the same as `dt_summarize` but work over all non-grouping variables.
 11 | #' - `dt_filter` selects rows/cases where conditions are true. Rows
 12 | #' where the condition evaluates to NA are dropped.
 13 | #' - `dt_select` selects column/variables from the data set. Range of
 14 | #' variables are supported, e. g. vs:carb. Characters which start with '^' or
 15 | #' end with '$' considered as Perl-style regular expression patterns. For
 16 | #' example, '^Petal' returns all variables started with 'Petal'. 'Width$'
 17 | #' returns all variables which end with 'Width'. Pattern '^.' matches all
 18 | #' variables and pattern '^.*my_str' is equivalent to `contains "my_str"`. See
 19 | #' examples.
 20 | #'
 21 | #' @param data data.table/data.frame data.frame will be automatically converted
 22 | #'   to data.table. `dt_mutate` modify data.table object in-place.
 23 | #' @param ... List of variables or name-value pairs of summary/modifications
 24 | #'   functions. The name will be the name of the variable in the result. In the
 25 | #'   `mutate` function we can use `a = b` or `a := b` notation.
 26 | #'   Advantages of `:=` are multiassignment (`c("a", "b") := list(1,2)`)
 27 | #'   and parametric assignment (`(a) := 2`).
 28 | #' @param by unquoted name of grouping variable of list of unquoted names of
 29 | #'   grouping variables. For details see [data.table][data.table::data.table]
 30 | #' @param keyby Same as `by`, but with an additional `setkey()` run on the by
 31 | #'   columns of the result, for convenience. It is common practice to use
 32 | #'   'keyby=' routinely when you wish the result to be sorted. For details see
 33 | #'   [data.table][data.table::data.table].
 34 | #' @param fun function which will be applied to all variables in
 35 | #'   `dt_summarize` and `dt_summarize_all`.
 36 | #' @return data.table
 37 | #' @export
 38 | #' @examples
 39 | #' # examples from 'dplyr'
 40 | #' # newly created variables are available immediately
 41 | #' mtcars  %>%
 42 | #'     dt_mutate(
 43 | #'         cyl2 = cyl * 2,
 44 | #'         cyl4 = cyl2 * 2
 45 | #'     ) %>%
 46 | #'     head()
 47 | #'
 48 | #'
 49 | #' # you can also use dt_mutate() to remove variables and
 50 | #' # modify existing variables
 51 | #' mtcars %>%
 52 | #'     dt_mutate(
 53 | #'         mpg = NULL,
 54 | #'         disp = disp * 0.0163871 # convert to litres
 55 | #'     ) %>%
 56 | #'     head()
 57 | #'
 58 | #'
 59 | #' # window functions are useful for grouped mutates
 60 | #' mtcars %>%
 61 | #'     dt_mutate(
 62 | #'         rank = rank(-mpg, ties.method = "min"),
 63 | #'         keyby = cyl) %>%
 64 | #'     print()
 65 | #'
 66 | #'
 67 | #' # You can drop variables by setting them to NULL
 68 | #' mtcars %>% dt_mutate(cyl = NULL) %>% head()
 69 | #'
 70 | #' # A summary applied without by returns a single row
 71 | #' mtcars %>%
 72 | #'     dt_summarise(mean = mean(disp), n = .N)
 73 | #'
 74 | #' # Usually, you'll want to group first
 75 | #' mtcars %>%
 76 | #'     dt_summarise(mean = mean(disp), n = .N, by = cyl)
 77 | #'
 78 | #'
 79 | #' # Multiple 'by' - variables
 80 | #' mtcars %>%
 81 | #'     dt_summarise(cyl_n = .N, by = list(cyl, vs))
 82 | #'
 83 | #' # Newly created summaries immediately
 84 | #' # doesn't overwrite existing variables
 85 | #' mtcars %>%
 86 | #'     dt_summarise(disp = mean(disp),
 87 | #'                   sd = sd(disp),
 88 | #'                   by = cyl)
 89 | #'
 90 | #' # You can group by expressions:
 91 | #' mtcars %>%
 92 | #'     dt_summarise_all(mean, by = list(vsam = vs + am))
 93 | #'
 94 | #' # filter by condition
 95 | #' mtcars %>%
 96 | #'     dt_filter(am==0)
 97 | #'
 98 | #' # filter by compound condition
 99 | #' mtcars %>%
100 | #'     dt_filter(am==0,  mpg>mean(mpg))
101 | #'
102 | #'
103 | #' # select
104 | #' mtcars %>% dt_select(vs:carb, cyl)
105 | #' mtcars %>% dt_select(-am, -cyl)
106 | #'
107 | #' # regular expression pattern
108 | #' dt_select(iris, "^Petal") # variables which start from 'Petal'
109 | #' dt_select(iris, "Width$") # variables which end with 'Width'
110 | #' # move Species variable to the front.
111 | #' # pattern "^." matches all variables
112 | #' dt_select(iris, Species, "^.")
113 | #' # pattern "^.*i" means "contains 'i'"
114 | #' dt_select(iris, "^.*i")
115 | #' dt_select(iris, 1:4) # numeric indexing - all variables except Species
116 | dt_mutate = function(data, ..., by){
117 |     eval.parent(substitute(maditr::let(data, ...,
118 |                                by = by))
119 |     )
120 | }
121 | 
122 | 
123 | #' @rdname dt_mutate
124 | #' @export
125 | dt_summarize = function(data, ..., by, keyby, fun = NULL){
126 |     eval.parent(substitute(maditr::take(data, ...,
127 |                                 by = by,
128 |                                 keyby = keyby,
129 |                                 fun = fun))
130 |     )
131 | }
132 | 
133 | #' @rdname dt_mutate
134 | #' @export
135 | dt_summarize_all = function(data, fun, by, keyby){
136 |     !missing(fun) || stop("'dt_summarize_all': argument 'fun' is missing.")
137 |     eval.parent(substitute(maditr::take(data,
138 |                                 by = by,
139 |                                 keyby = keyby,
140 |                                 fun = fun))
141 |     )
142 | }
143 | 
144 | #' @rdname dt_mutate
145 | #' @export
146 | dt_summarise = dt_summarize
147 | 
148 | #' @rdname dt_mutate
149 | #' @export
150 | dt_summarise_all = dt_summarize_all
151 | 
152 | #' @rdname dt_mutate
153 | #' @export
154 | dt_select = columns
155 | 
156 | 
157 | 
158 | #' @rdname dt_mutate
159 | #' @export
160 | dt_filter = rows
161 | 
162 | 
163 | 
164 | 
165 | 
166 | 


--------------------------------------------------------------------------------
/R/dcast.R:
--------------------------------------------------------------------------------
  1 | #' Convert data between wide and long forms.
  2 | #'
  3 | #' The `dcast` formula takes the form `LHS ~ RHS`, ex: `var1 + var2 ~ var3`. The
  4 | #' order of entries in the formula is essential. There are two special
  5 | #' variables: `.` and `...`. `.` represents no variable; `...` represents all
  6 | #' variables not otherwise mentioned in formula. LHS variable values will be in
  7 | #' rows. RHS variables values will become column names.
  8 | #' `fun.aggregate(value.var)` will be cell values. For details see
  9 | #' [dcast][data.table::dcast] and [melt][data.table::melt].
 10 | #' @param data A data.table/data.frame. `data.frame` will be automatically
 11 | #'   converted to data.table.
 12 | #' @param formula A formula of the form LHS ~ RHS to cast. For details see [dcast][data.table::dcast].
 13 | #' @param fun.aggregate Should the data be aggregated before casting? If the
 14 | #'   formula doesn't identify a single observation for each cell, then
 15 | #'   aggregation defaults to length with a message.
 16 | #' @param sep Character vector of length 1, indicating the separating character
 17 | #'   in variable names generated during casting. Default is _ for backwards
 18 | #'   compatibility.
 19 | #' @param margins For details see [dcast][data.table::dcast].
 20 | #' @param subset Specified if casting should be done on a subset of the data.
 21 | #' @param fill Value with which to fill missing cells. If fun.aggregate is
 22 | #'   present, takes the value by applying the function on a 0-length vector.
 23 | #' @param drop FALSE will cast by including all missing combinations. c(FALSE,
 24 | #'   TRUE) will only include all missing combinations of formula LHS. And
 25 | #'   c(TRUE, FALSE) will only include all missing combinations of formula RHS.
 26 | #' @param value.var Name of the column whose values will be filled to cast.
 27 | #'   Function 'guess()' tries to, well, guess this column automatically, if none
 28 | #'   is provided. It is possible to cast multiple `value.var`` columns
 29 | #'   simultaneously. For details see [dcast][data.table::dcast].
 30 | #' @param verbose For details see [dcast][data.table::dcast].
 31 | #' @param ... Any other arguments that may be passed to the aggregating function.
 32 | #'
 33 | #'
 34 | #' @param id.vars vector of id variables. Can be integer (corresponding id
 35 | #'   column numbers) or character (id column names) vector. If missing, all
 36 | #'   non-measure columns will be assigned to it. If integer, must be positive;
 37 | #'   see Details.
 38 | #' @param measure.vars Measure variables for melting. Can be missing, vector,
 39 | #'   list, or pattern-based. For details see [dcast][data.table::dcast].
 40 | #' @param variable.name name for the measured variable names column. The default name is 'variable'.
 41 | #' @param value.name name for the molten data values column(s). The default name
 42 | #'   is 'value'. Multiple names can be provided here for the case when
 43 | #'   measure.vars is a list, though note well that the names provided in
 44 | #'   measure.vars take precedence.
 45 | #' @param na.rm If TRUE, NA values will be removed from the molten data.
 46 | #' @param variable.factor If TRUE, the variable column will be converted to
 47 | #'   factor, else it will be a character column.
 48 | #' @param value.factor If TRUE, the value column will be converted to factor,
 49 | #'   else the molten value type is left unchanged.
 50 | #'
 51 | #'
 52 | #' @return data.table
 53 | #' @author Matt Dowle <mattjdowle@gmail.com>
 54 | #' @export
 55 | #' @keywords internal
 56 | #' @examples
 57 | #' # examples from 'tidyr' package
 58 | #' stocks = data.frame(
 59 | #'     time = as.Date('2009-01-01') + 0:9,
 60 | #'     X = rnorm(10, 0, 1),
 61 | #'     Y = rnorm(10, 0, 2),
 62 | #'     Z = rnorm(10, 0, 4)
 63 | #' )
 64 | #' stocksm = stocks %>%
 65 | #'     melt(id.vars = "time", variable.name = "stock", value.name = "price")
 66 | #' stocksm %>% dcast(time ~ stock)
 67 | #' stocksm %>% dcast(stock ~ time)
 68 | #'
 69 | #' # dcast and melt are complements
 70 | #' df = data.frame(x = c("a", "b"), y = c(3, 4), z = c(5, 6))
 71 | #' df %>%
 72 | #'     dcast(z ~ x, value.var = "y") %>%
 73 | #'     melt(id.vars = "z", variable.name = "x", value.name = "y", na.rm = TRUE)
 74 | dcast = function(data, formula, fun.aggregate = NULL, sep = "_",
 75 |                  ..., margins = NULL, subset = NULL, fill = NULL,
 76 |                  drop = TRUE, value.var = guess(data),
 77 |                  verbose = getOption("datatable.verbose")){
 78 |     UseMethod("dcast")
 79 | }
 80 | 
 81 | #' @export
 82 | dcast.data.frame = function(data, formula, fun.aggregate = NULL, sep = "_",
 83 |                  ..., margins = NULL, subset = NULL, fill = NULL,
 84 |                  drop = TRUE, value.var = guess(data),
 85 |                  verbose = getOption("datatable.verbose")){
 86 |     curr_call = sys.call()
 87 |     curr_call[[1]] = quote(data.table::dcast.data.table)
 88 |     curr_call[[2]] = substitute(data.table::as.data.table(data))
 89 |     eval.parent(curr_call)
 90 | }
 91 | 
 92 | 
 93 | #' @export
 94 | #' @rdname dcast
 95 | melt = function(data, id.vars, measure.vars,
 96 |                 variable.name = "variable", value.name = "value",
 97 |                 ..., na.rm = FALSE, variable.factor = TRUE,
 98 |                 value.factor = FALSE,
 99 |                 verbose = getOption("datatable.verbose")){
100 |     UseMethod("melt")
101 | }
102 | 
103 | #' @export
104 | melt.data.frame = function(data, id.vars, measure.vars,
105 |                 variable.name = "variable", value.name = "value",
106 |                 ..., na.rm = FALSE, variable.factor = TRUE,
107 |                 value.factor = FALSE,
108 |                 verbose = getOption("datatable.verbose")){
109 |     curr_call = sys.call()
110 |     curr_call[[1]] = quote(data.table::melt.data.table)
111 |     curr_call[[2]] = substitute(data.table::as.data.table(data))
112 |     eval.parent(curr_call)
113 | }
114 | 
115 | #' @rdname dcast
116 | #' @export
117 | guess = function (data){
118 |     if ("value" %chin% names(data))
119 |         return("value")
120 |     if ("(all)" %chin% names(data))
121 |         return("(all)")
122 |     var = names(data)[ncol(data)]
123 |     message("Using '", var, "' as value column. Use 'value.var' to override")
124 |     return(var)
125 | }
126 | 


--------------------------------------------------------------------------------
/R/join.R:
--------------------------------------------------------------------------------
  1 | #' Join two data.frames by common columns.
  2 | #'
  3 | #' Do different versions of SQL join operations. See examples.
  4 | #'
  5 | #' @param x data.frame or data.table
  6 | #' @param y data.frame or data.table
  7 | #' @param by a character vector of variables to join by. If NULL, the default,
  8 | #'   *_join() will do a natural join, using all variables with common names
  9 | #'   across the two tables. A message lists the variables so that you can check
 10 | #'   they're right (to suppress the message, simply explicitly list the
 11 | #'   variables that you want to join). To join by different variables on x and y
 12 | #'   use a named vector. For example, `by = c("a" = "b")` will match x.a to y.b.
 13 | #' @param suffix If there are non-joined duplicate variables in x and y, these
 14 | #'   suffixes will be added to the output to disambiguate them. Should be a
 15 | #'   character vector of length 2.
 16 | #'
 17 | #' @return data.table
 18 | #' @export
 19 | #'
 20 | #' @examples
 21 | #' workers = fread("
 22 | #'     name company
 23 | #'     Nick Acme
 24 | #'     John Ajax
 25 | #'     Daniela Ajax
 26 | #' ")
 27 | #'
 28 | #' positions = fread("
 29 | #'     name position
 30 | #'     John designer
 31 | #'     Daniela engineer
 32 | #'     Cathie manager
 33 | #' ")
 34 | #'
 35 | #' workers %>% dt_inner_join(positions)
 36 | #' workers %>% dt_left_join(positions)
 37 | #' workers %>% dt_right_join(positions)
 38 | #' workers %>% dt_full_join(positions)
 39 | #'
 40 | #' # filtering joins
 41 | #' workers %>% dt_anti_join(positions)
 42 | #' workers %>% dt_semi_join(positions)
 43 | #'
 44 | #' # To suppress the message, supply 'by' argument
 45 | #' workers %>% dt_left_join(positions, by = "name")
 46 | #'
 47 | #' # Use a named 'by' if the join variables have different names
 48 | #' positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
 49 | #' workers %>% dt_inner_join(positions2, by = c("name" = "worker"))
 50 | dt_left_join = function (x, y, by = NULL, suffix = c(".x", ".y")) {
 51 |     dt_join(x = x,
 52 |             y = y,
 53 |             by = by,
 54 |             suffix = suffix,
 55 |             all_x = TRUE,
 56 |             all_y = FALSE,
 57 |             src = "dt_left_join"
 58 |             )
 59 | }
 60 | 
 61 | #' @export
 62 | #' @rdname dt_left_join
 63 | dt_right_join = function (x, y, by = NULL, suffix = c(".x", ".y")) {
 64 |     dt_join(x = x,
 65 |             y = y,
 66 |             by = by,
 67 |             suffix = suffix,
 68 |             all_x = FALSE,
 69 |             all_y = TRUE,
 70 |             src = "dt_right_join"
 71 |     )
 72 | }
 73 | 
 74 | #' @export
 75 | #' @rdname dt_left_join
 76 | dt_inner_join = function (x, y, by = NULL, suffix = c(".x", ".y")) {
 77 |     dt_join(x = x,
 78 |             y = y,
 79 |             by = by,
 80 |             suffix = suffix,
 81 |             all_x = FALSE,
 82 |             all_y = FALSE,
 83 |             src = "dt_inner_join"
 84 |     )
 85 | }
 86 | 
 87 | #' @export
 88 | #' @rdname dt_left_join
 89 | dt_full_join = function (x, y, by = NULL, suffix = c(".x", ".y")) {
 90 |     dt_join(x = x,
 91 |             y = y,
 92 |             by = by,
 93 |             suffix = suffix,
 94 |             all_x = TRUE,
 95 |             all_y = TRUE,
 96 |             src = "dt_full_join"
 97 |     )
 98 | }
 99 | 
100 | #' @export
101 | #' @rdname dt_left_join
102 | dt_semi_join = function (x, y, by = NULL) {
103 |     if(!is.data.table(x)) x = as.data.table(x)
104 |     if(!is.data.table(y)) y = as.data.table(y)
105 |     if(length(by)==0) {
106 |         by = get_column_names(colnames(x), colnames(y), src = "dt_semi_join")
107 |     }
108 |     fsetdiff(x, x[!y, on=by], all=TRUE)
109 | }
110 | 
111 | #' @export
112 | #' @rdname dt_left_join
113 | dt_anti_join = function (x, y, by = NULL) {
114 |     if(!is.data.table(x)) x = as.data.table(x)
115 |     if(!is.data.table(y)) y = as.data.table(y)
116 |     if(length(by)==0) {
117 |         by = get_column_names(colnames(x), colnames(y), src = "dt_anti_join")
118 |     }
119 |     by_x = names(by)
120 |     by_y = unname(by)
121 |     if(is.null(by_x)){
122 |         by_x = by_y
123 |     } else {
124 |         empty_names = is.na(by_x)|(by_x=="")
125 |         by_x[empty_names] = by_y[empty_names]
126 |     }
127 |     check_existense(by_x, by_y, colnames(x), colnames(y), src = "dt_anti_join")
128 |     x[!y, on = by]
129 | }
130 | 
131 | get_column_names = function(col_x, col_y, src){
132 |     by = intersect(col_x, col_y)
133 |     if(length(by)>0){
134 |         message(sprintf("%s: joining, by = %s", src, paste(paste0('"', by, '"'), collapse = ", ")))
135 |     } else {
136 |         stop(sprintf("%s: 'by' required, because the data sources have no common variables", src), call. = FALSE)
137 |     }
138 |     by
139 | }
140 | 
141 | dt_join = function(x, y, by, suffix, all_x, all_y, src){
142 |     if(!is.data.table(x)) x = as.data.table(x)
143 |     if(!is.data.table(y)) y = as.data.table(y)
144 |     if(length(by)==0) {
145 |         by_x = by_y = get_column_names(colnames(x), colnames(y), src)
146 |     } else {
147 |         by_x = names(by)
148 |         by_y = unname(by)
149 |         if(is.null(by_x)){
150 |             by_x = by_y
151 |         } else {
152 |             empty_names = is.na(by_x)|(by_x=="")
153 |             by_x[empty_names] = by_y[empty_names]
154 |         }
155 |     }
156 |     check_existense(by_x, by_y, colnames(x), colnames(y), src)
157 |     merge(x = x,
158 |           y = y,
159 |           by.x = by_x,
160 |           by.y = by_y,
161 |           all.x = all_x,
162 |           all.y = all_y,
163 |           suffixes = suffix,
164 |           sort = FALSE,
165 |           no.dups = TRUE,
166 |           allow.cartesian = TRUE
167 |           )
168 | 
169 | }
170 | 
171 | 
172 | check_existense = function(by_x, by_y, names_x, names_y, src){
173 |     err = setdiff(by_x, names_x)
174 |     if(length(err)>0){
175 |         err = paste(paste0("'", err, "'"), collapse = ", ")
176 |         stop(sprintf("'%s': 'by' can't contain join column(s) %s which is missing from LHS", src, err), call. = FALSE)
177 |     }
178 |     err = setdiff(by_y, names_y)
179 |     if(length(err)>0){
180 |         err = paste(paste0("'", err, "'"), collapse = ", ")
181 |         stop(sprintf("'%s': 'by' can't contain join column(s) %s which is missing from RHS", src, err), call. = FALSE)
182 |     }
183 | }
184 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_to_list.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext:","to_list", "\n")
  2 | 
  3 | expect_identical(
  4 |     to_list(1:3),
  5 |     as.list(1:3)
  6 | )
  7 | 
  8 | data(iris)
  9 | expect_identical(
 10 |     to_list(iris, if(is.numeric(.value)) .value),
 11 |     as.list(iris[1:4])
 12 | )
 13 | 
 14 | 
 15 | expect_identical(
 16 |     to_list(iris, if(is.numeric(.x)) .x, skip_null = FALSE),
 17 |     c(as.list(iris[1:4]), list(Species = NULL))
 18 | )
 19 | 
 20 | expect_identical(
 21 |     iris %>% to_list(if(is.numeric(.x)) .x, skip_null = FALSE),
 22 |     c(as.list(iris[1:4]), list(Species = NULL))
 23 | )
 24 | 
 25 | expect_identical(
 26 |     to_list(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x)),
 27 |     c(as.list(colMeans(iris[1:4])), list(Species = uniqueN(iris$Species)))
 28 | )
 29 | 
 30 | expect_identical(
 31 |     to_list(1:5, .index^2 + .index),
 32 |     as.list((1:5)^2 + 1:5)
 33 | 
 34 | )
 35 | 
 36 | 
 37 | expect_identical(
 38 |     to_list((1:5)*100, sqrt),
 39 |     lapply((1:5)*100, sqrt)
 40 | )
 41 | 
 42 | 
 43 | 
 44 | expect_identical(
 45 |     to_list((1:5)*10, (.index)),
 46 |     as.list(1:5)
 47 | )
 48 | 
 49 | 
 50 | expect_identical(
 51 |     to_list((1:5)*10, (.name)),
 52 |     as.list(rep("", 5))
 53 | )
 54 | 
 55 | expect_identical(
 56 |     to_list(iris, if(grepl("Sepal", .name)) .x),
 57 |     as.list(iris[,c("Sepal.Length", "Sepal.Width")])
 58 | )
 59 | 
 60 | 
 61 | ########
 62 | cat("\nContext:","to_vec", "\n")
 63 | expect_identical(
 64 |     to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x)),
 65 |     c(colMeans(iris[1:4]), Species = uniqueN(iris$Species))
 66 | )
 67 | 
 68 | 
 69 | expect_identical(
 70 |     to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x), use.names = FALSE),
 71 |     unname(c(colMeans(iris[1:4]), Species = uniqueN(iris$Species)))
 72 | )
 73 | 
 74 | expect_identical(
 75 |     to_vec(iris, is.numeric),
 76 |     sapply(iris, is.numeric)
 77 | )
 78 | 
 79 | cat("\nContext:","to_df", "\n")
 80 | 
 81 | data("mtcars")
 82 | 
 83 | expect_identical(
 84 |     to_df(list(1:2, 2:3, 3:4)),
 85 |     data.table(V1 = 1:3, V2 = 2:4)
 86 | )
 87 | expect_identical(
 88 |     to_df(mtcars, list(var = .name, mean = mean(.x), sd = sd(.x))),
 89 |     data.table(var = names(mtcars), mean = colMeans(mtcars), sd = sapply(mtcars, sd))
 90 | )
 91 | 
 92 | expect_identical(
 93 |     to_df(mtcars, list(mean = mean(.x), sd = sd(.x)), idvalue = .name, idname = "var"),
 94 |     data.table(mean = colMeans(mtcars), sd = sapply(mtcars, sd), var = names(mtcars))
 95 | )
 96 | 
 97 | expect_identical(
 98 |     to_df(unname(as.list(mtcars)), list(mean = mean(.x), sd = sd(.x)), idvalue = .name, idname = "var"),
 99 |     data.table(mean = colMeans(mtcars), sd = sapply(mtcars, sd), var = "")
100 | )
101 | 
102 | expect_identical(
103 |     to_dfr(mtcars, list(mean = mean(.x), sd = sd(.x)), idvalue = .index),
104 |     data.table(mean = colMeans(mtcars), sd = sapply(mtcars, sd), item_id = seq_along(mtcars))
105 | )
106 | 
107 | vec = c("a", "b", "c")
108 | expect_identical(
109 |     to_dfr(vec, paste0(.x, 1:3), idvalue = .x),
110 |     data.table(V1 = paste0(vec, 1),
111 |                V2 = paste0(vec, 2),
112 |                V3 = paste0(vec, 3),
113 |                item_id = vec)
114 | )
115 | 
116 | 
117 | 
118 | expect_identical(
119 |     to_dfr(vec, paste0(.x, 1:3), idvalue = .x),
120 |     data.table(V1 = paste0(vec, 1),
121 |                V2 = paste0(vec, 2),
122 |                V3 = paste0(vec, 3),
123 |                item_id = vec)
124 | )
125 | 
126 | 
127 | expect_identical(
128 |     to_dfr(vec, paste0(.x, 1:3), idvalue = paste0(.x, .index)),
129 |     data.table(V1 = paste0(vec, 1),
130 |                V2 = paste0(vec, 2),
131 |                V3 = paste0(vec, 3),
132 |                item_id = paste0(vec, 1:3))
133 | )
134 | 
135 | expect_identical(
136 |     to_dfc(mtcars, c( mean(.x), sd(.x))),
137 |     as.data.table(rbind(colMeans(mtcars), sd = sapply(mtcars, sd)))
138 | )
139 | 
140 | cat("\nContext:","to_list scoping", "\n")
141 | 
142 | my_fun = function(x) {
143 |     d = 2
144 |     to_vec(x, .x*d)
145 | }
146 | 
147 | expect_equal(
148 |     my_fun(1:3),
149 |     2*(1:3)
150 | )
151 | 
152 | cat("\nContext:","to_list nested", "\n")
153 | 
154 | a = c(a = 1, b = 2, c = 3)
155 | b = c(aa = 10, bb = 20, cc = 30)
156 | 
157 | expect_identical(
158 |     to_list(a,
159 |             paste(.index, .name, .x, to_vec(b, paste(.index, .name, .x)))
160 |     ),
161 |     list(a = c("1 a 1 1 aa 10", "1 a 1 2 bb 20", "1 a 1 3 cc 30"),
162 |          b = c("2 b 2 1 aa 10", "2 b 2 2 bb 20", "2 b 2 3 cc 30"),
163 |          c = c("3 c 3 1 aa 10", "3 c 3 2 bb 20", "3 c 3 3 cc 30"))
164 | )
165 | if(getOption("covr", TRUE)){
166 |     cat("\nContext:","progress_bars", "\n")
167 |     expect_equal(
168 |         to_list(1:10, identity, trace = TRUE),
169 |         as.list(1:10)
170 |     )
171 |     expect_equal(
172 |         to_vec(setNames(1:26, letters), identity, trace = TRUE),
173 |         setNames(1:26, letters)
174 |     )
175 |     expect_equal(
176 |         to_vec(setNames(1:26, letters), identity, trace = quote(cat("current name:", .name, "\n"))),
177 |         setNames(1:26, letters)
178 |     )
179 |     expect_equal(
180 |         to_list(1:100, function(x) {
181 |             Sys.sleep(0.01)
182 |             x^2
183 |             }, trace = "pb"),
184 |         as.list((1:100)^2)
185 |     )
186 |     expect_equal(
187 |         to_list(1:100,{
188 |             Sys.sleep(.01)
189 |             .x^2
190 |         }, trace = "pb"),
191 |         as.list((1:100)^2)
192 |     )
193 |     expect_equal(
194 |         to_vec(1:100, function(x) {
195 |             Sys.sleep(.01)
196 |             x^2
197 |         }, trace = "pb", trace_step = 10),
198 |         ((1:100)^2)
199 |     )
200 |     expect_equal(
201 |         to_vec(1:100, {
202 |             Sys.sleep(.01)
203 |             .x^2
204 |         }, trace = TRUE, trace_step = 10),
205 |         ((1:100)^2)
206 |     )
207 | 
208 |     expect_equal(
209 |         to_list(1:100, function(x) {
210 |             Sys.sleep(.01)
211 |             x^2
212 |         }, trace = TRUE, trace_step = 10),
213 |         as.list((1:100)^2)
214 |     )
215 |     expect_equal(
216 |         to_dfc(mtcars, `+`, 1, trace = TRUE),
217 |         as.data.table(mtcars + 1)
218 | 
219 |     )
220 | 
221 |     expect_equal(
222 |         to_list(iris, grepl, pattern = "versi", trace = "pb"),
223 |         lapply(iris, grepl, pattern = "versi")
224 | 
225 |     )
226 | }
227 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_let_all.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext:","let_all", "\n")
  2 | 
  3 | scale2 = function(x) c(scale(x))
  4 | data(iris)
  5 | data(mtcars)
  6 | dt_iris = as.data.table(iris)
  7 | dt_mt = as.data.table(mtcars)
  8 | expect_equal(
  9 |     let_all(iris, if(is.numeric(.x)) scale2(.x)),
 10 |     dt_iris[, names(dt_iris[,-5]) := lapply(.SD, scale2), .SDcols = -5]
 11 | )
 12 | 
 13 | expect_equal(
 14 | let_all(mtcars, wow = if(.index==2) .x, by = cyl),
 15 | dt_mt
 16 | )
 17 | 
 18 | dt_iris = as.data.table(iris)
 19 | expect_equal(
 20 |     let_all(iris, scaled = if(is.numeric(.x)) scale2(.x)),
 21 |     dt_iris[, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, scale2), .SDcols = -5]
 22 | )
 23 | 
 24 | dt_iris = as.data.table(iris)
 25 | expect_equal(
 26 |     let_all(iris, scaled = if(is.numeric(.x)) c(scale2(.x)), by = Species),
 27 |     dt_iris[, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, function(x) c(scale2(x))), by = Species]
 28 | )
 29 | 
 30 | dt_iris = as.data.table(iris)
 31 | res = dt_iris[, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, function(x) c(scale2(x))), by = Species]
 32 | dt_iris = as.data.table(iris)
 33 | expect_equal(
 34 |     let_all(dt_iris, scaled = if(is.numeric(.x)) c(scale2(.x)), by = Species),
 35 |     res
 36 | )
 37 | 
 38 | dt_iris = as.data.table(iris)
 39 | res = dt_iris[, paste0(c("Sepal.Width", "Petal.Length"), "_scaled") := lapply(.SD[,c("Sepal.Width", "Petal.Length")],
 40 |                                                                               function(x) c(scale2(x))), by = Species]
 41 | dt_iris = as.data.table(iris)
 42 | expect_equal(
 43 |     let_all(dt_iris, scaled = if(.index %in% 2:3) c(scale2(.x)), by = Species),
 44 |     res
 45 | )
 46 | 
 47 | dt_iris = as.data.table(iris)
 48 | res = dt_iris[, paste0(c("Sepal.Width", "Petal.Length"), "_scaled") := lapply(.SD[,c("Sepal.Width", "Petal.Length")],
 49 |                                                                               function(x) c(scale2(x))), keyby = Species]
 50 | dt_iris = as.data.table(iris)
 51 | expect_equal(
 52 |     let_all(dt_iris, scaled = if(.index %in% 2:3) c(scale2(.x)), keyby = Species),
 53 |     res
 54 | )
 55 | 
 56 | 
 57 | dt_iris = as.data.table(iris)
 58 | expect_equal(
 59 |     let_all(iris[FALSE, ], scaled = if(is.numeric(.x)) scale2(.x)),
 60 |     dt_iris[FALSE, ][,paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, scale2), .SDcols = -5]
 61 | )
 62 | 
 63 | dt_iris = as.data.table(iris)
 64 | expect_equal(
 65 |     let_all(iris, scaled = if(is.numeric(.x)) scale2(.x), uniqueN, by = Species),
 66 |     dt_iris[, c(paste0(names(dt_iris[,-5]), "_scaled"), paste0(names(dt_iris[,-5]), "_uniqueN")) := c(lapply(.SD, scale2), lapply(.SD, uniqueN)),
 67 |             by = Species]
 68 | )
 69 | 
 70 | dt_iris = as.data.table(iris)
 71 | expect_equal(
 72 |     let_all(iris, scaled = if(.index %in% 2:3) scale2(.x), scaled = uniqueN, by = Species),
 73 |     dt_iris[, c(paste0(c("Sepal.Width", "Petal.Length"), "_scaled"), paste0(names(dt_iris[,-5]), "_scaled.1")) := c(lapply(.SD[,2:3, with = TRUE], scale2), lapply(.SD, uniqueN)),
 74 |             by = Species]
 75 | )
 76 | 
 77 | dt_iris = as.data.table(iris)
 78 | expect_equal(
 79 |     let_all(iris, scaled = if(.index %in% 2:3) scale2(.x), scaled = uniqueN),
 80 |     dt_iris[, c(paste0(c("Sepal.Width", "Petal.Length"), "_scaled"), paste0(names(dt_iris), "_scaled.1")) := c(lapply(.SD[,2:3, with = TRUE], scale2), lapply(.SD, uniqueN))]
 81 | )
 82 | 
 83 | dt_iris = as.data.table(iris)
 84 | expect_equal(
 85 |     let_all(dt_iris, scaled = if(is.numeric(.x)) scale2(.x), uniqueN, by = Species),
 86 |     dt_iris
 87 | )
 88 | 
 89 | dt_iris = as.data.table(iris)
 90 | expect_equal(
 91 |     let_all(iris, scaled = if(is.numeric(.x)) scale2(.x), uniqueN),
 92 |     dt_iris[, c(paste0(names(dt_iris[,-5]), "_scaled"), paste0(names(dt_iris), "_uniqueN")) := c(lapply(.SD[,-5], scale2), lapply(.SD, uniqueN))]
 93 | )
 94 | 
 95 | dt_iris = as.data.table(iris)
 96 | expect_equal(
 97 |     let_all(iris, scaled = if(is.numeric(.x)) scale2(.x), i = FALSE),
 98 |     dt_iris[FALSE, paste0(names(dt_iris[,-5]), "_scaled") := lapply(.SD, scale2), .SDcols = -5]
 99 | )
100 | 
101 | dt_iris = as.data.table(iris)
102 | expect_equal(
103 |     let_all(iris, mean = mean(.x), by = Species, suffix = FALSE),
104 |     dt_iris[, paste0("mean_", names(dt_iris)[-5]) := lapply(.SD, mean), by = Species]
105 | )
106 | 
107 | dt_iris = as.data.table(iris)
108 | expect_equal(
109 |     let_all(iris, mean = mean(.x), by = Species, sep = ""),
110 |     dt_iris[, paste0(names(dt_iris)[-5], "mean") := lapply(.SD, mean), by = Species]
111 | )
112 | 
113 | 
114 | dt_iris = as.data.table(iris)
115 | expect_equal(
116 |     let_all(iris, mean, sd, length, by = Species, sep = "_"),
117 |     dt_iris[, c(paste0(names(dt_iris)[-5], "_mean"), paste0(names(dt_iris)[-5], "_sd"), paste0(names(dt_iris)[-5], "_length")) :=
118 |                 c(lapply(.SD, mean), lapply(.SD, sd), lapply(.SD, length)), by = Species]
119 | )
120 | 
121 | dt_iris = as.data.table(iris)
122 | my_name = "N"
123 | expect_equal(
124 |     let_all(iris, mean, sd, (my_name) := length, by = Species, sep = "_"),
125 |     dt_iris[, c(paste0(names(dt_iris)[-5], "_mean"), paste0(names(dt_iris)[-5], "_sd"), paste0(names(dt_iris)[-5], "_N")) :=
126 |                 c(lapply(.SD, mean), lapply(.SD, sd), lapply(.SD, length)), by = Species]
127 | )
128 | 
129 | data(iris)
130 | dt_iris = as.data.table(iris)
131 | my_fun = function(x){
132 |     if(is.numeric(x)){
133 |         mean(x)
134 |     } else {
135 |         NULL
136 |     }
137 | }
138 | 
139 | expect_equal(
140 |     let_all(iris, mean = my_fun),
141 |     dt_iris[,paste0(names(dt_iris)[-5], "_mean") := lapply(.SD, mean), .SDcols = -"Species"]
142 | )
143 | 
144 | dt_iris = as.data.table(iris)
145 | expect_equal(
146 |     let_all(iris, mean = function(x){
147 |         if(is.numeric(x)){
148 |             mean(x)
149 |         } else {
150 |             NULL
151 |         }
152 |     }),
153 |     dt_iris[,paste0(names(dt_iris)[-5], "_mean") := lapply(.SD, mean), .SDcols = -"Species"]
154 | )
155 | 
156 | dt_iris = as.data.table(iris)
157 | expect_equal(
158 |     let_all(iris, mean = mean, .SDcols = -"Species"),
159 |     dt_iris[,paste0(names(dt_iris)[-5], "_mean") := lapply(.SD, mean), .SDcols = -"Species"]
160 | )
161 | 
162 | 
163 | #### with etable
164 | 
165 | etab = data.frame(a = 1:2, b = 3:4)
166 | class(etab) = c("etable", class(etab))
167 | res = etab
168 | res$a_new = res$a + 1
169 | res$b_new = res$b + 1
170 | 
171 | etab2 = let_all(etab, new = .x + 1)
172 | expect_identical(etab2, res)
173 | 
174 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_join.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext:","dt_join_*", "\n")
  2 | workers = fread("
  3 |     name company
  4 |                 Nick Acme
  5 |                 John Ajax
  6 |                 Daniela Ajax
  7 |                 ", stringsAsFactors=FALSE, data.table = FALSE)
  8 | positions = fread("
  9 |                   name position
 10 |                   John designer
 11 |                   Daniela engineer
 12 |                   Cathie manager
 13 |                   ", stringsAsFactors=FALSE, data.table = FALSE)
 14 | 
 15 | data("iris")
 16 | data("mtcars")
 17 | expect_error(dt_left_join(mtcars, iris))
 18 | expect_error(dt_left_join(workers, positions, by = "xxx"))
 19 | expect_error(dt_anti_join(mtcars, iris))
 20 | expect_error(dt_anti_join(mtcars, iris, by = "am"))
 21 | expect_error(dt_anti_join(mtcars, iris, by = "Species"))
 22 | expect_error(dt_anti_join(mtcars, iris, by = c("Species", "Sepal.Length")))
 23 | expect_error(dt_anti_join(mtcars, iris, by = c("am", "vs")))
 24 | expect_error(dt_anti_join(mtcars, iris, by = c('xxx' = "am", 'yyy' = "vs")))
 25 | expect_error(dt_anti_join(mtcars, iris, by = c('xxx' = "am", "vs")))
 26 | expect_error(dt_anti_join(workers, positions, by = "xxx"))
 27 | 
 28 | res = workers %>% dt_inner_join(positions)
 29 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax",
 30 | "Ajax"), position = c("designer", "engineer")), class = c("data.table",
 31 | "data.frame"), row.names = c(NA, -2L))
 32 | expect_equal(res, etal)
 33 | 
 34 | res = workers %>% dt_left_join(positions)
 35 | etal = structure(list(name = c("Nick", "John", "Daniela"), company = c("Acme",
 36 | "Ajax", "Ajax"), position = c(NA, "designer", "engineer")), class = c("data.table",
 37 | "data.frame"), row.names = c(NA, -3L))
 38 | expect_equal(res, etal)
 39 | 
 40 | res = workers %>% dt_right_join(positions)
 41 | etal = structure(list(name = c("John", "Daniela", "Cathie"), company = c("Ajax",
 42 | "Ajax", NA), position = c("designer", "engineer", "manager")), row.names = c(NA,
 43 | -3L), class = c("data.table", "data.frame"))
 44 | expect_equal(res, etal)
 45 | 
 46 | res = workers %>% dt_full_join(positions)
 47 | etal = structure(list(name = c("Nick", "John", "Daniela", "Cathie"),
 48 | company = c("Acme", "Ajax", "Ajax", NA), position = c(NA,
 49 | "designer", "engineer", "manager")), row.names = c(NA, -4L
 50 | ), class = c("data.table", "data.frame"))
 51 | expect_equal(res, etal)
 52 | 
 53 | res = workers %>% dt_anti_join(positions)
 54 | etal = structure(list(name = "Nick", company = "Acme"), class = c("data.table",
 55 | "data.frame"), row.names = c(NA, -1L))
 56 | expect_equal(res, etal)
 57 | 
 58 | res = workers %>% dt_semi_join(positions)
 59 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax",
 60 | "Ajax")), class = c("data.table", "data.frame"), row.names = c(NA,
 61 | -2L))
 62 | expect_equal(res, etal)
 63 | # To suppress the message, supply 'by' argument
 64 | res = workers %>% dt_left_join(positions, by = "name")
 65 | etal = structure(list(name = c("Nick", "John", "Daniela"), company = c("Acme",
 66 | "Ajax", "Ajax"), position = c(NA, "designer", "engineer")), class = c("data.table",
 67 | "data.frame"), row.names = c(NA, -3L))
 68 | expect_equal(res, etal)
 69 | # Use a named 'by' if the join variables have different names
 70 | positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
 71 | res = workers %>% dt_inner_join(positions2, by = c("name" = "worker"))
 72 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax",
 73 | "Ajax"), position = c("designer", "engineer")), class = c("data.table",
 74 | "data.frame"), row.names = c(NA, -2L))
 75 | expect_equal(res, etal)
 76 | 
 77 | 
 78 | res = workers %>% dt_anti_join(positions2, by = c("name" = "worker"))
 79 | etal = structure(list(name = "Nick", company = "Acme"), class = c("data.table",
 80 | "data.frame"), row.names = c(NA, -1L))
 81 | expect_equal(res, etal)
 82 | 
 83 | res = workers %>% dt_semi_join(positions2, by = c("name" = "worker"))
 84 | etal = structure(list(name = c("John", "Daniela"), company = c("Ajax",
 85 | "Ajax")), class = c("data.table", "data.frame"), row.names = c(NA,
 86 | -2L))
 87 | expect_equal(res, etal)
 88 | 
 89 | 
 90 | positions3 = positions[c(1,1,1:3), ]
 91 | workers2 = workers[c(1, 2,2,2, 3),]
 92 | res = workers2 %>% dt_left_join(positions3, by = "name")
 93 | etal = structure(list(name = c("Nick", "John", "John", "John", "John",
 94 | "John", "John", "John", "John", "John", "Daniela"), company = c("Acme",
 95 | "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", "Ajax", "Ajax",
 96 | "Ajax", "Ajax"), position = c(NA, "designer", "designer", "designer",
 97 | "designer", "designer", "designer", "designer", "designer", "designer",
 98 | "engineer")), class = c("data.table", "data.frame"), row.names = c(NA,
 99 | -11L))
100 | expect_equal(res, etal)
101 | 
102 | res = workers2 %>% dt_semi_join(positions3, by = "name")
103 | etal = structure(list(name = c("John", "John", "John", "Daniela"), company = c("Ajax",
104 | "Ajax", "Ajax", "Ajax")), class = c("data.table", "data.frame"
105 | ), row.names = c(NA, -4L))
106 | expect_equal(res, etal)
107 | 
108 | d1 = data.table(a=rep(1:2,each=3), b=1:6, key=c("a","b"))
109 | d2 = data.table(a=0:1, b=0:1, key=c("a","b"))
110 | 
111 | 
112 | res = dt_inner_join(d1, d2, by="a")
113 | etal = structure(list(a = c(1L, 1L, 1L), b.x = 1:3, b.y = c(1L, 1L,
114 | 1L)), class = c("data.table", "data.frame"), row.names = c(NA,
115 | -3L))
116 | expect_equal(res, etal)
117 | res = dt_inner_join(d1, d2, by="a", suffix=c(".d1", ".d2"))
118 | etal = structure(list(a = c(1L, 1L, 1L), b.d1 = 1:3, b.d2 = c(1L, 1L,
119 | 1L)), class = c("data.table", "data.frame"), row.names = c(NA,
120 | -3L))
121 | expect_equal(res, etal)
122 | 
123 | d1 = data.table(a=rep(1:2,each=3), b=1:6, e = 1)
124 | d2 = data.table(a=0:1, b=0:1, key=c("a","b"), d = 3)
125 | 
126 | res = dt_left_join(d1, d2, by = c("a", "b"))
127 | etal = structure(list(a = c(1L, 1L, 1L, 2L, 2L, 2L), b = 1:6, e = c(1,
128 | 1, 1, 1, 1, 1), d = c(3, NA, NA, NA, NA, NA)), class = c("data.table",
129 | "data.frame"), row.names = c(NA, -6L))
130 | expect_equal(res, etal)
131 | 
132 | d3 = setnames(d2, c("a", "v", "d"))
133 | 
134 | res = dt_full_join(d1, d3, by = c("a", "b" = "v"))
135 | etal = structure(list(a = c(1L, 1L, 1L, 2L, 2L, 2L, 0L), b = c(1L, 2L,
136 | 3L, 4L, 5L, 6L, 0L), e = c(1, 1, 1, 1, 1, 1, NA), d = c(3, NA,
137 | NA, NA, NA, NA, 3)), row.names = c(NA, -7L), class = c("data.table",
138 | "data.frame"))
139 | expect_equal(res, etal)
140 | 
141 | res = dt_anti_join(d1, d3, by = c("a", "b" = "v"))
142 | etal =structure(list(a = c(1L, 1L, 2L, 2L, 2L), b = 2:6, e = c(1, 1,
143 | 1, 1, 1)), class = c("data.table", "data.frame"), row.names = c(NA,
144 | -5L))
145 | expect_equal(res, etal)
146 | res = dt_semi_join(d1, d3, by = c("a", "b" = "v"))
147 | etal =structure(list(a = 1L, b = 1L, e = 1), class = c("data.table",
148 | "data.frame"), row.names = c(NA, -1L))
149 | expect_equal(res, etal)
150 | 
151 | 


--------------------------------------------------------------------------------
/man/to_long.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/to_wide.R
  3 | \name{to_long}
  4 | \alias{to_long}
  5 | \alias{to_wide}
  6 | \title{Convert data to long or to wide form}
  7 | \usage{
  8 | to_long(
  9 |   data,
 10 |   columns = NULL,
 11 |   keep = NULL,
 12 |   names_in = "variable",
 13 |   values_in = "value",
 14 |   drop_na = FALSE,
 15 |   names_factor = TRUE,
 16 |   value_factor = FALSE,
 17 |   ...
 18 | )
 19 | 
 20 | to_wide(
 21 |   data,
 22 |   keep = NULL,
 23 |   names_in = variable,
 24 |   values_in = value,
 25 |   fun = identity,
 26 |   sep = "_",
 27 |   fill = NA,
 28 |   missing_comb = c("none", "rows", "columns", "all"),
 29 |   ...
 30 | )
 31 | }
 32 | \arguments{
 33 | \item{data}{A data.frame to convert}
 34 | 
 35 | \item{columns}{unquoted names of variables for stacking. When missing, we
 36 | will stack all columns outside \code{keep} columns.}
 37 | 
 38 | \item{keep}{unquoted names of columns which will be kept as is, e. g. only
 39 | recycled or deduplicated. If missing, it is all columns except stacked or
 40 | unstacked. If \code{FALSE} then nothing will be kept.}
 41 | 
 42 | \item{names_in}{name of the stacked variable names column. The default name
 43 | is 'variable'. It is quoted in the \code{to_long} and unquoted in \code{to_wide}. If
 44 | \code{FALSE} in the \code{to_wide} than nothing will be widening.}
 45 | 
 46 | \item{values_in}{name(-s) of the stacked data values column(s). The default
 47 | name is 'value'. Multiple names can be provided here for the case when
 48 | \code{columns} is a list, though note well that the names provided in
 49 | \code{columns} take precedence. It is quoted in the \code{to_long} and unqoted in \code{to_wide}}
 50 | 
 51 | \item{drop_na}{If TRUE, NA values will be removed from the stacked data.}
 52 | 
 53 | \item{names_factor}{If TRUE, the column with names will be converted to
 54 | factor, else it will be a character column. TRUE by default.}
 55 | 
 56 | \item{value_factor}{If TRUE, the value column will be converted to factor,
 57 | else the stacked values type is left unchanged. FALSE by default.}
 58 | 
 59 | \item{...}{other arguments passed to \code{data.table::melt}/\code{data.table::dcast}}
 60 | 
 61 | \item{fun}{Should the data be aggregated before casting? By default, it is
 62 | \code{identity} - no aggregation. To use multiple aggregation functions, pass a
 63 | list; see Examples.}
 64 | 
 65 | \item{sep}{Character vector of length 1, indicating the separating character
 66 | in variable names generated during casting. Default is "_".}
 67 | 
 68 | \item{fill}{Value with which to fill missing cells. \code{NA} by default. If \code{fun} is
 69 | present, takes the value by applying the function on a 0-length vector.}
 70 | 
 71 | \item{missing_comb}{One of "none" (the default), "rows" - include missing
 72 | combinations in rows, "columns" - include missing combinations in columns,
 73 | and "all" include all missing combinations.}
 74 | }
 75 | \value{
 76 | data.table in the wide or long form.
 77 | }
 78 | \description{
 79 | \code{to_long} increases number of rows in the dataset and reduce number of
 80 | columns. \code{to_wide} makes invert transformation. You can use \link{cols} for
 81 | selecting variables in the arguments. See examples.
 82 | }
 83 | \examples{
 84 | data(iris)
 85 | 
 86 | # 'to_long'
 87 | 
 88 | long_iris = iris \%>\%
 89 |     to_long(keep = Species)
 90 | 
 91 | long_iris
 92 | 
 93 | iris_with_stat = long_iris \%>\%
 94 |     take(mean = mean(value),
 95 |          sd = sd(value),
 96 |          n = .N*1.0,
 97 |          by = .(Species, variable)
 98 |     ) \%>\%
 99 |     to_long(columns = c(mean, sd, n), names_in = "stat")
100 | 
101 | # 'to_wide' - table with multiple stats
102 | iris_with_stat \%>\%
103 |     to_wide()
104 | 
105 | 
106 | iris_with_stat \%>\%
107 |     to_wide(names_in = c(variable, stat))
108 | 
109 | iris_with_stat \%>\%
110 |     to_wide(names_in = c(variable, Species))
111 | 
112 | # 'to_wide' - aggregation function
113 | long_iris \%>\%
114 |     to_wide(fun = list(Mean = mean, SD = sd, N = length))
115 | 
116 | 
117 | # '\%to\%' selector - example from tidyr::pivot_longer
118 | 
119 | data(anscombe)
120 | anscombe \%>\%
121 |     to_long(
122 |         list(x = x1 \%to\% x4, y = y1 \%to\% y4),
123 |         names_in = "set"
124 |     )
125 | 
126 | ######################################
127 | ## Examples from data.table melt/dcast
128 | ######################################
129 | 
130 | set.seed(45)
131 | DT = data.table(
132 |     i_1 = c(1:5, NA)*1.0,
133 |     i_2 = c(NA,6,7,8,9,10)*1.0,
134 |     f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
135 |     f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE),
136 |     c_1 = sample(c(letters[1:3], NA), 6, TRUE),
137 |     d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
138 |     d_2 = as.Date(6:1, origin="2012-01-01")
139 | )
140 | 
141 | # id, values as character/integer/numeric vectors
142 | 
143 | to_long(DT, f_1, keep = 1:2)
144 | to_long(DT, f_1, keep = c(i_1, i_2))
145 | to_long(DT, f_1, keep = i_1 \%to\% i_2)
146 | to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE)
147 | to_long(DT, f_1, keep = cols("i_{1:2}"))
148 | to_long(DT, f_1, keep = cols("^i_"))
149 | to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val")
150 | 
151 | col_var = "^i_"
152 | to_long(DT, 3, keep = cols(col_var))
153 | 
154 | to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE)
155 | 
156 | to_long(mtcars)
157 | to_long(mtcars, keep = am)
158 | to_long(mtcars, columns = c(am, vs, mpg))
159 | to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE)
160 | to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)
161 | to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE)
162 | 
163 | data("ChickWeight")
164 | names(ChickWeight) = tolower(names(ChickWeight))
165 | DT = to_long(ChickWeight, keep=2:4)
166 | 
167 | to_wide(DT, keep = time, fun = mean)
168 | to_wide(DT, keep = FALSE, fun = mean)
169 | to_wide(DT, keep = diet, fun = mean)
170 | to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all")
171 | to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0)
172 | to_wide(DT, chick, time, fun = mean)
173 | 
174 | 
175 | 
176 | # using FALSE
177 | DT = data.table(v1 = rep(1:2, each = 6),
178 |                 v2 = rep(rep(1:3, 2), each = 2),
179 |                 v3 = rep(1:2, 6),
180 |                 v4 = rnorm(6))
181 | 
182 | ## for each combination of (v1, v2), add up all values of v4
183 | to_wide(DT,
184 |         cols("^v(1|2)"),
185 |         names_in = FALSE,
186 |         values_in = v4,
187 |         fun = sum
188 | )
189 | 
190 | # multiple values_in and multiple fun
191 | DT = data.table(x=sample(5,20,TRUE),
192 |                 y=sample(2,20,TRUE),
193 |                 z=sample(letters[1:2], 20,TRUE),
194 |                 d1 = runif(20),
195 |                 d2=1L)
196 | 
197 | # multiple values_in
198 | to_wide(DT,
199 |         keep = c(x, y),
200 |         names_in = z,
201 |         values_in = c(d1, d2),
202 |         fun = sum,
203 |         fill = 0)
204 | 
205 | # multiple funs
206 | to_wide(DT,
207 |         keep = c(x, y),
208 |         names_in = z,
209 |         values_in = d1,
210 |         fun = list(sum = sum, mean = mean),
211 |         fill = NULL)
212 | 
213 | # multiple fun and values_in (all combinations)
214 | to_wide(DT,
215 |         keep = c(x, y),
216 |         names_in = z,
217 |         values_in = c(d1, d2),
218 |         fun = list(sum = sum, mean = mean)
219 | )
220 | 
221 | # multiple fun and values_in (one-to-one)
222 | to_wide(DT,
223 |         keep = c(x, y),
224 |         names_in = z,
225 |         values_in = list(d1, d2),
226 |         fun = list(sum = sum, mean = mean)
227 | )
228 | }
229 | 


--------------------------------------------------------------------------------
/R/vlookup.R:
--------------------------------------------------------------------------------
  1 | #' Look up values in dictionary.
  2 | #'
  3 | #' `vlookup` function is inspired by VLOOKUP spreadsheet
  4 | #' function. It looks for a `lookup_value` in the `lookup_column` of
  5 | #' the `dict`, and then returns values in the same rows from
  6 | #' `result_column`.
  7 | #' `xlookup` is simplified version of `vlookup`. It searches for a
  8 | #' `lookup_value` in the `lookup_vector` and return values in the same
  9 | #' position from the `result_vector`.
 10 | #'
 11 | #' @param lookup_value Vector of looked up values
 12 | #' @param dict data.frame. Dictionary.
 13 | #' @param result_column numeric or character. Resulting columns in the
 14 | #'   `dict`. Default  value for `result_column` is 2 - for frequent
 15 | #'   case of dictionary with keys in the first column and results in the second
 16 | #'   column.
 17 | #' @param lookup_column Column of `dict` in which lookup value will be
 18 | #'   searched. By default, it is the first column of the `dict`.
 19 | #' @param lookup_vector vector in which 'lookup_value' will be searched during 'xlookup'.
 20 | #' @param result_vector vector with resulting values for 'xlookup'.
 21 | #' @param no_match vector of length one. NA by default. Where a valid match is
 22 | #'   not found, return the 'no_match' value you supply.
 23 | #' @return `xlookup` always return vector, `vlookup` returns vector if
 24 | #'   the `result_column` is single value. In the opposite case data.frame will
 25 | #'   be returned.
 26 | #'
 27 | #' @export
 28 | #' @examples
 29 | #' # with data.frame
 30 | #' dict = data.frame(num=1:26, small=letters, cap=LETTERS)
 31 | #' vlookup(1:3, dict)
 32 | #' vlookup(c(45,1:3,58), dict, result_column='cap')
 33 | #' vlookup(c(45,1:3,58), dict, result_column='cap', no_match = "Not found")
 34 | #'
 35 | #' # the same with xlookup
 36 | #' xlookup(1:3, dict$num, dict$small)
 37 | #' xlookup(c(45,1:3,58), dict$num, dict$cap)
 38 | #' xlookup(c(45,1:3,58), dict$num, dict$cap, no_match = "Not found")
 39 | #'
 40 | #'
 41 | #' # example from base 'merge'
 42 | #' authors = data.table(
 43 | #'     surname = c("Tukey", "Venables", "Tierney", "Ripley", "McNeil"),
 44 | #'     nationality = c("US", "Australia", "US", "UK", "Australia"),
 45 | #'     deceased = c("yes", rep("no", 4))
 46 | #' )
 47 | #'
 48 | #' books = data.table(
 49 | #'     surname = c("Tukey", "Venables", "Tierney",
 50 | #'                 "Ripley", "Ripley", "McNeil", "R Core"),
 51 | #'     title = c("Exploratory Data Analysis",
 52 | #'               "Modern Applied Statistics ...",
 53 | #'               "LISP-STAT",
 54 | #'               "Spatial Statistics", "Stochastic Simulation",
 55 | #'               "Interactive Data Analysis",
 56 | #'               "An Introduction to R")
 57 | #' )
 58 | #'
 59 | #' let(books,
 60 | #'      c("author_nationality", "author_deceased") := vlookup(surname,
 61 | #'              dict = authors,
 62 | #'              result_column = 2:3
 63 | #'          )
 64 | #' )[]
 65 | #'
 66 | #' # Just for fun. Examples borrowed from Microsoft Excel.
 67 | #' # It is not the R way of doing things.
 68 | #'
 69 | #' # Example 2
 70 | #'
 71 | #' ex2 = fread("
 72 | #'     Item_ID Item Cost Markup
 73 | #'     ST-340 Stroller 145.67  0.30
 74 | #'     BI-567 Bib 3.56  0.40
 75 | #'     DI-328 Diapers  21.45  0.35
 76 | #'     WI-989 Wipes  5.12  0.40
 77 | #'     AS-469 Aspirator 2.56  0.45
 78 | #' ")
 79 | #'
 80 | #' # Calculates the retail price of diapers by adding the markup percentage to the cost.
 81 | #' vlookup("DI-328", ex2, 3) * (1 + vlookup("DI-328", ex2, 4)) # 28.9575
 82 | #'
 83 | #' # Calculates the sale price of wipes by subtracting a specified discount from
 84 | #' # the retail price.
 85 | #' (vlookup("WI-989", ex2, "Cost") * (1 + vlookup("WI-989", ex2, "Markup"))) * (1 - 0.2)  # 5.7344
 86 | #'
 87 | #' A2 = ex2[["Item_ID"]][1]
 88 | #' A3 = ex2[["Item_ID"]][2]
 89 | #'
 90 | #' # If the cost of an item is greater than or equal to $20.00, displays the string
 91 | #' # "Markup is nn%"; otherwise, displays the string "Cost is under $20.00".
 92 | #' ifelse(vlookup(A2, ex2, "Cost") >= 20,
 93 | #'        paste0("Markup is " , 100 * vlookup(A2, ex2, "Markup"),"%"),
 94 | #'        "Cost is under $20.00") # Markup is 30%
 95 | #'
 96 | #'
 97 | #' # If the cost of an item is greater than or equal to $20.00, displays the string
 98 | #' # Markup is nn%"; otherwise, displays the string "Cost is $n.nn".
 99 | #' ifelse(vlookup(A3, ex2, "Cost") >= 20,
100 | #'        paste0("Markup is: " , 100 * vlookup(A3, ex2, "Markup") , "%"),
101 | #'        paste0("Cost is $", vlookup(A3, ex2, "Cost"))) #Cost is $3.56
102 | #'
103 | #'
104 | #' # Example 3
105 | #'
106 | #' ex3 = fread('
107 | #'     ID  Last_name  First_name  Title Birth_date
108 | #'     1 Davis Sara "Sales Rep."  12/8/1968
109 | #'     2 Fontana Olivier "V.P. of Sales" 2/19/1952
110 | #'     3 Leal Karina "Sales Rep." 8/30/1963
111 | #'     4 Patten Michael "Sales Rep." 9/19/1958
112 | #'     5 Burke Brian "Sales Mgr." 3/4/1955
113 | #'     6 Sousa Luis "Sales Rep."  7/2/1963
114 | #' ')
115 | #'
116 | #' # If there is an employee with an ID of 5, displays the employee's last name;
117 | #' # otherwise, displays the message "Employee not found".
118 | #' vlookup(5, ex3, "Last_name", no_match = "Employee not found") # Burke
119 | #'
120 | #' # Many employees
121 | #' vlookup(1:10, ex3, "Last_name", no_match =  "Employee not found")
122 | #'
123 | #' # For the employee with an ID of 4, concatenates the values of three cells into
124 | #' # a complete sentence.
125 | #' paste0(vlookup(4, ex3, "First_name"), " ",
126 | #'        vlookup(4, ex3, "Last_name"), " is a ",
127 | #'        vlookup(4, ex3, "Title")) # Michael Patten is a Sales Rep.
128 | vlookup = function(lookup_value, dict, result_column = 2, lookup_column = 1, no_match = NA){
129 |     (!is.list(lookup_value) && NCOL(lookup_value)==1)   || stop("'vlookup': 'lookup_value' should be vector.")
130 |     is.data.frame(dict) || stop("'vlookup': 'dict' should be data.frame.")
131 |     length(lookup_column)==1 || stop("'vlookup': 'lookup_column' should be vector of length 1.")
132 |     (length(no_match)==1) || stop("'vlookup': 'no_match' should be length 1.")
133 |     if(length(result_column)==1){
134 |         return(xlookup(lookup_value, dict[[lookup_column]], dict[[result_column]], no_match = no_match))
135 |     }
136 |     ind = fast_match(lookup_value, dict[[lookup_column]])
137 |     if(is.data.table(dict)){
138 |         res = dict[ind, result_column, with = FALSE]
139 |     } else {
140 |         res = dict[ind, result_column, drop = FALSE]
141 |     }
142 |     if(!identical(no_match, NA)) {
143 |         ind = is.na(ind)
144 |         res[ind, ] = no_match
145 |     }
146 |     res
147 | }
148 | 
149 | #' @rdname vlookup
150 | #' @export
151 | xlookup = function(lookup_value, lookup_vector, result_vector, no_match = NA){
152 |     (!is.list(lookup_value) && NCOL(lookup_value)==1)   || stop("'xlookup': 'lookup_value' should be vector.")
153 |     (!is.list(lookup_vector) && NCOL(lookup_vector)==1) || stop("'xlookup': 'lookup_vector' should be vector.")
154 |     (!is.list(result_vector) && NCOL(result_vector)==1) || stop("'xlookup': 'result_vector' should be vector.")
155 |     (length(no_match)==1) || stop("'xlookup': 'no_match' should be length 1.")
156 |     (NROW(lookup_vector) == NROW(result_vector)) || stop("'xlookup': 'lookup_vector' and 'result_vector' should be the same length.")
157 |     ind = fast_match(lookup_value, lookup_vector)
158 |     res = result_vector[ind]
159 |     if(!identical(no_match, NA)) res[is.na(ind)] = no_match
160 |     res
161 | }
162 | 
163 | fast_match = function(x, table, nomatch = NA_integer_){
164 |     if(is.character(x) && is.character(table)){
165 |         return(chmatch(x, table, nomatch = nomatch))
166 |     }
167 |     match(x, table, nomatch = nomatch, incomparables = NULL)
168 | }
169 | 
170 | 


--------------------------------------------------------------------------------
/man/maditr-package.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/maditr-package.R
  3 | \docType{package}
  4 | \name{maditr-package}
  5 | \alias{maditr}
  6 | \alias{maditr-package}
  7 | \title{maditr: Pipe-Style Interface for 'data.table'}
  8 | \description{
  9 | Package provides pipe-style interface for \code{data.table}. It preserves
 10 | all data.table features without significant impact on performance. '\code{let}'
 11 | and '\code{take}' functions are simplified interfaces for most common data
 12 | manipulation tasks.
 13 | }
 14 | \details{
 15 | \itemize{
 16 | \item To select rows from data: \code{rows(mtcars, am==0)}
 17 | \item To select columns from data: \code{columns(mtcars, mpg, vs:carb)}
 18 | \item To aggregate data: \code{take(mtcars, mean_mpg = mean(mpg), by = am)}
 19 | \item To aggregate all non-grouping columns: \code{take_all(mtcars, mean, by = am)}
 20 | \item To aggregate several columns with one summary: \code{take(mtcars, mpg, hp, fun = mean, by = am)}
 21 | \item To get total summary skip \code{by} argument: \code{take_all(mtcars, mean)}
 22 | \item Use magrittr pipe '\%>\%' to chain several operations:
 23 | }
 24 | 
 25 | \if{html}{\out{<div class="sourceCode">}}\preformatted{     mtcars \%>\%
 26 |         let(mpg_hp = mpg/hp) \%>\%
 27 |         take(mean(mpg_hp), by = am)
 28 | }\if{html}{\out{</div>}}
 29 | \itemize{
 30 | \item To modify variables or add new variables:
 31 | }
 32 | 
 33 | \if{html}{\out{<div class="sourceCode">}}\preformatted{      mtcars \%>\%
 34 |          let(new_var = 42,
 35 |              new_var2 = new_var*hp) \%>\%
 36 |           head()
 37 | }\if{html}{\out{</div>}}
 38 | \itemize{
 39 | \item To modify all non-grouping variables:
 40 | }
 41 | 
 42 | \if{html}{\out{<div class="sourceCode">}}\preformatted{      iris \%>\%
 43 |          let_all(
 44 |              scaled = (.x - mean(.x))/sd(.x),
 45 |              by = Species) \%>\%
 46 |           head()
 47 | }\if{html}{\out{</div>}}
 48 | \itemize{
 49 | \item To drop variable assign NULL: \code{let(mtcars, am = NULL) \%>\% head()}
 50 | \item To aggregate all variables conditionally on name:
 51 | }
 52 | 
 53 | \if{html}{\out{<div class="sourceCode">}}\preformatted{      iris \%>\%
 54 |           take_all(
 55 |               mean = if(startsWith(.name, "Sepal")) mean(.x),
 56 |               median = if(startsWith(.name, "Petal")) median(.x),
 57 |               by = Species
 58 |           )
 59 | }\if{html}{\out{</div>}}
 60 | \itemize{
 61 | \item For parametric assignment use ':=':
 62 | }
 63 | 
 64 | \if{html}{\out{<div class="sourceCode">}}\preformatted{     new_var = "my_var"
 65 |      old_var = "mpg"
 66 |      mtcars \%>\%
 67 |          let((new_var) := get(old_var)*2) \%>\%
 68 |          head()
 69 | }\if{html}{\out{</div>}}
 70 | \itemize{
 71 | \item For more sophisticated operations see 'query'/'query_if': these
 72 | functions translates its arguments one-to-one to '\verb{[.data.table}'
 73 | method. Additionally there are some conveniences such as automatic
 74 | 'data.frame' conversion to 'data.table'.
 75 | }
 76 | }
 77 | \examples{
 78 | # examples form 'dplyr' package
 79 | data(mtcars)
 80 | \donttest{
 81 | # Newly created variables are available immediately
 82 | mtcars \%>\%
 83 |     let(
 84 |         cyl2 = cyl * 2,
 85 |         cyl4 = cyl2 * 2
 86 |     ) \%>\%
 87 |     head()
 88 | 
 89 | # You can also use let() to remove variables and
 90 | # modify existing variables
 91 | mtcars \%>\%
 92 |     let(
 93 |         mpg = NULL,
 94 |         disp = disp * 0.0163871 # convert to litres
 95 |     ) \%>\%
 96 |     head()
 97 | 
 98 | 
 99 | # window functions are useful for grouped computations
100 | mtcars \%>\%
101 |     let(rank = rank(-mpg, ties.method = "min"),
102 |         by = cyl) \%>\%
103 |     head()
104 | 
105 | # You can drop variables by setting them to NULL
106 | mtcars \%>\% let(cyl = NULL) \%>\% head()
107 | 
108 | # keeps all existing variables
109 | mtcars \%>\%
110 |     let(displ_l = disp / 61.0237) \%>\%
111 |     head()
112 | 
113 | # keeps only the variables you create
114 | mtcars \%>\%
115 |     take(displ_l = disp / 61.0237)
116 | 
117 | 
118 | # can refer to both contextual variables and variable names:
119 | var = 100
120 | mtcars \%>\%
121 |     let(cyl = cyl * var) \%>\%
122 |     head()
123 | 
124 | # select rows
125 | mtcars \%>\%
126 |     rows(am==0) \%>\%
127 |     head()
128 | 
129 | # select rows with compound condition
130 | mtcars \%>\%
131 |     rows(am==0 & mpg>mean(mpg))
132 | 
133 | # select columns
134 | mtcars \%>\%
135 |     columns(vs:carb, cyl)
136 | 
137 | mtcars \%>\%
138 |     columns(-am, -cyl)
139 | 
140 | # regular expression pattern
141 | columns(iris, "^Petal") # variables which start from 'Petal'
142 | columns(iris, "Width$") # variables which end with 'Width'
143 | 
144 | # move Species variable to the front
145 | # pattern "^." matches all variables
146 | columns(iris, Species, "^.")
147 | 
148 | # pattern "^.*al" means "contains 'al'"
149 | columns(iris, "^.*al")
150 | 
151 | # numeric indexing - all variables except Species
152 | columns(iris, 1:4)
153 | 
154 | # A 'take' with summary functions applied without 'by' argument returns an aggregated data
155 | mtcars \%>\%
156 |     take(mean = mean(disp), n = .N)
157 | 
158 | # Usually, you'll want to group first
159 | mtcars \%>\%
160 |     take(mean = mean(disp), n = .N, by = cyl)
161 | 
162 | # You can group by expressions:
163 | mtcars \%>\%
164 |     take_all(mean, by = list(vsam = vs + am))
165 | 
166 | # modify all non-grouping variables in-place
167 | mtcars \%>\%
168 |     let_all((.x - mean(.x))/sd(.x), by = am) \%>\%
169 |     head()
170 | 
171 | # modify all non-grouping variables to new variables
172 | mtcars \%>\%
173 |     let_all(scaled = (.x - mean(.x))/sd(.x), by = am) \%>\%
174 |     head()
175 | 
176 | # conditionally modify all variables
177 | iris \%>\%
178 |     let_all(mean = if(is.numeric(.x)) mean(.x)) \%>\%
179 |     head()
180 | 
181 | # modify all variables conditionally on name
182 | iris \%>\%
183 |     let_all(
184 |         mean = if(startsWith(.name, "Sepal")) mean(.x),
185 |         median = if(startsWith(.name, "Petal")) median(.x),
186 |         by = Species
187 |     ) \%>\%
188 |     head()
189 | 
190 | # aggregation with 'take_all'
191 | mtcars \%>\%
192 |     take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
193 | 
194 | # conditionally aggregate all variables
195 | iris \%>\%
196 |     take_all(mean = if(is.numeric(.x)) mean(.x))
197 | 
198 | # aggregate all variables conditionally on name
199 | iris \%>\%
200 |     take_all(
201 |         mean = if(startsWith(.name, "Sepal")) mean(.x),
202 |         median = if(startsWith(.name, "Petal")) median(.x),
203 |         by = Species
204 |     )
205 | 
206 | # parametric evaluation:
207 | var = quote(mean(cyl))
208 | mtcars \%>\%
209 |     let(mean_cyl = eval(var)) \%>\%
210 |     head()
211 | take(mtcars, eval(var))
212 | 
213 | # all together
214 | new_var = "mean_cyl"
215 | mtcars \%>\%
216 |     let((new_var) := eval(var)) \%>\%
217 |     head()
218 | take(mtcars, (new_var) := eval(var))
219 | 
220 | ########################################
221 | # variable selection
222 | 
223 | # range selection
224 | iris \%>\%
225 |     let(
226 |         avg = rowMeans(Sepal.Length \%to\% Petal.Width)
227 |     ) \%>\%
228 |     head()
229 | 
230 | # multiassignment
231 | iris \%>\%
232 |     let(
233 |         # starts with Sepal or Petal
234 |         multipled1 \%to\% multipled4 := cols("^(Sepal|Petal)")*2
235 |     ) \%>\%
236 |     head()
237 | 
238 | 
239 | mtcars \%>\%
240 |     let(
241 |         # text expansion
242 |         cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
243 |     ) \%>\%
244 |     head()
245 | 
246 | # range selection in 'by'
247 | # range selection  + additional column
248 | mtcars \%>\%
249 |     take(
250 |         res = sum(cols(mpg, disp \%to\% drat)),
251 |         by = vs \%to\% gear
252 |     )
253 | }
254 | }
255 | \seealso{
256 | Useful links:
257 | \itemize{
258 |   \item \url{https://github.com/gdemin/maditr}
259 |   \item Report bugs at \url{https://github.com/gdemin/maditr/issues}
260 | }
261 | 
262 | }
263 | \author{
264 | \strong{Maintainer}: Gregory Demin \email{gdemin@gmail.com}
265 | 
266 | }
267 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_to_wide.R:
--------------------------------------------------------------------------------
  1 | cat("\nContext:", "to_long", "\n")
  2 | 
  3 | set.seed(45)
  4 | DT = data.table(
  5 |     i_1 = c(1:5, NA),
  6 |     i_2 = c(NA,6,7,8,9,10),
  7 |     f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
  8 |     f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE),
  9 |     c_1 = sample(c(letters[1:3], NA), 6, TRUE),
 10 |     d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
 11 |     d_2 = as.Date(6:1, origin="2012-01-01"))
 12 | # add a couple of list cols
 13 | DT[, l_1 := DT[, list(c=list(rep(i_1, sample(5,1)))), by = i_1]$c]
 14 | DT[, l_2 := DT[, list(c=list(rep(c_1, sample(5,1)))), by = i_1]$c]
 15 | 
 16 | # id, measure as character/integer/numeric vectors
 17 | expect_identical(
 18 |     to_long(DT, f_1, keep = 1:2),
 19 |     melt(DT, id=1:2, measure="f_1")
 20 | )
 21 | 
 22 | expect_identical(
 23 |     to_long(DT, f_1, keep = c(i_1, i_2)),
 24 |     melt(DT, id=1:2, measure="f_1")
 25 | )
 26 | 
 27 | expect_identical(
 28 |     to_long(DT, f_1, keep = cols(i_1:i_2)),
 29 |     melt(DT, id=1:2, measure="f_1")
 30 | )
 31 | 
 32 | expect_identical(
 33 |     to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE),
 34 |     melt(DT, id=1:2, measure="f_1", variable.factor = FALSE)
 35 | )
 36 | 
 37 | expect_identical(
 38 |     to_long(DT, f_1, keep = cols("i_{1:2}")),
 39 |     melt(DT, id=1:2, measure="f_1")
 40 | )
 41 | 
 42 | expect_identical(
 43 |     to_long(DT, f_1, keep = cols("^i_")),
 44 |     melt(DT, id=1:2, measure="f_1")
 45 | )
 46 | 
 47 | expect_identical(
 48 |     to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val"),
 49 |     melt(DT, id=1:2, measure="f_1", variable.name = "var", value.name = "val")
 50 | )
 51 | 
 52 | 
 53 | 
 54 | col_var = "^i_"
 55 | expect_identical(
 56 |     to_long(DT, 3, keep = cols(col_var)),
 57 |     melt(DT, id=1:2, measure="f_1")
 58 | )
 59 | 
 60 | expect_identical(
 61 |     to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE),
 62 |     melt(DT, id=1:2, measure=3:4, value.factor=TRUE) # 'value' is *ordered* factor
 63 | )
 64 | 
 65 | data("mtcars")
 66 | expect_identical(
 67 |     to_long(mtcars),
 68 |     suppressWarnings(melt(mtcars))
 69 | )
 70 | 
 71 | data("mtcars")
 72 | expect_identical(
 73 |     to_long(mtcars, keep = am),
 74 |     melt(mtcars, id.vars = "am")
 75 | )
 76 | 
 77 | data("mtcars")
 78 | expect_identical(
 79 |     to_long(mtcars, columns = c(am, vs, mpg)),
 80 |     melt(mtcars, measure.vars = c("am", "vs", "mpg"))
 81 | )
 82 | 
 83 | 
 84 | data("mtcars")
 85 | expect_identical(
 86 |     to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE),
 87 |     melt(mtcars, measure.vars = c("am", "vs", "mpg"), id = integer(0))
 88 | )
 89 | 
 90 | # on na.rm=TRUE. NAs are removed efficiently, from within C
 91 | expect_identical(
 92 |     suppressWarnings(to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)),
 93 |     suppressWarnings(melt(DT, id="f_1", measure=c("i_1", "i_2"), na.rm=TRUE)) # remove NA
 94 | )
 95 | 
 96 | expect_identical(
 97 |     suppressWarnings(to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = FALSE)),
 98 |     suppressWarnings(melt(DT, id="f_1", measure=c("i_1", "i_2"), na.rm = FALSE))
 99 | )
100 | 
101 | expect_identical(
102 |     to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE),
103 |     melt(DT, id=1:2, measure=patterns("^f_", "^d_"), value.factor=TRUE)
104 | )
105 | 
106 | 
107 | cat("\nContext:", "to_wide", "\n")
108 | 
109 | data("ChickWeight")
110 | names(ChickWeight) = tolower(names(ChickWeight))
111 | DT = to_long(ChickWeight, keep=2:4) # calls melt.data.table
112 | 
113 | expect_identical(
114 |     to_wide(DT, keep = time, fun = mean),
115 |     dcast(DT, time ~ variable, fun=mean) # using partial matching of argument
116 | )
117 | 
118 | expect_identical(
119 |     to_wide(DT, keep = FALSE, fun = mean),
120 |     dcast(DT, . ~ variable, fun=mean) # using partial matching of argument
121 | )
122 | 
123 | expect_identical(
124 |     to_wide(DT, keep = diet, fun = mean),
125 |     dcast(DT, diet ~ variable, fun=mean)
126 | )
127 | expect_identical(
128 |     to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all"),
129 |     dcast(DT, diet+chick ~ time, drop=FALSE)
130 | )
131 | expect_identical(
132 |     to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0),
133 |     dcast(DT, diet+chick ~ time, drop=FALSE, fill=0)
134 | )
135 | 
136 | expect_equal(
137 |     to_wide(DT, chick, time, fun = mean),
138 |     dcast(DT, chick ~ time, fun=mean)
139 | )
140 | 
141 | DT <- data.table(v1 = c(1.1, 1.1, 1.1, 2.2, 2.2, 2.2),
142 |                  v2 = factor(c(1L, 1L, 1L, 3L, 3L, 3L), levels=1:3),
143 |                  v3 = factor(c(2L, 3L, 5L, 1L, 2L, 6L), levels=1:6),
144 |                  v4 = c(3L, 2L, 2L, 5L, 4L, 3L))
145 | # drop=TRUE
146 | 
147 | expect_error(to_wide(DT, c(v1, v2), v3, missing_comb = "none"))
148 | expect_identical(
149 |     to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "none"),
150 |     dcast(DT, v1 + v2 ~ v3, value.var = "v4")  # default is drop=TRUE
151 | )
152 | expect_identical(
153 |     to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "all"),
154 |     dcast(DT, v1 + v2 ~ v3, value.var = "v4", drop=FALSE)          # all missing combinations of both LHS and RHS
155 | )
156 | expect_identical(
157 |     to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "rows"),
158 |     dcast(DT, v1 + v2 ~ v3, value.var = "v4", drop=c(FALSE, TRUE)) # all missing combinations of only LHS
159 | )
160 | expect_identical(
161 |     to_wide(DT, c(v1, v2), v3, values_in = v4, missing_comb = "columns"),
162 |     dcast(DT, v1 + v2 ~ v3, value.var = "v4", drop=c(TRUE, FALSE)) # all missing combinations of only RHS
163 | )
164 | 
165 | # using . and ...
166 | DT <- data.table(v1 = rep(1:2, each = 6),
167 |                  v2 = rep(rep(1:3, 2), each = 2),
168 |                  v3 = rep(1:2, 6),
169 |                  v4 = rnorm(6))
170 | expect_identical(
171 |     to_wide(DT, names_in = v3, values_in = v4),
172 |     dcast(DT, ... ~ v3, value.var = "v4") #same as v1 + v2 ~ v3, value.var = "v4"
173 | )
174 | expect_identical(
175 |     to_wide(DT, cols(v1 %to% v3), names_in = FALSE, values_in = v4),
176 |     dcast(DT, v1 + v2 + v3 ~ ., value.var = "v4")
177 | )
178 | 
179 | ## for each combination of (v1, v2), add up all values of v4
180 | expect_identical(
181 |     to_wide(DT, cols("^v(1|2)"), names_in = FALSE, values_in = v4, fun = sum),
182 |     dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum)
183 | )
184 | 
185 | expect_identical(
186 |     to_wide(DT, cols("v{1:2}"), names_in = FALSE, values_in = v4, fun = sum),
187 |     dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum)
188 | )
189 | 
190 | my_keep = "v{1:2}"
191 | my_names = FALSE
192 | my_values = "v4"
193 | 
194 | expect_error(to_wide(DT, my_keep, names_in = FALSE, values_in = v4, fun = sum))
195 | 
196 | 
197 | expect_identical(
198 |     to_wide(DT, cols(my_keep), names_in = my_names, values_in = my_values, fun = sum),
199 |     dcast(DT, v1 + v2 ~ ., value.var = "v4", fun.aggregate = sum)
200 | )
201 | 
202 | # multiple value.var and multiple fun.aggregate
203 | DT = data.table(x=sample(5,20,TRUE), y=sample(2,20,TRUE),
204 |                 z=sample(letters[1:2], 20,TRUE), d1 = runif(20), d2=1L)
205 | # multiple value.var
206 | expect_identical(
207 |     to_wide(DT, c(x, y), z, c(d1, d2), fun = sum, fill = 0),
208 |     dcast(DT, x + y ~ z, fun=sum, value.var=c("d1","d2"))
209 | )
210 | # multiple fun.aggregate
211 | expect_identical(
212 |     to_wide(DT, c(x, y), z, d1, fun = list(sum = sum, mean = mean), fill = NULL),
213 |     dcast(DT, x + y ~ z, fun=list(sum, mean), value.var="d1")
214 | )
215 | 
216 | 
217 | my_fun = list(sum = sum, mean = mean)
218 | expect_identical(
219 |     to_wide(DT, c(x, y), z, d1, fun = my_fun),
220 |     dcast(DT, x + y ~ z, fun=my_fun, value.var="d1", fill = NA)
221 | )
222 | # multiple fun.agg and value.var (all combinations)
223 | expect_identical(
224 |     to_wide(DT, c(x, y), z, c(d1, d2), fun = my_fun),
225 |     dcast(DT, x + y ~ z, fun=my_fun, value.var=c("d1", "d2"), fill = NA)
226 | )
227 | # multiple fun.agg and value.var (one-to-one)
228 | expect_identical(
229 |     to_wide(DT, c(x, y), z, list(d1, d2), fun = my_fun),
230 |     dcast(DT, x + y ~ z, fun=my_fun, value.var=list("d1", "d2"), fill = NA)
231 | )
232 | 


--------------------------------------------------------------------------------
/vignettes/Introduction.R:
--------------------------------------------------------------------------------
  1 | ## ----setup, include=FALSE-----------------------------------------------------
  2 | knitr::opts_chunk$set(echo = TRUE)
  3 | data.table::setDTthreads(2)
  4 | 
  5 | ## ----eval=FALSE---------------------------------------------------------------
  6 | #       mtcars %>%
  7 | #          let(mpg_hp = mpg/hp) %>%
  8 | #          take(mean(mpg_hp), by = am)
  9 | 
 10 | ## ----eval=FALSE---------------------------------------------------------------
 11 | #        mtcars %>%
 12 | #           let(new_var = 42,
 13 | #               new_var2 = new_var*hp) %>%
 14 | #           head()
 15 | 
 16 | ## ----eval=FALSE---------------------------------------------------------------
 17 | #      iris %>%
 18 | #        let_all(
 19 | #            scaled = (.x - mean(.x))/sd(.x),
 20 | #            by = Species) %>%
 21 | #         head()
 22 | 
 23 | ## ----eval=FALSE---------------------------------------------------------------
 24 | #      iris %>%
 25 | #        take_all(
 26 | #            mean = if(startsWith(.name, "Sepal")) mean(.x),
 27 | #            median = if(startsWith(.name, "Petal")) median(.x),
 28 | #            by = Species
 29 | #        )
 30 | 
 31 | ## ----eval=FALSE---------------------------------------------------------------
 32 | #      new_var = "my_var"
 33 | #      old_var = "mpg"
 34 | #      mtcars %>%
 35 | #          let((new_var) := get(old_var)*2) %>%
 36 | #          head()
 37 | #  
 38 | #      # or,
 39 | #      expr = quote(mean(cyl))
 40 | #      mtcars %>%
 41 | #          let((new_var) := eval(expr)) %>%
 42 | #          head()
 43 | #  
 44 | #      # the same with `take`
 45 | #      by_var = "vs,am"
 46 | #      take(mtcars, (new_var) := eval(expr), by = by_var)
 47 | 
 48 | ## ----include=FALSE------------------------------------------------------------
 49 | library(maditr)
 50 | 
 51 | ## -----------------------------------------------------------------------------
 52 | 
 53 | workers = fread("
 54 |     name company
 55 |     Nick Acme
 56 |     John Ajax
 57 |     Daniela Ajax
 58 | ")
 59 | 
 60 | positions = fread("
 61 |     name position
 62 |     John designer
 63 |     Daniela engineer
 64 |     Cathie manager
 65 | ")
 66 | 
 67 | # xlookup
 68 | workers = let(workers,
 69 |   position = xlookup(name, positions$name, positions$position)
 70 | )
 71 | 
 72 | # vlookup
 73 | # by default we search in the first column and return values from second column
 74 | workers = let(workers,
 75 |   position = vlookup(name, positions, no_match = "Not found")
 76 | )
 77 | 
 78 | # the same 
 79 | workers = let(workers,
 80 |   position = vlookup(name, positions, 
 81 |                      result_column = "position", 
 82 |                      no_match = "Not found") # or, result_column = 2 
 83 | )
 84 | 
 85 | head(workers)
 86 | 
 87 | ## -----------------------------------------------------------------------------
 88 | library(maditr)
 89 | data(mtcars)
 90 | 
 91 | # Newly created variables are available immediately
 92 | mtcars %>%
 93 |     let(
 94 |         cyl2 = cyl * 2,
 95 |         cyl4 = cyl2 * 2
 96 |     ) %>% head()
 97 | 
 98 | # You can also use let() to remove variables and
 99 | # modify existing variables
100 | mtcars %>%
101 |     let(
102 |         mpg = NULL,
103 |         disp = disp * 0.0163871 # convert to litres
104 |     ) %>% head()
105 | 
106 | 
107 | # window functions are useful for grouped computations
108 | mtcars %>%
109 |     let(rank = rank(-mpg, ties.method = "min"),
110 |         by = cyl) %>%
111 |     head()
112 | 
113 | # You can drop variables by setting them to NULL
114 | mtcars %>%
115 |     let(cyl = NULL) %>%
116 |     head()
117 | 
118 | # keeps all existing variables
119 | mtcars %>%
120 |     let(displ_l = disp / 61.0237) %>%
121 |     head()
122 | 
123 | # keeps only the variables you create
124 | mtcars %>%
125 |     take(displ_l = disp / 61.0237) %>% 
126 |     head()
127 | 
128 | 
129 | # can refer to both contextual variables and variable names:
130 | var = 100
131 | mtcars %>%
132 |     let(cyl = cyl * var) %>%
133 |     head()
134 | 
135 | # select rows
136 | mtcars %>%
137 |     rows(am==0) %>% 
138 |     head()
139 | 
140 | # select rows with compound condition
141 | mtcars %>%
142 |     rows(am==0 & mpg>mean(mpg))
143 | 
144 | # select columns
145 | mtcars %>% 
146 |     columns(vs:carb, cyl)
147 |     
148 | mtcars %>% 
149 |     columns(-am, -cyl)    
150 | 
151 | # regular expression pattern
152 | columns(iris, "^Petal") %>% head() # variables which start from 'Petal'
153 | columns(iris, "Width$") %>% head() # variables which end with 'Width'
154 | 
155 | # move Species variable to the front
156 | # pattern "^." matches all variables
157 | columns(iris, Species, "^.") %>% head()
158 | 
159 | # pattern "^.*al" means "contains 'al'"
160 | columns(iris, "^.*al") %>% head()
161 | 
162 | # numeric indexing - all variables except Species
163 | columns(iris, 1:4) %>% head()
164 | 
165 | # A 'take' with summary functions applied without 'by' argument returns an aggregated data
166 | mtcars %>%
167 |     take(mean = mean(disp), n = .N)
168 | 
169 | # Usually, you'll want to group first
170 | mtcars %>%
171 |     take(mean = mean(disp), n = .N, by = am)
172 | 
173 | # grouping by multiple variables
174 | mtcars %>%
175 |     take(mean = mean(disp), n = .N, by = list(am, vs))
176 | 
177 | # You can group by expressions:
178 | mtcars %>%
179 |     take_all(
180 |         mean,
181 |         by = list(vsam = vs + am)
182 |     )
183 | 
184 | # modify all non-grouping variables in-place
185 | mtcars %>%
186 |     let_all((.x - mean(.x))/sd(.x), by = am) %>%
187 |     head()
188 | 
189 | # modify all non-grouping variables to new variables
190 | mtcars %>%
191 |     let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
192 |     head()
193 | 
194 | # conditionally modify all variables
195 | iris %>%
196 |     let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
197 |     head()
198 | 
199 | # modify all variables conditionally on name
200 | iris %>%
201 |     let_all(
202 |         mean = if(startsWith(.name, "Sepal")) mean(.x),
203 |         median = if(startsWith(.name, "Petal")) median(.x),
204 |         by = Species
205 |     ) %>%
206 |     head()
207 | 
208 | # aggregation with 'take_all'
209 | mtcars %>%
210 |     take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
211 | 
212 | # conditionally aggregate all variables
213 | iris %>%
214 |     take_all(mean = if(is.numeric(.x)) mean(.x))
215 | 
216 | # aggregate all variables conditionally on name
217 | iris %>%
218 |     take_all(
219 |         mean = if(startsWith(.name, "Sepal")) mean(.x),
220 |         median = if(startsWith(.name, "Petal")) median(.x),
221 |         by = Species
222 |     )
223 | 
224 | # parametric evaluation:
225 | var = quote(mean(cyl))
226 | mtcars %>% 
227 |     let(mean_cyl = eval(var)) %>% 
228 |     head()
229 | take(mtcars, eval(var))
230 | 
231 | # all together
232 | new_var = "mean_cyl"
233 | mtcars %>% 
234 |     let((new_var) := eval(var)) %>% 
235 |     head()
236 | take(mtcars, (new_var) := eval(var))
237 | 
238 | 
239 | 
240 | ## -----------------------------------------------------------------------------
241 | # range selection
242 | iris %>% 
243 |     let(
244 |         avg = rowMeans(Sepal.Length %to% Petal.Width)
245 |     ) %>% 
246 |     head()
247 | 
248 | # multiassignment
249 | iris %>% 
250 |     let(
251 |         # starts with Sepal or Petal
252 |         multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
253 |     ) %>% 
254 |     head()
255 | 
256 | 
257 | mtcars %>% 
258 |     let(
259 |         # text expansion
260 |         cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
261 |     ) %>% 
262 |     head()
263 | 
264 | # range selection in 'by'
265 | # selection of range + additional column
266 | mtcars %>% 
267 |     take(
268 |         res = sum(cols(mpg, disp %to% drat)),
269 |         by = vs %to% gear
270 |     )
271 | 
272 | ## -----------------------------------------------------------------------------
273 | workers = fread("
274 |     name company
275 |     Nick Acme
276 |     John Ajax
277 |     Daniela Ajax
278 | ")
279 | 
280 | positions = fread("
281 |     name position
282 |     John designer
283 |     Daniela engineer
284 |     Cathie manager
285 | ")
286 | 
287 | workers
288 | positions
289 | 
290 | ## -----------------------------------------------------------------------------
291 | workers %>% dt_inner_join(positions)
292 | workers %>% dt_left_join(positions)
293 | workers %>% dt_right_join(positions)
294 | workers %>% dt_full_join(positions)
295 | 
296 | # filtering joins
297 | workers %>% dt_anti_join(positions)
298 | workers %>% dt_semi_join(positions)
299 | 
300 | ## ----eval=FALSE---------------------------------------------------------------
301 | #  workers %>% dt_left_join(positions, by = "name")
302 | 
303 | ## ----eval=FALSE---------------------------------------------------------------
304 | #  positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
305 | #  workers %>% dt_inner_join(positions2, by = c("name" = "worker"))
306 | 
307 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_verbs.R:
--------------------------------------------------------------------------------
  1 | data(mtcars)
  2 | 
  3 | cat("\nContext:", "errors", "\n")
  4 | expect_error(take(1:5, fun = sum))
  5 | expect_error(let(1:5, new = 1))
  6 | expect_error(query(1:5, new := 1))
  7 | 
  8 | expect_error(let(mtcars))
  9 | expect_error(let(mtcars, am*2))
 10 | ################
 11 | cat("\nContext:", "let/let_if", "\n")
 12 | mt_dt = as.data.table(mtcars)
 13 | mt_dt2 = data.table::copy(mt_dt)
 14 | new_dt = let_if(mt_dt, am==0, mpg_hp = mpg/hp, new = mpg_hp*2, new2 := new*4)
 15 | new_dt2 = let_if(mtcars, am==0, mpg_hp = mpg/hp, new = mpg_hp*2, new2 := new*4)
 16 | mt_dt2[am==0, mpg_hp := mpg/hp][am==0, new := mpg_hp*2][am==0, new2 := new*4]
 17 | expect_identical(new_dt, mt_dt)
 18 | expect_identical(new_dt, mt_dt2)
 19 | expect_identical(new_dt2, mt_dt2)
 20 | 
 21 | 
 22 | # test scopes
 23 | my_take = function(coef2){
 24 |     take(mtcars, res = sum(am*coef2))
 25 | }
 26 | 
 27 | my_let = function(coef3){
 28 |     let(mtcars, res := am*coef3)
 29 | }
 30 | 
 31 | 
 32 | expect_identical(my_take(3), mt_dt[, .(res = sum(am*3))])
 33 | mt_dt = as.data.table(mtcars)
 34 | expect_identical(my_let(3), mt_dt[, res := am*3])
 35 | ###############
 36 | mt_dt = as.data.table(mtcars)
 37 | mt_dt2 = data.table::copy(mt_dt)
 38 | new_dt = let_if(mt_dt, am==0, mpg_hp := mpg/hp, "new" := mpg_hp*2)
 39 | new_dt2 = let_if(mtcars, am==0, mpg_hp := mpg/hp, "new" := mpg_hp*2)
 40 | mt_dt2[am==0, mpg_hp := mpg/hp][am==0, new := mpg_hp*2]
 41 | expect_identical(new_dt, mt_dt)
 42 | expect_identical(new_dt, mt_dt2)
 43 | expect_identical(new_dt2, mt_dt2)
 44 | ###############
 45 | mt_dt = as.data.table(mtcars)
 46 | mt_dt2 = data.table::copy(mt_dt)
 47 | my_var = "very_new"
 48 | new_dt = let(mt_dt, mpg_hp = mpg/hp, new = mpg_hp*2, (my_var) := mpg_hp + new)
 49 | new_dt2 = let(mtcars, mpg_hp = mpg/hp, new = mpg_hp*2, (my_var) := mpg_hp + new)
 50 | mt_dt2[, mpg_hp := mpg/hp][, new := mpg_hp*2][,(my_var) := mpg_hp + new]
 51 | expect_identical(new_dt, mt_dt)
 52 | expect_identical(new_dt, mt_dt2)
 53 | expect_identical(new_dt2, mt_dt2)
 54 | 
 55 | etab = data.frame(a = 1:2, b = 3:4)
 56 | class(etab) = c("etable", class(etab))
 57 | res = etab
 58 | res$new = c(1, NA)
 59 | etab2 = let_if(
 60 |     etab,
 61 |     1,
 62 |     new = 1
 63 | )
 64 | expect_identical(res, etab2)
 65 | res$new = 1
 66 | etab2 = let(
 67 |     etab,
 68 |     new = 1
 69 | )
 70 | expect_identical(res, etab2)
 71 | ###############
 72 | mt_dt = as.data.table(mtcars)
 73 | mt_dt2 = data.table::copy(mt_dt)
 74 | new_dt = let(mt_dt, mpg_hp = mean(mpg), keyby = cyl)
 75 | new_dt2 = let(mtcars, mpg_hp = mean(mpg), keyby = cyl)
 76 | mt_dt2[, mpg_hp := mean(mpg), keyby = cyl]
 77 | expect_identical(new_dt, mt_dt)
 78 | expect_identical(new_dt, mt_dt2)
 79 | expect_identical(new_dt2, mt_dt2)
 80 | 
 81 | ##########
 82 | 
 83 | mt_dt = as.data.table(mtcars)
 84 | mt_dt2 = data.table::copy(mt_dt)
 85 | let(mt_dt, filt = am==0)
 86 | let_if(mt_dt, (filt), counter = 5)
 87 | 
 88 | 
 89 | mt_dt2[, filt:= am==0]
 90 | mt_dt2[(filt), counter :=5]
 91 | 
 92 | expect_identical(mt_dt, mt_dt2)
 93 | expect_identical(take_if(mt_dt, (filt)), mt_dt[filt==TRUE,])
 94 | 
 95 | ###############
 96 | cat("\nContext:", "take/take_if", "\n")
 97 | mt_dt = as.data.table(mtcars)
 98 | res = take(mtcars, fun = mean, by = am)
 99 | res2 = take(mt_dt, fun = mean, by = am)
100 | res3 = mt_dt[, lapply(.SD, mean), by = am]
101 | expect_identical(res3, res)
102 | expect_identical(res3, res2)
103 | ###############
104 | mt_dt = as.data.table(mtcars)
105 | res = take(mtcars, fun = mean, by = am, .SDcols = c("mpg", "qsec"))
106 | res2 = take(mt_dt, fun = mean, by = am, .SDcols = c("mpg", "qsec"))
107 | res3 = mt_dt[, lapply(.SD, mean), by = am, .SDcols = c("mpg", "qsec")]
108 | expect_identical(res3, res)
109 | expect_identical(res3, res2)
110 | ###############
111 | mt_dt = as.data.table(mtcars)
112 | res = take(mtcars, mpg, hp, fun = mean, by = am)
113 | res2 = take(mt_dt, mpg, hp, fun = mean, by = am)
114 | res3 = mt_dt[, lapply(list(mpg = mpg, hp = hp), mean), by = am]
115 | expect_identical(res3, res)
116 | expect_identical(res3, res2)
117 | 
118 | ###############
119 | mt_dt = as.data.table(mtcars)
120 | res = take(mtcars, mpg, just_wow = hp, fun = mean, by = am)
121 | res2 = take(mt_dt, mpg, just_wow = hp, fun = mean, by = am)
122 | res3 = mt_dt[, lapply(list(mpg = mpg, just_wow = hp), mean), by = am]
123 | expect_identical(res3, res)
124 | expect_identical(res3, res2)
125 | ###################
126 | ###############
127 | mt_dt = as.data.table(mtcars)
128 | res = take(mtcars, mpg, just_wow := hp, fun = mean, by = am)
129 | res2 = take(mt_dt, mpg, just_wow := hp, fun = mean, by = am)
130 | res3 = mt_dt[, lapply(list(mpg = mpg, just_wow = hp), mean), by = am]
131 | expect_identical(res3, res)
132 | expect_identical(res3, res2)
133 | ###################
134 | mt_dt = as.data.table(mtcars)
135 | res = take_if(mtcars, am==0, fun = mean, by = am)
136 | res2 = take_if(mt_dt, am==0, fun = mean, by = am)
137 | res3 = mt_dt[am==0, lapply(.SD, mean), by = am]
138 | expect_identical(res3, res)
139 | expect_identical(res3, res2)
140 | 
141 | #############
142 | mt_dt = as.data.table(mtcars)
143 | res = take(mtcars, agg = mean(mpg), agg2 = mean(hp), by = am)
144 | res2 = take(mt_dt, agg = mean(mpg), agg2 = mean(hp), by = am)
145 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
146 | expect_identical(res3, res)
147 | expect_identical(res3, res2)
148 | #############
149 | mt_dt = as.data.table(mtcars)
150 | res = take(mtcars, agg := mean(mpg), agg2 = mean(hp), by = am)
151 | res2 = take(mt_dt, agg := mean(mpg), agg2 = mean(hp), by = am)
152 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
153 | expect_identical(res3, res)
154 | expect_identical(res3, res2)
155 | #############
156 | mt_dt = as.data.table(mtcars)
157 | res = take(mtcars, agg := mean(mpg), agg2 := mean(hp), by = am)
158 | res2 = take(mt_dt, agg := mean(mpg), agg2 := mean(hp), by = am)
159 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
160 | expect_identical(res3, res)
161 | expect_identical(res3, res2)
162 | ##########################
163 | mt_dt = as.data.table(mtcars)
164 | res = take(mtcars, mean(mpg), mean(hp), by = am)
165 | res2 = take(mt_dt, mean(mpg), mean(hp), by = am)
166 | res3 = mt_dt[, list("mean(mpg)" = mean(mpg), "mean(hp)" = mean(hp)), by = am]
167 | expect_identical(res3, res)
168 | expect_identical(res3, res2)
169 | ##########################
170 | mt_dt = as.data.table(mtcars)
171 | res = take(mtcars, mean(mpg), mean(hp), by = am, autoname = FALSE)
172 | res2 = take(mt_dt, mean(mpg), mean(hp), by = am, autoname = FALSE)
173 | res3 = mt_dt[, list(mean(mpg), mean(hp)), by = am]
174 | expect_identical(res3, res)
175 | expect_identical(res3, res2)
176 | ##########################
177 | mt_dt = as.data.table(mtcars)
178 | res = take_if(mtcars, vs==0, mean(mpg), mean(hp), by = am, autoname = FALSE)
179 | res2 = take_if(mt_dt, vs==0, mean(mpg), mean(hp), by = am, autoname = FALSE)
180 | res3 = mt_dt[vs==0,  list(mean(mpg), mean(hp)), by = am]
181 | expect_identical(res3, res)
182 | expect_identical(res3, res2)
183 | ##########################
184 | 
185 | cat("\nContext:", "query/query_if", "\n")
186 | ##########################
187 | mt_dt = as.data.table(mtcars)
188 | res = query(mtcars, list(mean(mpg), mean(hp)), by = am)
189 | res2 = query(mt_dt, list(mean(mpg), mean(hp)), by = am)
190 | res3 = mt_dt[, list(mean(mpg), mean(hp)), by = am]
191 | expect_identical(res3, res)
192 | expect_identical(res3, res2)
193 | ##########################
194 | mt_dt = as.data.table(mtcars)
195 | res = query_if(mtcars, vs==0, list(mean(mpg), mean(hp)), by = am)
196 | res2 = query_if(mt_dt, vs==0, list(mean(mpg), mean(hp)), by = am)
197 | res3 = mt_dt[vs==0,  list(mean(mpg), mean(hp)), by = am]
198 | expect_identical(res3, res)
199 | expect_identical(res3, res2)
200 | 
201 | cat("\nContext:", "let/take: parametric evaluation", "\n")
202 | 
203 | 
204 | 
205 | #############
206 | mt_dt = as.data.table(mtcars)
207 | new_var = "agg"
208 | res = take(mtcars, (new_var) := mean(mpg), agg2 = mean(hp), by = am)
209 | res2 = take(mt_dt, (new_var) := mean(mpg), agg2 = mean(hp), by = am)
210 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
211 | expect_identical(res3, res)
212 | expect_identical(res3, res2)
213 | 
214 | 
215 | mt_dt = as.data.table(mtcars)
216 | new_var = "agg"
217 | new_var2 = "agg2"
218 | res = take(mtcars, (new_var) := mean(mpg), (new_var2) := mean(hp), by = am)
219 | res2 = take(mt_dt, (new_var) := mean(mpg), (new_var2) := mean(hp), by = am)
220 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
221 | expect_identical(res3, res)
222 | expect_identical(res3, res2)
223 | 
224 | 
225 | expr1 = quote(mean(mpg))
226 | expr2 = quote(mean(hp))
227 | mt_dt = as.data.table(mtcars)
228 | new_var = "agg"
229 | new_var2 = "agg2"
230 | res = take(mtcars, (new_var) := eval(expr1), (new_var2) := eval(expr2), by = am)
231 | res2 = take(mt_dt, (new_var) := eval(expr1), (new_var2) := eval(expr2), by = am)
232 | res3 = mt_dt[, list(agg = mean(mpg), agg2 = mean(hp)), by = am]
233 | expect_equal(res3, res)
234 | expect_equal(res3, res2)
235 | 
236 | 
237 | ####
238 | data(mtcars)
239 | mt_dt = as.data.table(mtcars)
240 | expect_identical(
241 |     take(mtcars, m = mean(mpg), by = cols(vs %to% gear)),
242 |     mt_dt[,.(m = mean(mpg)), by = .(vs, am, gear)]
243 | )
244 | 


--------------------------------------------------------------------------------
/R/to_list.R:
--------------------------------------------------------------------------------
  1 | #' Apply an expression to each element of a list or vector
  2 | #'
  3 | #' - `to_list` always returns a list, each element of which is the
  4 | #' result of expression `expr` on the elements of data. By
  5 | #' default, NULL's will be removed from the result. You can change this behavior
  6 | #' with `skip_null` argument.
  7 | #' - `to_vec` is the same as `to_list` but tries to convert its result
  8 | #' to vector via [unlist][base::unlist].
  9 | #' - `to_df` and `to_dfr` try to combine its results to data.table by rows.
 10 | #' - `to_dfc` tries to combine its result to data.table by columns.
 11 | #' ```
 12 | #' ```
 13 | #' Expression can use predefined variables: '.x' is a value of current list
 14 | #' element, '.name' is a name of the element and '.index' is sequential number
 15 | #' of the element.
 16 | #'
 17 | #' @param data data.frame/list/vector
 18 | #' @param expr expression or function. Expression can use predefined variables:
 19 | #'   '.x' is a value of current list element, '.name' is a name of the element
 20 | #'   and '.index' is sequential number of the element.
 21 | #' @param skip_null logical Should we skip NULL's from result? Default is TRUE
 22 | #' @param trace FALSE by default. Should we report progress during execution?
 23 | #'   Possible values are TRUE, FALSE, "pb" (progress bar) or custom expression in 'quote', e. g. 'quote(print(.x))'.
 24 | #'   Expression can contain '.x', '.name', and '.index' variables.
 25 | #' @param ... further arguments provided if 'expr' is function.
 26 | #' @param trace_step integer. 1 by default.  Step for reporting progress. Ignored if 'trace' argument is equal to FALSE.
 27 | #' @param recursive logical. Should unlisting be applied to list components of x? For details see [unlist][base::unlist].
 28 | #' @param use.names logical. TRUE by default. Should names of source list be
 29 | #'   preserved? Setting it to FALSE in some cases can greatly increase
 30 | #'   performance. For details see [unlist][base::unlist].
 31 | #' @param idvalue expression for calculation id column. Usually it is just
 32 | #'   unquoted symbols: one of the '.name', '.index' or '.x'.
 33 | #' @param idname character, 'item_id' by default. Name for the id column.
 34 | #'
 35 | #' @return 'to_list' returns list, 'to_vec' tries to return vector and other functions return data.table
 36 | #' @export
 37 | #'
 38 | #' @examples
 39 | #' 1:5 %>%
 40 | #'     to_list(rnorm(n = 3, .x))
 41 | #'
 42 | #' # or in 'lapply' style
 43 | #' 1:5 %>%
 44 | #'     to_list(rnorm, n = 3) %>%
 45 | #'     to_vec(mean)
 46 | #'
 47 | #' # or use an anonymous function
 48 | #' 1:5 %>%
 49 | #'     to_list(function(x) rnorm(3, x))
 50 | #'
 51 | #' # Use to_vec() to reduce output to a vector instead
 52 | #' # of a list:
 53 | #' # filtering - return only even numbers
 54 | #' to_vec(1:10, if(.x %% 2 == 0) .x)
 55 | #'
 56 | #' # filtering - calculate mean only on the numeric columns
 57 | #' to_vec(iris, if(is.numeric(.x)) mean(.x))
 58 | #'
 59 | #' # mean for numerics, number of distincts for others
 60 | #' to_vec(iris, if(is.numeric(.x)) mean(.x) else uniqueN(.x))
 61 | #'
 62 | #' # means for Sepal
 63 | #' to_vec(iris, if(startsWith(.name, "Sepal")) mean(.x))
 64 | #'
 65 | #' # A more realistic example: split a data frame into pieces, fit a
 66 | #' # model to each piece, summarise and extract R^2
 67 | #' mtcars %>%
 68 | #'     split(.$cyl) %>%
 69 | #'     to_list(summary(lm(mpg ~ wt, data = .x))) %>%
 70 | #'     to_vec(.x$r.squared)
 71 | #'
 72 | #' # If each element of the output is a data frame, use
 73 | #' # to_df to row-bind them together:
 74 | #' mtcars %>%
 75 | #'     split(.$cyl) %>%
 76 | #'     to_list(lm(mpg ~ wt, data = .x)) %>%
 77 | #'     to_df(c(cyl = .name, coef(.x)))
 78 | #'
 79 | #' \dontrun{
 80 | #' # read all csv files in "data" to data.frame
 81 | #' all_files = dir("data", pattern = "csv$", full.names = TRUE) %>%
 82 | #'     to_df(fread,
 83 | #'           idvalue = basename(.x),
 84 | #'           idname = "filename",
 85 | #'           trace = "pb"
 86 | #'           )
 87 | #' }
 88 | to_list = function(data,
 89 |                    expr = NULL,
 90 |                    ...,
 91 |                    skip_null = TRUE,
 92 |                    trace = FALSE,
 93 |                    trace_step = 1L
 94 | ){
 95 |     expr_expr = substitute(expr)
 96 |     if(is.null(expr_expr)) return(as.list(data))
 97 |     is.numeric(trace_step) || stop("'to_list': non-numeric 'trace_step' argument.")
 98 |     trace_step>=1 || stop("'to_list': 'trace_step' argument should be greater or equal to one.")
 99 | 
100 |     ## progress bar and trace
101 |     if(identical(trace, "pb")){
102 |         # progress bar
103 |         ._data_length = length(data)
104 |         pbar = utils::txtProgressBar(min = 0, max = ._data_length, style = 3)
105 |         on.exit({
106 |             utils::setTxtProgressBar(pbar, ._data_length)
107 |             close(pbar)
108 |             })
109 | 
110 |         # progress bar
111 |         trace_expr = substitute({
112 |             utils::setTxtProgressBar(pbar, min(.index - 1, ._data_length))
113 |         })
114 | 
115 | 
116 |     } else if(isTRUE(trace)){
117 |         # info
118 |         trace_expr = quote(cat(as.character(Sys.time()), " ",
119 |                                .index, ": ",
120 |                                .name, " ",
121 |                                if(is.atomic(.x) && length(.x)==1 && object.size(.x)<400) .x,
122 |                                "\n", sep = ""))
123 | 
124 |     } else if(is.null(trace) || isFALSE(trace)){
125 | 
126 |         # no tracing
127 |         trace_expr = NULL
128 | 
129 |     } else {
130 |         # custom tracing
131 |         trace_expr = trace
132 |     }
133 | 
134 |     if(!is.null(trace_expr) && (trace_step>1)){
135 |         trace_expr = substitute(
136 |             {
137 |                 if(.index %% trace_step == 0) trace_expr
138 |             },
139 |             list(trace_expr = trace_expr, trace_step = trace_step)
140 |         )
141 |     }
142 | 
143 |     ### main expression
144 |     ._indexes = seq_along(data)
145 |     ._names = names(data)
146 |     names(._indexes) = ._names
147 |     if(is.null(._names)) ._names = rep("", length(data))
148 |     data = force(data) # for new version of magrittr
149 |     if(is.symbol(expr_expr) || (length(expr_expr)>1 && identical(as.character(expr_expr[[1]]), "function"))){
150 |         if(is.null(trace_expr)){
151 |             # simple lapply case
152 |             res = lapply(data, expr, ...)
153 |         } else {
154 |             # simple lapply case with trace
155 |             expr_expr = substitute(expr(.x, ...))
156 |             expr_expr = eval.parent(substitute({function(.index, ...)
157 |             {
158 |                 .x = data[[.index]]
159 |                 .value = data[[.index]]
160 |                 .name = ._names[[.index]]
161 |                 trace_expr
162 |                 expr_expr
163 |             }}))
164 |             res = lapply(._indexes, expr_expr, ...)
165 |         }
166 |     } else {
167 |         # expression
168 |         expr_expr = eval.parent(substitute({function(.index, ...)
169 |         {
170 |             .x = data[[.index]]
171 |             .value = data[[.index]]
172 |             .name = ._names[[.index]]
173 |             trace_expr
174 |             expr_expr
175 |         }}))
176 |         res = lapply(._indexes, expr_expr, ...)
177 |     }
178 | 
179 |     #######
180 |     if(skip_null){
181 |         nulls = vapply(res, is.null, FUN.VALUE = logical(1), USE.NAMES = FALSE)
182 |         res = res[!nulls]
183 |     }
184 |     res
185 | }
186 | 
187 | #' @rdname to_list
188 | #' @export
189 | to_vec = function(data,
190 |                   expr = NULL,
191 |                   ...,
192 |                   skip_null = TRUE,
193 |                   trace = FALSE,
194 |                   trace_step = 1L,
195 |                   recursive = TRUE,
196 |                   use.names = TRUE){
197 |     res = eval.parent(substitute(maditr::to_list(data, expr, ..., skip_null = skip_null, trace = trace, trace_step = trace_step)))
198 |     unlist(res, recursive = recursive, use.names = use.names)
199 | 
200 | }
201 | 
202 | 
203 | #' @rdname to_list
204 | #' @export
205 | to_df = function(data,
206 |                  expr = NULL,
207 |                  ...,
208 |                  trace = FALSE,
209 |                  trace_step = 1L,
210 |                  idvalue = NULL,
211 |                  idname = "item_id"){
212 |     res = eval.parent(
213 |         substitute(
214 |             maditr::to_list(data,
215 |                     expr,
216 |                     ...,
217 |                     trace = trace,
218 |                     trace_step = trace_step,
219 |                     skip_null = FALSE)
220 |         )
221 |     )
222 |     for(i in seq_along(res)){
223 |         if(!is.null(res[[i]]) && !is.list(res[[i]])){
224 |             res[[i]] = as.data.table(as.list(res[[i]]))
225 |         }
226 |     }
227 |     idvalue_expr = substitute(idvalue)
228 |     if(!is.null(idvalue_expr)){
229 |         idvalue = eval.parent(substitute(maditr::to_list(data, expr = (idvalue), skip_null = FALSE)))
230 |         for(i in seq_along(data)){
231 |             if(!is.null(res[[i]]) && !is.null(idvalue[[i]])){
232 |                 res[[i]][[idname]] = idvalue[[i]]
233 |             }
234 |         }
235 |     }
236 |     rbindlist(res, use.names=TRUE, fill=TRUE)
237 | }
238 | 
239 | #' @rdname to_list
240 | #' @export
241 | to_dfr = to_df
242 | 
243 | #' @rdname to_list
244 | #' @export
245 | to_dfc = function(data,
246 |                   expr = NULL,
247 |                   ...,
248 |                   trace = FALSE,
249 |                   trace_step = 1){
250 |     res = eval.parent(
251 |         substitute(
252 |             maditr::to_list(data,
253 |                     expr,
254 |                     ...,
255 |                     trace = trace,
256 |                     trace_step = trace_step)
257 |             )
258 |         )
259 |     as.data.table(res)
260 | }
261 | 
262 | 
263 | 
264 | 


--------------------------------------------------------------------------------
/R/maditr-package.R:
--------------------------------------------------------------------------------
  1 | #' maditr: Pipe-Style Interface for 'data.table'
  2 | #'
  3 | #' Package provides pipe-style interface for `data.table`. It preserves
  4 | #' all data.table features without significant impact on performance. '`let`'
  5 | #' and '`take`' functions are simplified interfaces for most common data
  6 | #' manipulation tasks.
  7 | #'
  8 | #' - To select rows from data: `rows(mtcars, am==0)`
  9 | #' - To select columns from data: `columns(mtcars, mpg, vs:carb)`
 10 | #' - To aggregate data: `take(mtcars, mean_mpg = mean(mpg), by = am)`
 11 | #' - To aggregate all non-grouping columns: `take_all(mtcars, mean, by = am)`
 12 | #' - To aggregate several columns with one summary: `take(mtcars, mpg, hp, fun = mean, by = am)`
 13 | #' - To get total summary skip `by` argument: `take_all(mtcars, mean)`
 14 | #' - Use magrittr pipe '%>%' to chain several operations:
 15 | #' ```
 16 | #'      mtcars %>%
 17 | #'         let(mpg_hp = mpg/hp) %>%
 18 | #'         take(mean(mpg_hp), by = am)
 19 | #' ```
 20 | #' - To modify variables or add new variables:
 21 | #' ```
 22 | #'       mtcars %>%
 23 | #'          let(new_var = 42,
 24 | #'              new_var2 = new_var*hp) %>%
 25 | #'           head()
 26 | #' ```
 27 | #' - To modify all non-grouping variables:
 28 | #' ```
 29 | #'       iris %>%
 30 | #'          let_all(
 31 | #'              scaled = (.x - mean(.x))/sd(.x),
 32 | #'              by = Species) %>%
 33 | #'           head()
 34 | #' ```
 35 | #' - To drop variable assign NULL: `let(mtcars, am = NULL) %>% head()`
 36 | #' - To aggregate all variables conditionally on name:
 37 | #' ```
 38 | #'       iris %>%
 39 | #'           take_all(
 40 | #'               mean = if(startsWith(.name, "Sepal")) mean(.x),
 41 | #'               median = if(startsWith(.name, "Petal")) median(.x),
 42 | #'               by = Species
 43 | #'           )
 44 | #' ```
 45 | #' - For parametric assignment use ':=':
 46 | #' ```
 47 | #'      new_var = "my_var"
 48 | #'      old_var = "mpg"
 49 | #'      mtcars %>%
 50 | #'          let((new_var) := get(old_var)*2) %>%
 51 | #'          head()
 52 | #' ```
 53 | #' - For more sophisticated operations see 'query'/'query_if': these
 54 | #' functions translates its arguments one-to-one to '`[.data.table`'
 55 | #' method. Additionally there are some conveniences such as automatic
 56 | #' 'data.frame' conversion to 'data.table'.
 57 | #'
 58 | #' @examples
 59 | #' # examples form 'dplyr' package
 60 | #' data(mtcars)
 61 | #' \donttest{
 62 | #' # Newly created variables are available immediately
 63 | #' mtcars %>%
 64 | #'     let(
 65 | #'         cyl2 = cyl * 2,
 66 | #'         cyl4 = cyl2 * 2
 67 | #'     ) %>%
 68 | #'     head()
 69 | #'
 70 | #' # You can also use let() to remove variables and
 71 | #' # modify existing variables
 72 | #' mtcars %>%
 73 | #'     let(
 74 | #'         mpg = NULL,
 75 | #'         disp = disp * 0.0163871 # convert to litres
 76 | #'     ) %>%
 77 | #'     head()
 78 | #'
 79 | #'
 80 | #' # window functions are useful for grouped computations
 81 | #' mtcars %>%
 82 | #'     let(rank = rank(-mpg, ties.method = "min"),
 83 | #'         by = cyl) %>%
 84 | #'     head()
 85 | #'
 86 | #' # You can drop variables by setting them to NULL
 87 | #' mtcars %>% let(cyl = NULL) %>% head()
 88 | #'
 89 | #' # keeps all existing variables
 90 | #' mtcars %>%
 91 | #'     let(displ_l = disp / 61.0237) %>%
 92 | #'     head()
 93 | #'
 94 | #' # keeps only the variables you create
 95 | #' mtcars %>%
 96 | #'     take(displ_l = disp / 61.0237)
 97 | #'
 98 | #'
 99 | #' # can refer to both contextual variables and variable names:
100 | #' var = 100
101 | #' mtcars %>%
102 | #'     let(cyl = cyl * var) %>%
103 | #'     head()
104 | #'
105 | #' # select rows
106 | #' mtcars %>%
107 | #'     rows(am==0) %>%
108 | #'     head()
109 | #'
110 | #' # select rows with compound condition
111 | #' mtcars %>%
112 | #'     rows(am==0 & mpg>mean(mpg))
113 | #'
114 | #' # select columns
115 | #' mtcars %>%
116 | #'     columns(vs:carb, cyl)
117 | #'
118 | #' mtcars %>%
119 | #'     columns(-am, -cyl)
120 | #'
121 | #' # regular expression pattern
122 | #' columns(iris, "^Petal") # variables which start from 'Petal'
123 | #' columns(iris, "Width$") # variables which end with 'Width'
124 | #'
125 | #' # move Species variable to the front
126 | #' # pattern "^." matches all variables
127 | #' columns(iris, Species, "^.")
128 | #'
129 | #' # pattern "^.*al" means "contains 'al'"
130 | #' columns(iris, "^.*al")
131 | #'
132 | #' # numeric indexing - all variables except Species
133 | #' columns(iris, 1:4)
134 | #'
135 | #' # A 'take' with summary functions applied without 'by' argument returns an aggregated data
136 | #' mtcars %>%
137 | #'     take(mean = mean(disp), n = .N)
138 | #'
139 | #' # Usually, you'll want to group first
140 | #' mtcars %>%
141 | #'     take(mean = mean(disp), n = .N, by = cyl)
142 | #'
143 | #' # You can group by expressions:
144 | #' mtcars %>%
145 | #'     take_all(mean, by = list(vsam = vs + am))
146 | #'
147 | #' # modify all non-grouping variables in-place
148 | #' mtcars %>%
149 | #'     let_all((.x - mean(.x))/sd(.x), by = am) %>%
150 | #'     head()
151 | #'
152 | #' # modify all non-grouping variables to new variables
153 | #' mtcars %>%
154 | #'     let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
155 | #'     head()
156 | #'
157 | #' # conditionally modify all variables
158 | #' iris %>%
159 | #'     let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
160 | #'     head()
161 | #'
162 | #' # modify all variables conditionally on name
163 | #' iris %>%
164 | #'     let_all(
165 | #'         mean = if(startsWith(.name, "Sepal")) mean(.x),
166 | #'         median = if(startsWith(.name, "Petal")) median(.x),
167 | #'         by = Species
168 | #'     ) %>%
169 | #'     head()
170 | #'
171 | #' # aggregation with 'take_all'
172 | #' mtcars %>%
173 | #'     take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
174 | #'
175 | #' # conditionally aggregate all variables
176 | #' iris %>%
177 | #'     take_all(mean = if(is.numeric(.x)) mean(.x))
178 | #'
179 | #' # aggregate all variables conditionally on name
180 | #' iris %>%
181 | #'     take_all(
182 | #'         mean = if(startsWith(.name, "Sepal")) mean(.x),
183 | #'         median = if(startsWith(.name, "Petal")) median(.x),
184 | #'         by = Species
185 | #'     )
186 | #'
187 | #' # parametric evaluation:
188 | #' var = quote(mean(cyl))
189 | #' mtcars %>%
190 | #'     let(mean_cyl = eval(var)) %>%
191 | #'     head()
192 | #' take(mtcars, eval(var))
193 | #'
194 | #' # all together
195 | #' new_var = "mean_cyl"
196 | #' mtcars %>%
197 | #'     let((new_var) := eval(var)) %>%
198 | #'     head()
199 | #' take(mtcars, (new_var) := eval(var))
200 | #'
201 | #' ########################################
202 | #' # variable selection
203 | #'
204 | #' # range selection
205 | #' iris %>%
206 | #'     let(
207 | #'         avg = rowMeans(Sepal.Length %to% Petal.Width)
208 | #'     ) %>%
209 | #'     head()
210 | #'
211 | #' # multiassignment
212 | #' iris %>%
213 | #'     let(
214 | #'         # starts with Sepal or Petal
215 | #'         multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
216 | #'     ) %>%
217 | #'     head()
218 | #'
219 | #'
220 | #' mtcars %>%
221 | #'     let(
222 | #'         # text expansion
223 | #'         cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
224 | #'     ) %>%
225 | #'     head()
226 | #'
227 | #' # range selection in 'by'
228 | #' # range selection  + additional column
229 | #' mtcars %>%
230 | #'     take(
231 | #'         res = sum(cols(mpg, disp %to% drat)),
232 | #'         by = vs %to% gear
233 | #'     )
234 | #' }
235 | "_PACKAGE"
236 | 
237 | #' @import magrittr
238 | #' @import data.table
239 | 
240 | 
241 | #' @export
242 | magrittr::`%>%`
243 | 
244 | #' @export
245 | magrittr::`%<>%`
246 | 
247 | #' @export
248 | magrittr::`%$%`
249 | 
250 | #' @export
251 | magrittr::`%T>%`
252 | 
253 | #' @export
254 | data.table::data.table
255 | 
256 | 
257 | #' @export
258 | data.table::as.data.table
259 | #' @export
260 | data.table::is.data.table
261 | #' @export
262 | data.table::last
263 | #' @export
264 | data.table::first
265 | 
266 | #' @export
267 | data.table::`%like%`
268 | #' @export
269 | data.table::between
270 | #' @export
271 | data.table::`%between%`
272 | #' @export
273 | data.table::inrange
274 | #' @export
275 | data.table::`%inrange%`
276 | #' @export
277 | data.table::`:=`
278 | #' @export
279 | data.table::setnames
280 | #' @export
281 | data.table::setcolorder
282 | #' @export
283 | data.table::set
284 | #' @export
285 | data.table::setDT
286 | #' @export
287 | data.table::setDF
288 | #' @export
289 | data.table::setorder
290 | #' @export
291 | data.table::setorderv
292 | #' @export
293 | data.table::setkey
294 | #' @export
295 | data.table::setkeyv
296 | #' @export
297 | data.table::setindex
298 | #' @export
299 | data.table::setindexv
300 | #' @export
301 | data.table::chmatch
302 | #' @export
303 | data.table::`%chin%`
304 | #' @export
305 | data.table::rbindlist
306 | #' @export
307 | data.table::fread
308 | #' @export
309 | data.table::fwrite
310 | #' @export
311 | data.table::foverlaps
312 | #' @export
313 | data.table::shift
314 | #' @export
315 | data.table::transpose
316 | #' @export
317 | data.table::tstrsplit
318 | #' @export
319 | data.table::frank
320 | #' @export
321 | data.table::frankv
322 | #' @export
323 | data.table::.SD
324 | #' @export
325 | data.table::.N
326 | #' @export
327 | data.table::.I
328 | #' @export
329 | data.table::.GRP
330 | #' @export
331 | data.table::.BY
332 | #' @export
333 | data.table::rleid
334 | #' @export
335 | data.table::rleidv
336 | #' @export
337 | data.table::rowid
338 | #' @export
339 | data.table::rowidv
340 | #' @export
341 | data.table::uniqueN
342 | #' @export
343 | data.table::setDTthreads
344 | #' @export
345 | data.table::getDTthreads
346 | #' @export
347 | data.table::fintersect
348 | #' @export
349 | data.table::fsetdiff
350 | #' @export
351 | data.table::funion
352 | #' @export
353 | data.table::fsetequal
354 | #' @export
355 | data.table::shouldPrint
356 | #' @export
357 | data.table::fsort
358 | #' @export
359 | data.table::groupingsets
360 | #' @export
361 | data.table::cube
362 | #' @export
363 | data.table::rollup
364 | #' @export
365 | data.table::nafill
366 | #' @export
367 | data.table::setnafill
368 | #' @export
369 | data.table::frollmean
370 | #' @export
371 | data.table::frollsum
372 | #' @export
373 | data.table::fcoalesce
374 | #' @export
375 | data.table::fifelse
376 | #' @export
377 | data.table::fcase
378 | #' @export
379 | data.table::frollapply
380 | 


--------------------------------------------------------------------------------
/R/let_all.R:
--------------------------------------------------------------------------------
  1 | ###################### let_all #################
  2 | 
  3 | #' @export
  4 | #' @rdname let_if
  5 | let_all = function(data,
  6 |                    ...,
  7 |                    by,
  8 |                    keyby,
  9 |                    .SDcols,
 10 |                    suffix = TRUE,
 11 |                    sep = "_",
 12 |                    i
 13 | ){
 14 |     UseMethod("let_all")
 15 | }
 16 | 
 17 | #' @export
 18 | let_all.etable = function(data,
 19 |                          ...,
 20 |                          by,
 21 |                          keyby,
 22 |                          .SDcols,
 23 |                          suffix = TRUE,
 24 |                          sep = "_",
 25 |                          i
 26 | ){
 27 |     data_class = class(data)
 28 |     data = as.data.table(data)
 29 |     res = eval.parent(
 30 |         substitute(maditr::let_all(data,
 31 |                                   ...,
 32 |                                   by = by,
 33 |                                   keyby = keyby,
 34 |                                   .SDcols = .SDcols,
 35 |                                   suffix = suffix,
 36 |                                   sep = sep,
 37 |                                   i = i
 38 |         )
 39 |         )
 40 |     )
 41 |     setDF(res)
 42 |     class(res) = data_class
 43 |     res
 44 | }
 45 | 
 46 | #' @export
 47 | let_all.data.frame = function(data,
 48 |                               ...,
 49 |                               by,
 50 |                               keyby,
 51 |                               .SDcols,
 52 |                               suffix = TRUE,
 53 |                               sep = "_",
 54 |                               i
 55 | ){
 56 | 
 57 |     # if data is expression we want to calculate it only once
 58 |     data = force(data)
 59 | 
 60 |     j_expr = substitute(list(...))
 61 | 
 62 |     parent_frame = parent.frame()
 63 | 
 64 |     j_expr = as.list(j_expr)[-1]
 65 |     (length(j_expr) == 0) && stop("'let_all' - missing expressions. You should provide at least one expression.")
 66 | 
 67 | 
 68 |     j_expr = add_names_from_walrus_assignement(j_expr, envir = parent.frame())
 69 |     j_expr = add_names_from_single_symbol(j_expr)
 70 | 
 71 |     #################
 72 |     ## naming
 73 |     ## suffix = FALSE - prefix, if TRUE it will be suffix
 74 |     ## no names
 75 |     ## if single symbol expr all names will be left as is
 76 |     ## if multiple symbol expr names will be prefixed/suffixed with this symbol "_"
 77 |     ## if complex expr and there is no names then original names will be left as is
 78 |     ## duplicated names will be made unique
 79 |     #
 80 |     # we need to know resulting names
 81 |     # this is simplest method to escape complexities with by, keyby and SDCols interaction
 82 |     one_row = as.data.table(data[1,, drop = FALSE])
 83 |     ._orig_names = eval.parent(substitute(maditr::query(one_row, list(._res_names = names(.SD)),
 84 |                                                         by = by,
 85 |                                                         keyby = keyby,
 86 |                                                         .SDcols = .SDcols
 87 |     )))[["._res_names"]]
 88 |     j_expr_names = names(j_expr)
 89 |     j_expr_names[!(j_expr_names %in% "")] = make.unique(j_expr_names[!(j_expr_names %in% "")])
 90 |     ._all_names = lapply(j_expr_names, function(curr_name){
 91 |         if(curr_name == "") return(._orig_names)
 92 |         if(suffix){
 93 |             paste(._orig_names, curr_name, sep = sep)
 94 |         } else {
 95 |             paste(curr_name, ._orig_names, sep = sep)
 96 |         }
 97 |     })
 98 | 
 99 | 
100 |     to_drop = new.env()
101 |     ._data_names = names(data)
102 |     names(j_expr) = NULL
103 |     ###
104 |     for(j in seq_along(j_expr)){
105 |         expr = j_expr[[j]]
106 | 
107 | 
108 |         if((is.symbol(expr) && !identical(expr, quote(.name)) &&
109 |             !identical(expr, quote(.index)) &&
110 |             !identical(expr, quote(.x)) &&
111 |             !identical(expr, quote(.value)) &&
112 |             !identical(expr, quote(.N)) &&
113 |             !identical(expr, quote(.GRP))) ||
114 |            (length(expr)>1 && as.character(expr[[1]]) == "function")){
115 |             # let_all(data, scale)
116 |             # let_all(data, function(x) scale(x))
117 |             expr = substitute(lapply(.SD, expr))
118 |         } else {
119 |             # let_all(data, scale(.x))
120 |             expr = substitute({
121 |                 .data_names =  ._data_names
122 |                 lapply(names(.SD), function(.name) {
123 |                 .value = get(.name)
124 |                 .x = get(.name)
125 |                 .index = match(.name, .data_names)
126 |                 expr
127 |             })
128 |             })
129 |         }
130 | 
131 |         if(identical(._all_names[[j]], ._orig_names)){
132 |             j_expr[[j]] = substitute(
133 |                 {
134 | 
135 |                     res = expr
136 |                     # if expression returns NULL we leave this variable unchanged
137 |                     empty = which(vapply(res, is.null, FUN.VALUE = logical(1)))
138 |                     if(length(empty)>0) {
139 |                         res[empty] = .SD[ ,empty, with = FALSE]
140 |                     }
141 |                     res
142 |                 }
143 |             )
144 |             next()
145 |         }
146 | 
147 |         ##################
148 |         ._new_names = ._all_names[[j]]
149 |         # let_all(data, my_new_name = scale(.x))
150 |         j_expr[[j]] = substitute(
151 |             {
152 |                 res = expr
153 |                 # if expression returns NULL we should skip this variable
154 |                 empty = which(vapply(res, is.null, FUN.VALUE = logical(1)))
155 |                 if(length(empty)>0) {
156 |                     res[empty] = NA
157 |                     lapply(._new_names[empty], assign, value = NA, envir = to_drop)
158 |                 }
159 |                 res
160 |             }
161 |         )
162 | 
163 |     }
164 | 
165 |     ####
166 |     if(length(j_expr)>1){
167 |         j_expr = as.call(c(list(quote(c)), j_expr))
168 |     } else {
169 |         j_expr = j_expr[[1]]
170 |     }
171 |     ####
172 |     ._all_names = unlist(._all_names, recursive = TRUE, use.names = FALSE)
173 | 
174 |     # NULL is just a placeholder
175 |     expr = substitute(NULL[i,
176 |                            (._all_names) := j_expr,
177 |                            by = by,
178 |                            keyby = keyby,
179 |                            .SDcols = .SDcols
180 |     ]
181 |     )
182 | 
183 |     res = eval_in_parent_frame(data, expr, frame = parent_frame)
184 |     if(length(to_drop)>0){
185 |         res[,(names(to_drop)):=NULL]
186 |     }
187 |     res
188 | }
189 | 
190 | 
191 | 
192 | ###################### take_all #################
193 | 
194 | #' @export
195 | #' @rdname let_if
196 | take_all = function(data,
197 |                     ...,
198 |                     by,
199 |                     keyby,
200 |                     .SDcols,
201 |                     suffix = TRUE,
202 |                     sep = "_",
203 |                     i
204 | ){
205 |     UseMethod("take_all")
206 | }
207 | 
208 | #' @export
209 | take_all.data.frame = function(data,
210 |                                ...,
211 |                                by,
212 |                                keyby,
213 |                                .SDcols,
214 |                                suffix = TRUE,
215 |                                sep = "_",
216 |                                i
217 | ){
218 | 
219 |     j_expr = substitute(list(...))
220 | 
221 |     parent_frame = parent.frame()
222 | 
223 |     j_expr = as.list(j_expr)[-1]
224 |     (length(j_expr) == 0) && stop("'take_all' - missing expressions. You should provide at least one expression.")
225 | 
226 | 
227 |     j_expr = add_names_from_walrus_assignement(j_expr, envir = parent.frame())
228 |     j_expr = add_names_from_single_symbol(j_expr)
229 | 
230 | 
231 |     #################
232 |     ## naming
233 |     ## suffix = FALSE - prefix, if TRUE it will be suffix
234 |     ## no names
235 |     ## if single symbol expr all names will be left as is
236 |     ## if multiple symbol expr names will be prefixed/suffixed with this symbol "_"
237 |     ## if complex expr and there is no names then original names will be left as is
238 |     ## duplicated names will be made unique (??)
239 | 
240 |     # if data is expression we want to calculate it only once
241 | 
242 | 
243 |     ._data_names = names(data)
244 |     j_names = names(j_expr)
245 |     names(j_expr) = NULL
246 |     for(j in seq_along(j_expr)){
247 |         expr = j_expr[[j]]
248 |         expr = substitute_symbols(expr, list(
249 |             '.value' = quote(.SD[[.name]]),
250 |             '.x' = quote(.SD[[.name]]),
251 |             '.index' = substitute(match(.name, ._data_names))
252 |         ))
253 | 
254 |         if((is.symbol(expr) && !identical(expr, quote(.name)) &&
255 |             !identical(expr, quote(.N)) &&
256 |             !identical(expr, quote(.GRP))) ||
257 |            (length(expr)>1 && as.character(expr[[1]]) == "function")){
258 |             # take_all(data, mean)
259 |             # take_all(data, function(x) mean(x, na.rm = TRUE))
260 |             expr = substitute(lapply(.SD, expr))
261 |         } else {
262 |             # take_all(data, mean(.x))
263 |             expr = substitute(lapply(names(.SD), function(.name) expr))
264 |         }
265 |         curr_name = j_names[j]
266 |         if(curr_name != ""){
267 |             if(suffix){
268 |                 name_expr = substitute(paste(names(.SD), curr_name, sep = sep))
269 |             } else {
270 |                 name_expr = substitute(paste(curr_name, names(.SD), sep = sep))
271 |             }
272 |         } else {
273 |             name_expr = substitute(names(.SD))
274 |         }
275 | 
276 |         ##########################
277 |         j_expr[[j]] = substitute({
278 |             res = expr
279 |             names(res) = name_expr
280 |             not_empty = which(!vapply(res, is.null, FUN.VALUE = logical(1)))
281 |             res[not_empty]
282 |         })
283 | 
284 |     }
285 |     if(length(j_expr)>1){
286 |         j_expr = as.call(c(list(quote(c)), j_expr))
287 |     } else {
288 |         j_expr = j_expr[[1]]
289 |     }
290 |     ####
291 |     # NULL is just a placeholder
292 |     expr = substitute(NULL[i, j_expr,
293 |                            by = by,
294 |                            keyby = keyby,
295 |                            .SDcols = .SDcols])
296 | 
297 |     res = eval_in_parent_frame(data, expr, frame = parent_frame)
298 |     setnames(res, make.unique(names(res)))
299 |     res
300 | }
301 | 
302 | 
303 | 
304 | 


--------------------------------------------------------------------------------
/R/to_wide.R:
--------------------------------------------------------------------------------
  1 | variable = NULL # to pass CRAN check
  2 | value = NULL # to pass CRAN check
  3 | 
  4 | 
  5 | #' Convert data to long or to wide form
  6 | #'
  7 | #' `to_long` increases number of rows in the dataset and reduce number of
  8 | #' columns. `to_wide` makes invert transformation. You can use [cols] for
  9 | #' selecting variables in the arguments. See examples.
 10 | #'
 11 | #' @param data A data.frame to convert
 12 | #' @param columns unquoted names of variables for stacking. When missing, we
 13 | #'   will stack all columns outside `keep` columns.
 14 | #' @param keep unquoted names of columns which will be kept as is, e. g. only
 15 | #'   recycled or deduplicated. If missing, it is all columns except stacked or
 16 | #'   unstacked. If `FALSE` then nothing will be kept.
 17 | #' @param names_in name of the stacked variable names column. The default name
 18 | #'   is 'variable'. It is quoted in the `to_long` and unquoted in `to_wide`. If
 19 | #'   `FALSE` in the `to_wide` than nothing will be widening.
 20 | #' @param values_in name(-s) of the stacked data values column(s). The default
 21 | #'   name is 'value'. Multiple names can be provided here for the case when
 22 | #'   `columns` is a list, though note well that the names provided in
 23 | #'   `columns` take precedence. It is quoted in the `to_long` and unqoted in `to_wide`
 24 | #' @param drop_na If TRUE, NA values will be removed from the stacked data.
 25 | #' @param names_factor If TRUE, the column with names will be converted to
 26 | #'   factor, else it will be a character column. TRUE by default.
 27 | #' @param value_factor If TRUE, the value column will be converted to factor,
 28 | #'   else the stacked values type is left unchanged. FALSE by default.
 29 | #' @param fun Should the data be aggregated before casting? By default, it is
 30 | #'   `identity` - no aggregation. To use multiple aggregation functions, pass a
 31 | #'   list; see Examples.
 32 | #' @param sep Character vector of length 1, indicating the separating character
 33 | #'   in variable names generated during casting. Default is "_".
 34 | #' @param fill Value with which to fill missing cells. `NA` by default. If `fun` is
 35 | #'   present, takes the value by applying the function on a 0-length vector.
 36 | #' @param missing_comb One of "none" (the default), "rows" - include missing
 37 | #'   combinations in rows, "columns" - include missing combinations in columns,
 38 | #'   and "all" include all missing combinations.
 39 | #' @param ... other arguments passed to `data.table::melt`/`data.table::dcast`
 40 | #' @return data.table in the wide or long form.
 41 | #' @export
 42 | #'
 43 | #' @examples
 44 | #' data(iris)
 45 | #'
 46 | #' # 'to_long'
 47 | #'
 48 | #' long_iris = iris %>%
 49 | #'     to_long(keep = Species)
 50 | #'
 51 | #' long_iris
 52 | #'
 53 | #' iris_with_stat = long_iris %>%
 54 | #'     take(mean = mean(value),
 55 | #'          sd = sd(value),
 56 | #'          n = .N*1.0,
 57 | #'          by = .(Species, variable)
 58 | #'     ) %>%
 59 | #'     to_long(columns = c(mean, sd, n), names_in = "stat")
 60 | #'
 61 | #' # 'to_wide' - table with multiple stats
 62 | #' iris_with_stat %>%
 63 | #'     to_wide()
 64 | #'
 65 | #'
 66 | #' iris_with_stat %>%
 67 | #'     to_wide(names_in = c(variable, stat))
 68 | #'
 69 | #' iris_with_stat %>%
 70 | #'     to_wide(names_in = c(variable, Species))
 71 | #'
 72 | #' # 'to_wide' - aggregation function
 73 | #' long_iris %>%
 74 | #'     to_wide(fun = list(Mean = mean, SD = sd, N = length))
 75 | #'
 76 | #'
 77 | #' # '%to%' selector - example from tidyr::pivot_longer
 78 | #'
 79 | #' data(anscombe)
 80 | #' anscombe %>%
 81 | #'     to_long(
 82 | #'         list(x = x1 %to% x4, y = y1 %to% y4),
 83 | #'         names_in = "set"
 84 | #'     )
 85 | #'
 86 | #' ######################################
 87 | #' ## Examples from data.table melt/dcast
 88 | #' ######################################
 89 | #'
 90 | #' set.seed(45)
 91 | #' DT = data.table(
 92 | #'     i_1 = c(1:5, NA)*1.0,
 93 | #'     i_2 = c(NA,6,7,8,9,10)*1.0,
 94 | #'     f_1 = factor(sample(c(letters[1:3], NA), 6, TRUE)),
 95 | #'     f_2 = factor(c("z", "a", "x", "c", "x", "x"), ordered=TRUE),
 96 | #'     c_1 = sample(c(letters[1:3], NA), 6, TRUE),
 97 | #'     d_1 = as.Date(c(1:3,NA,4:5), origin="2013-09-01"),
 98 | #'     d_2 = as.Date(6:1, origin="2012-01-01")
 99 | #' )
100 | #'
101 | #' # id, values as character/integer/numeric vectors
102 | #'
103 | #' to_long(DT, f_1, keep = 1:2)
104 | #' to_long(DT, f_1, keep = c(i_1, i_2))
105 | #' to_long(DT, f_1, keep = i_1 %to% i_2)
106 | #' to_long(DT, f_1, keep = cols(i_1:i_2), names_factor = FALSE)
107 | #' to_long(DT, f_1, keep = cols("i_{1:2}"))
108 | #' to_long(DT, f_1, keep = cols("^i_"))
109 | #' to_long(DT, f_1, keep = cols("^i_"), names_in = "var", values_in = "val")
110 | #'
111 | #' col_var = "^i_"
112 | #' to_long(DT, 3, keep = cols(col_var))
113 | #'
114 | #' to_long(DT, cols("^f_"), keep = cols("^i_"), value_factor = TRUE)
115 | #'
116 | #' to_long(mtcars)
117 | #' to_long(mtcars, keep = am)
118 | #' to_long(mtcars, columns = c(am, vs, mpg))
119 | #' to_long(mtcars, columns = c(am, vs, mpg), keep = FALSE)
120 | #' to_long(DT, keep = f_1, columns = c(i_1, i_2), drop_na = TRUE)
121 | #' to_long(DT, keep=1:2, columns = list(cols("^f_"), cols("^d_")), value_factor=TRUE)
122 | #'
123 | #' data("ChickWeight")
124 | #' names(ChickWeight) = tolower(names(ChickWeight))
125 | #' DT = to_long(ChickWeight, keep=2:4)
126 | #'
127 | #' to_wide(DT, keep = time, fun = mean)
128 | #' to_wide(DT, keep = FALSE, fun = mean)
129 | #' to_wide(DT, keep = diet, fun = mean)
130 | #' to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all")
131 | #' to_wide(DT, keep = c(diet, chick), names_in = time, missing_comb = "all", fill = 0)
132 | #' to_wide(DT, chick, time, fun = mean)
133 | #'
134 | #'
135 | #'
136 | #' # using FALSE
137 | #' DT = data.table(v1 = rep(1:2, each = 6),
138 | #'                 v2 = rep(rep(1:3, 2), each = 2),
139 | #'                 v3 = rep(1:2, 6),
140 | #'                 v4 = rnorm(6))
141 | #'
142 | #' ## for each combination of (v1, v2), add up all values of v4
143 | #' to_wide(DT,
144 | #'         cols("^v(1|2)"),
145 | #'         names_in = FALSE,
146 | #'         values_in = v4,
147 | #'         fun = sum
148 | #' )
149 | #'
150 | #' # multiple values_in and multiple fun
151 | #' DT = data.table(x=sample(5,20,TRUE),
152 | #'                 y=sample(2,20,TRUE),
153 | #'                 z=sample(letters[1:2], 20,TRUE),
154 | #'                 d1 = runif(20),
155 | #'                 d2=1L)
156 | #'
157 | #' # multiple values_in
158 | #' to_wide(DT,
159 | #'         keep = c(x, y),
160 | #'         names_in = z,
161 | #'         values_in = c(d1, d2),
162 | #'         fun = sum,
163 | #'         fill = 0)
164 | #'
165 | #' # multiple funs
166 | #' to_wide(DT,
167 | #'         keep = c(x, y),
168 | #'         names_in = z,
169 | #'         values_in = d1,
170 | #'         fun = list(sum = sum, mean = mean),
171 | #'         fill = NULL)
172 | #'
173 | #' # multiple fun and values_in (all combinations)
174 | #' to_wide(DT,
175 | #'         keep = c(x, y),
176 | #'         names_in = z,
177 | #'         values_in = c(d1, d2),
178 | #'         fun = list(sum = sum, mean = mean)
179 | #' )
180 | #'
181 | #' # multiple fun and values_in (one-to-one)
182 | #' to_wide(DT,
183 | #'         keep = c(x, y),
184 | #'         names_in = z,
185 | #'         values_in = list(d1, d2),
186 | #'         fun = list(sum = sum, mean = mean)
187 | #' )
188 | to_long = function(data,
189 |                    columns = NULL,
190 |                    keep = NULL,
191 |                    names_in = "variable",
192 |                    values_in = "value",
193 |                    drop_na = FALSE,
194 |                    names_factor = TRUE,
195 |                    value_factor = FALSE,
196 |                    ...){
197 |     parent_frame = parent.frame()
198 |     data_names = names(data)
199 |     data_names_list = create_list_with_names(data_names)
200 |     columns_expr = substitute(columns)
201 |     columns_expr = replace_column_expr(columns_expr, data_names, parent_frame, type = "names")
202 |     columns = eval(columns_expr, data_names_list, enclos = parent_frame)
203 | 
204 |     keep_expr = substitute(keep)
205 |     keep_expr = replace_column_expr(keep_expr, data_names, parent_frame, type = "names")
206 |     keep = eval(keep_expr, data_names_list, enclos = parent_frame)
207 |     if(isFALSE(keep)) keep = integer(0)# no keep
208 | 
209 |     if(is.null(columns) && is.null(keep)) columns = data_names
210 |     maditr::melt(data = data,
211 |                  id.vars = keep,
212 |                  measure.vars = columns,
213 |                  variable.name = names_in,
214 |                  value.name = values_in,
215 |                  na.rm = drop_na,
216 |                  variable.factor = names_factor,
217 |                  value.factor = value_factor,
218 |                  ...
219 |     )
220 | }
221 | 
222 | 
223 | #' @export
224 | #' @rdname to_long
225 | to_wide = function(data,
226 |                    keep = NULL,
227 |                    names_in = variable,
228 |                    values_in = value,
229 |                    fun = identity,
230 |                    sep = "_",
231 |                    fill = NA,
232 |                    missing_comb = c("none", "rows", "columns", "all"),
233 |                    ...
234 | ){
235 |     parent_frame = parent.frame()
236 |     data_names = names(data)
237 |     data_names_list = create_list_with_names(data_names)
238 |     names_in_expr = substitute(names_in)
239 |     names_in_expr = replace_column_expr(names_in_expr, data_names, parent_frame, type = "names")
240 |     names_in = eval(names_in_expr, data_names_list, enclos = parent_frame)
241 |     if(isFALSE(names_in)) names_in = "." # no widening
242 | 
243 |     values_in_expr = substitute(values_in)
244 |     values_in_expr = replace_column_expr(values_in_expr, data_names, parent_frame, type = "names")
245 |     values_in = eval(values_in_expr, data_names_list, enclos = parent_frame)
246 | 
247 |     keep_expr = substitute(keep)
248 |     keep_expr = replace_column_expr(keep_expr, data_names, parent_frame, type = "names")
249 |     keep = eval(keep_expr, data_names_list, enclos = parent_frame)
250 |     if(isFALSE(keep)) keep = "."   # no keep
251 |     if(is.null(keep)) keep = "..." # keep all
252 | 
253 | 
254 | 
255 |     formula = stats::as.formula(paste(paste(keep, collapse = "+"), "~", paste(names_in, collapse = "+")))
256 |     missing_comb = match.arg(missing_comb)
257 |     drop = switch(missing_comb,
258 |                   "none" = TRUE,
259 |                   "rows" = c(FALSE, TRUE),
260 |                   "columns" = c(TRUE, FALSE),
261 |                   "all"  = c(FALSE, FALSE)
262 |     )
263 |     maditr::dcast(data = data,
264 |                   formula,
265 |                   fun.aggregate = fun,
266 |                   sep = sep,
267 |                   fill = fill,
268 |                   drop = drop,
269 |                   value.var = values_in,
270 |                   ...
271 |     )
272 | }
273 | 
274 | 
275 | 


--------------------------------------------------------------------------------
/vignettes/Introduction.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Introduction"
  3 | output:
  4 |   html_document:
  5 |     toc: true
  6 | vignette: >
  7 |   %\VignetteIndexEntry{maditr: Introduction}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{utf8}
 10 | ---
 11 | 
 12 | ```{r setup, include=FALSE}
 13 | knitr::opts_chunk$set(echo = TRUE)
 14 | data.table::setDTthreads(2)
 15 | ```
 16 | 
 17 | ## Links
 18 | 
 19 | - [maditr on CRAN](https://cran.r-project.org/package=maditr)
 20 | - [maditr on Github](https://github.com/gdemin/maditr)
 21 | - [Issues](https://github.com/gdemin/maditr/issues)
 22 | 
 23 | ## Overview
 24 | 
 25 | Package provides pipe-style interface for [data.table](https://cran.r-project.org/package=data.table) package. It preserves all data.table features without significant impact on performance. `let` and `take` functions are simplified interfaces for most common data manipulation tasks.
 26 | 
 27 | - To select rows from data: `rows(mtcars, am==0)`
 28 | - To select columns from data: `columns(mtcars, mpg, vs:carb)`
 29 | - To aggregate data: `take(mtcars, mean_mpg = mean(mpg), by = am)`
 30 | - To aggregate all non-grouping columns: `take_all(mtcars, mean, by = am)`
 31 | - To aggregate several columns with one summary: `take(mtcars, mpg, hp, fun = mean, by = am)`
 32 | - To get total summary skip `by` argument: `take_all(mtcars, mean)`
 33 | - Use magrittr pipe `%>%` to chain several operations: 
 34 | ```{r, eval=FALSE}
 35 |      mtcars %>%
 36 |         let(mpg_hp = mpg/hp) %>%
 37 |         take(mean(mpg_hp), by = am)
 38 | ```
 39 | - To modify variables or add new variables: 
 40 | ```{r, eval=FALSE}
 41 |       mtcars %>%
 42 |          let(new_var = 42,
 43 |              new_var2 = new_var*hp) %>%
 44 |          head()
 45 | ```          
 46 | - To drop variable assign NULL: `let(mtcars, am = NULL) %>% head()`
 47 | - To modify all non-grouping variables:
 48 | ```{r, eval=FALSE}
 49 |     iris %>%
 50 |       let_all(
 51 |           scaled = (.x - mean(.x))/sd(.x),
 52 |           by = Species) %>%
 53 |        head()
 54 | ``` 
 55 | - To aggregate all variables conditionally on name:
 56 | ```{r, eval=FALSE}
 57 |     iris %>%
 58 |       take_all(
 59 |           mean = if(startsWith(.name, "Sepal")) mean(.x),
 60 |           median = if(startsWith(.name, "Petal")) median(.x),
 61 |           by = Species
 62 |       )
 63 | ```
 64 | - For parametric assignment use `:=`: 
 65 | ```{r, eval=FALSE}
 66 |     new_var = "my_var"
 67 |     old_var = "mpg"
 68 |     mtcars %>%
 69 |         let((new_var) := get(old_var)*2) %>%
 70 |         head()
 71 |      
 72 |     # or,  
 73 |     expr = quote(mean(cyl))
 74 |     mtcars %>% 
 75 |         let((new_var) := eval(expr)) %>% 
 76 |         head()
 77 |     
 78 |     # the same with `take` 
 79 |     by_var = "vs,am"
 80 |     take(mtcars, (new_var) := eval(expr), by = by_var)
 81 | ```         
 82 | 
 83 | `query_if` function translates its arguments one-to-one to `[.data.table` method. Additionally there are some conveniences such as automatic `data.frame` conversion to `data.table`.
 84 | 
 85 | ## vlookup & xlookup
 86 | 
 87 | Let's make datasets for lookups:
 88 | ```{r include=FALSE}
 89 | library(maditr)
 90 | ```
 91 | 
 92 | ```{r}
 93 | 
 94 | workers = fread("
 95 |     name company
 96 |     Nick Acme
 97 |     John Ajax
 98 |     Daniela Ajax
 99 | ")
100 | 
101 | positions = fread("
102 |     name position
103 |     John designer
104 |     Daniela engineer
105 |     Cathie manager
106 | ")
107 | 
108 | # xlookup
109 | workers = let(workers,
110 |   position = xlookup(name, positions$name, positions$position)
111 | )
112 | 
113 | # vlookup
114 | # by default we search in the first column and return values from second column
115 | workers = let(workers,
116 |   position = vlookup(name, positions, no_match = "Not found")
117 | )
118 | 
119 | # the same 
120 | workers = let(workers,
121 |   position = vlookup(name, positions, 
122 |                      result_column = "position", 
123 |                      no_match = "Not found") # or, result_column = 2 
124 | )
125 | 
126 | head(workers)
127 | ```
128 | 
129 | ## More examples
130 | 
131 | We will use for demonstartion well-known `mtcars` dataset and some examples from `dplyr` package. 
132 | 
133 | ```{r}
134 | library(maditr)
135 | data(mtcars)
136 | 
137 | # Newly created variables are available immediately
138 | mtcars %>%
139 |     let(
140 |         cyl2 = cyl * 2,
141 |         cyl4 = cyl2 * 2
142 |     ) %>% head()
143 | 
144 | # You can also use let() to remove variables and
145 | # modify existing variables
146 | mtcars %>%
147 |     let(
148 |         mpg = NULL,
149 |         disp = disp * 0.0163871 # convert to litres
150 |     ) %>% head()
151 | 
152 | 
153 | # window functions are useful for grouped computations
154 | mtcars %>%
155 |     let(rank = rank(-mpg, ties.method = "min"),
156 |         by = cyl) %>%
157 |     head()
158 | 
159 | # You can drop variables by setting them to NULL
160 | mtcars %>%
161 |     let(cyl = NULL) %>%
162 |     head()
163 | 
164 | # keeps all existing variables
165 | mtcars %>%
166 |     let(displ_l = disp / 61.0237) %>%
167 |     head()
168 | 
169 | # keeps only the variables you create
170 | mtcars %>%
171 |     take(displ_l = disp / 61.0237) %>% 
172 |     head()
173 | 
174 | 
175 | # can refer to both contextual variables and variable names:
176 | var = 100
177 | mtcars %>%
178 |     let(cyl = cyl * var) %>%
179 |     head()
180 | 
181 | # select rows
182 | mtcars %>%
183 |     rows(am==0) %>% 
184 |     head()
185 | 
186 | # select rows with compound condition
187 | mtcars %>%
188 |     rows(am==0 & mpg>mean(mpg))
189 | 
190 | # select columns
191 | mtcars %>% 
192 |     columns(vs:carb, cyl)
193 |     
194 | mtcars %>% 
195 |     columns(-am, -cyl)    
196 | 
197 | # regular expression pattern
198 | columns(iris, "^Petal") %>% head() # variables which start from 'Petal'
199 | columns(iris, "Width$") %>% head() # variables which end with 'Width'
200 | 
201 | # move Species variable to the front
202 | # pattern "^." matches all variables
203 | columns(iris, Species, "^.") %>% head()
204 | 
205 | # pattern "^.*al" means "contains 'al'"
206 | columns(iris, "^.*al") %>% head()
207 | 
208 | # numeric indexing - all variables except Species
209 | columns(iris, 1:4) %>% head()
210 | 
211 | # A 'take' with summary functions applied without 'by' argument returns an aggregated data
212 | mtcars %>%
213 |     take(mean = mean(disp), n = .N)
214 | 
215 | # Usually, you'll want to group first
216 | mtcars %>%
217 |     take(mean = mean(disp), n = .N, by = am)
218 | 
219 | # grouping by multiple variables
220 | mtcars %>%
221 |     take(mean = mean(disp), n = .N, by = list(am, vs))
222 | 
223 | # You can group by expressions:
224 | mtcars %>%
225 |     take_all(
226 |         mean,
227 |         by = list(vsam = vs + am)
228 |     )
229 | 
230 | # modify all non-grouping variables in-place
231 | mtcars %>%
232 |     let_all((.x - mean(.x))/sd(.x), by = am) %>%
233 |     head()
234 | 
235 | # modify all non-grouping variables to new variables
236 | mtcars %>%
237 |     let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
238 |     head()
239 | 
240 | # conditionally modify all variables
241 | iris %>%
242 |     let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
243 |     head()
244 | 
245 | # modify all variables conditionally on name
246 | iris %>%
247 |     let_all(
248 |         mean = if(startsWith(.name, "Sepal")) mean(.x),
249 |         median = if(startsWith(.name, "Petal")) median(.x),
250 |         by = Species
251 |     ) %>%
252 |     head()
253 | 
254 | # aggregation with 'take_all'
255 | mtcars %>%
256 |     take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
257 | 
258 | # conditionally aggregate all variables
259 | iris %>%
260 |     take_all(mean = if(is.numeric(.x)) mean(.x))
261 | 
262 | # aggregate all variables conditionally on name
263 | iris %>%
264 |     take_all(
265 |         mean = if(startsWith(.name, "Sepal")) mean(.x),
266 |         median = if(startsWith(.name, "Petal")) median(.x),
267 |         by = Species
268 |     )
269 | 
270 | # parametric evaluation:
271 | var = quote(mean(cyl))
272 | mtcars %>% 
273 |     let(mean_cyl = eval(var)) %>% 
274 |     head()
275 | take(mtcars, eval(var))
276 | 
277 | # all together
278 | new_var = "mean_cyl"
279 | mtcars %>% 
280 |     let((new_var) := eval(var)) %>% 
281 |     head()
282 | take(mtcars, (new_var) := eval(var))
283 | 
284 | 
285 | ```
286 | 
287 | ## Variable selection in the expressions
288 | 
289 | You can use 'columns' inside expression in the 'take'/'let'. 'columns' will
290 | be replaced with data.table with selected columns. In 'let' in the
291 | expressions with ':=', 'cols' or '%to%' can be placed in the left part of the
292 | expression. It is usefull for multiple assignment.
293 | There are four ways of column selection:
294 | 
295 | 1. Simply by column names
296 | 2. By variable ranges, e. g. vs:carb. Alternatively, you can use '%to%'
297 | instead of colon: 'vs %to% carb'.
298 | 3. With regular expressions. Characters which start with '^' or end with $
299 | considered as Perl-style regular expression patterns. For example, '^Petal'
300 | returns all variables started with 'Petal'. 'Width$' returns all variables
301 | which end with 'Width'. Pattern '^.' matches all variables and pattern
302 | '^.*my_str' is equivalent to contains "my_str"'.
303 | 4. By character variables with interpolated parts. Expression in the curly
304 | brackets inside characters will be evaluated in the parent frame with
305 | 'text_expand' function. For example, `a{1:3}` will be transformed to the names 'a1',
306 | 'a2', 'a3'. 'cols' is just a shortcut for 'columns'.
307 | 
308 | ```{r}
309 | # range selection
310 | iris %>% 
311 |     let(
312 |         avg = rowMeans(Sepal.Length %to% Petal.Width)
313 |     ) %>% 
314 |     head()
315 | 
316 | # multiassignment
317 | iris %>% 
318 |     let(
319 |         # starts with Sepal or Petal
320 |         multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
321 |     ) %>% 
322 |     head()
323 | 
324 | 
325 | mtcars %>% 
326 |     let(
327 |         # text expansion
328 |         cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
329 |     ) %>% 
330 |     head()
331 | 
332 | # range selection in 'by'
333 | # selection of range + additional column
334 | mtcars %>% 
335 |     take(
336 |         res = sum(cols(mpg, disp %to% drat)),
337 |         by = vs %to% gear
338 |     )
339 | ```
340 | 
341 | ## Joins
342 | 
343 | Here we use the same datasets as with lookups:
344 | 
345 | ```{r}
346 | workers = fread("
347 |     name company
348 |     Nick Acme
349 |     John Ajax
350 |     Daniela Ajax
351 | ")
352 | 
353 | positions = fread("
354 |     name position
355 |     John designer
356 |     Daniela engineer
357 |     Cathie manager
358 | ")
359 | 
360 | workers
361 | positions
362 | ```
363 | 
364 | Different kinds of joins:
365 | 
366 | ```{r}
367 | workers %>% dt_inner_join(positions)
368 | workers %>% dt_left_join(positions)
369 | workers %>% dt_right_join(positions)
370 | workers %>% dt_full_join(positions)
371 | 
372 | # filtering joins
373 | workers %>% dt_anti_join(positions)
374 | workers %>% dt_semi_join(positions)
375 | ```
376 | 
377 | To suppress the message, supply `by` argument:
378 | ```{r, eval=FALSE}
379 | workers %>% dt_left_join(positions, by = "name")
380 | ```
381 | 
382 | Use a named `by` if the join variables have different names:
383 | ```{r, eval=FALSE}
384 | positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
385 | workers %>% dt_inner_join(positions2, by = c("name" = "worker"))
386 | ```
387 | 
388 | 
389 | 
390 | 
391 | 
392 | 
393 | 


--------------------------------------------------------------------------------