├── .Rbuildignore
├── .covrignore
├── .gitattributes
├── .github
    ├── .gitignore
    ├── CODE_OF_CONDUCT.md
    ├── CONTRIBUTING.md
    ├── ISSUE_TEMPLATE
    │   └── issue_template.md
    ├── SUPPORT.md
    └── workflows
    │   ├── R-CMD-check.yaml
    │   ├── pkgdown.yaml
    │   ├── pr-commands.yaml
    │   └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── by.R
    ├── compat-purrr.R
    ├── complete.R
    ├── count.R
    ├── dtplyr-package.R
    ├── fill.R
    ├── reframe.R
    ├── replace_na.R
    ├── step-assign.R
    ├── step-call-pivot_longer.R
    ├── step-call-pivot_wider.R
    ├── step-call.R
    ├── step-colorder-relocate.R
    ├── step-colorder.R
    ├── step-first.R
    ├── step-group.R
    ├── step-join.R
    ├── step-modify.R
    ├── step-mutate.R
    ├── step-nest.R
    ├── step-set.R
    ├── step-setnames.R
    ├── step-subset-arrange.R
    ├── step-subset-do.R
    ├── step-subset-expand.R
    ├── step-subset-filter.R
    ├── step-subset-select.R
    ├── step-subset-separate.R
    ├── step-subset-slice.R
    ├── step-subset-summarise.R
    ├── step-subset-transmute.R
    ├── step-subset.R
    ├── step.R
    ├── tidyeval-across.R
    ├── tidyeval.R
    ├── unite.R
    ├── utils.R
    └── zzz.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── cran-comments.md
├── dtplyr.Rproj
├── man
    ├── arrange.dtplyr_step.Rd
    ├── collect.dtplyr_step.Rd
    ├── complete.dtplyr_step.Rd
    ├── count.dtplyr_step.Rd
    ├── distinct.dtplyr_step.Rd
    ├── dot-datatable.aware.Rd
    ├── drop_na.dtplyr_step.Rd
    ├── dtplyr-package.Rd
    ├── expand.dtplyr_step.Rd
    ├── figures
    │   ├── dt-seal.png
    │   └── logo.png
    ├── fill.dtplyr_step.Rd
    ├── filter.dtplyr_step.Rd
    ├── group_by.dtplyr_step.Rd
    ├── group_modify.dtplyr_step.Rd
    ├── head.dtplyr_step.Rd
    ├── intersect.dtplyr_step.Rd
    ├── lazy_dt.Rd
    ├── left_join.dtplyr_step.Rd
    ├── mutate.dtplyr_step.Rd
    ├── nest.dtplyr_step.Rd
    ├── pivot_longer.dtplyr_step.Rd
    ├── pivot_wider.dtplyr_step.Rd
    ├── reframe.dtplyr_step.Rd
    ├── relocate.dtplyr_step.Rd
    ├── rename.dtplyr_step.Rd
    ├── replace_na.dtplyr_step.Rd
    ├── select.dtplyr_step.Rd
    ├── separate.dtplyr_step.Rd
    ├── slice.dtplyr_step.Rd
    ├── summarise.dtplyr_step.Rd
    ├── transmute.dtplyr_step.Rd
    └── unite.dtplyr_step.Rd
├── pkgdown
    └── favicon
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   ├── apple-touch-icon-180x180.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   └── favicon.ico
├── revdep
    ├── .gitignore
    ├── README.md
    ├── cran.md
    ├── email.yml
    ├── failures.md
    └── problems.md
├── tests
    ├── testthat.R
    └── testthat
    │   ├── _snaps
    │       ├── count.md
    │       ├── step-call-pivot_longer.md
    │       ├── step-call-pivot_wider.md
    │       ├── step-call.md
    │       ├── step-colorder-relocate.md
    │       ├── step-colorder.md
    │       ├── step-group.md
    │       ├── step-join.md
    │       ├── step-mutate.md
    │       ├── step-subset-filter.md
    │       ├── step-subset-select.md
    │       ├── step-subset-separate.md
    │       ├── step-subset-slice.md
    │       ├── step-subset-summarise.md
    │       ├── step.md
    │       ├── tidyeval-across.md
    │       ├── tidyeval.md
    │       └── unite.md
    │   ├── helpers-library.R
    │   ├── test-complete.R
    │   ├── test-count.R
    │   ├── test-fill.R
    │   ├── test-reframe.R
    │   ├── test-replace_na.R
    │   ├── test-step-call-pivot_longer.R
    │   ├── test-step-call-pivot_wider.R
    │   ├── test-step-call.R
    │   ├── test-step-colorder-relocate.R
    │   ├── test-step-colorder.R
    │   ├── test-step-first.R
    │   ├── test-step-group.R
    │   ├── test-step-join.R
    │   ├── test-step-modify.R
    │   ├── test-step-mutate.R
    │   ├── test-step-nest.R
    │   ├── test-step-set.R
    │   ├── test-step-subset-arrange.R
    │   ├── test-step-subset-do.R
    │   ├── test-step-subset-expand.R
    │   ├── test-step-subset-filter.R
    │   ├── test-step-subset-select.R
    │   ├── test-step-subset-separate.R
    │   ├── test-step-subset-slice.R
    │   ├── test-step-subset-summarise.R
    │   ├── test-step-subset-transmute.R
    │   ├── test-step-subset.R
    │   ├── test-step.R
    │   ├── test-tidyeval-across.R
    │   ├── test-tidyeval.R
    │   └── test-unite.R
└── vignettes
    ├── .gitignore
    ├── benchmark.R
    └── translation.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^CRAN-RELEASE$
 2 | ^.*\.Rproj$
 3 | ^\.Rproj\.user$
 4 | ^\.travis\.yml$
 5 | ^cran-comments\.md$
 6 | ^revdep$
 7 | ^codecov\.yml$
 8 | ^vignettes/benchmark\.R$
 9 | ^vignettes/translation\.R$
10 | ^_pkgdown\.yml$
11 | ^docs$
12 | ^pkgdown$
13 | ^\.covrignore$
14 | ^README\.Rmd$
15 | ^\.github$
16 | ^LICENSE\.md$
17 | ^\.github/workflows/R-CMD-check\.yaml$
18 | ^\.github/workflows/pr-commands\.yaml$
19 | ^\.github/workflows/pkgdown\.yaml$
20 | ^CRAN-SUBMISSION$
21 | 


--------------------------------------------------------------------------------
/.covrignore:
--------------------------------------------------------------------------------
1 | R/deprec-*.R
2 | R/compat-*.R
3 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | /NEWS.md merge=union
2 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, we pledge to respect all people who 
 4 | contribute through reporting issues, posting feature requests, updating documentation,
 5 | submitting pull requests or patches, and other activities.
 6 | 
 7 | We are committed to making participation in this project a harassment-free experience for
 8 | everyone, regardless of level of experience, gender, gender identity and expression,
 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 | 
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 | 
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
18 | from the project team.
19 | 
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
21 | opening an issue or contacting one or more of the project maintainers.
22 | 
23 | This Code of Conduct is adapted from the Contributor Covenant 
24 | (https://www.contributor-covenant.org), version 1.0.0, available at 
25 | https://contributor-covenant.org/version/1/0/0/.
26 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to dtplyr
 2 | 
 3 | This outlines how to propose a change to dtplyr. 
 4 | For more detailed info about contributing to this, and other tidyverse packages, please see the
 5 | [**development contributing guide**](https://rstd.io/tidy-contrib). 
 6 | 
 7 | ## Fixing typos
 8 | 
 9 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 
10 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 
11 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line.
12 | 
13 | ## Bigger changes
14 | 
15 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 
16 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 
17 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed).
18 | 
19 | ### Pull request process
20 | 
21 | *   Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("tidyverse/dtplyr", fork = TRUE)`.
22 | 
23 | *   Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 
24 |     If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 
25 | *   Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`.
26 | 
27 | *   Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser.
28 |     The title of your PR should briefly describe the change.
29 |     The body of your PR should contain `Fixes #issue-number`.
30 | 
31 | *  For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in <https://style.tidyverse.org/news.html>.
32 | 
33 | ### Code style
34 | 
35 | *   New code should follow the tidyverse [style guide](https://style.tidyverse.org). 
36 |     You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR.  
37 | 
38 | *  We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://roxygen2.r-lib.org/articles/rd-formatting.html), for documentation.  
39 | 
40 | *  We use [testthat](https://cran.r-project.org/package=testthat) for unit tests. 
41 |    Contributions with test cases included are easier to accept.  
42 | 
43 | ## Code of Conduct
44 | 
45 | Please note that the dtplyr project is released with a
46 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this
47 | project you agree to abide by its terms.
48 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/issue_template.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report or feature request
 3 | about: Describe a bug you've seen or make a case for a new feature
 4 | ---
 5 | 
 6 | Please briefly describe your problem and what output you expect. If you have a question, please don't use this form. Instead, ask on <https://stackoverflow.com/> or <https://community.rstudio.com/>.
 7 | 
 8 | Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](https://reprex.tidyverse.org/) before, start by reading <https://www.tidyverse.org/help/#reprex>.
 9 | 
10 | Brief description of the problem
11 | 
12 | ```r
13 | # insert reprex here
14 | ```
15 | 


--------------------------------------------------------------------------------
/.github/SUPPORT.md:
--------------------------------------------------------------------------------
 1 | # Getting help with dtplyr
 2 | 
 3 | Thanks for using dtplyr!
 4 | Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible.
 5 | 
 6 | ## Make a reprex
 7 | 
 8 | Start by making a minimal **repr**oducible **ex**ample using the  [reprex](https://reprex.tidyverse.org/) package. 
 9 | If you haven't heard of or used reprex before, you're in for a treat! 
10 | Seriously, reprex will make all of your R-question-asking endeavors easier (which is a pretty insane ROI for the five to ten minutes it'll take you to learn what it's all about). 
11 | For additional reprex pointers, check out the [Get help!](https://www.tidyverse.org/help/) section of the tidyverse site.
12 | 
13 | ## Where to ask?
14 | 
15 | Armed with your reprex, the next step is to figure out [where to ask](https://www.tidyverse.org/help/#where-to-ask). 
16 | 
17 | *   If it's a question: start with [community.rstudio.com](https://community.rstudio.com/), and/or StackOverflow. There are more people there to answer questions.  
18 | 
19 | *   If it's a bug: you're in the right place, [file an issue](https://github.com/tidyverse/dtplyr/issues/new).  
20 |   
21 | *   If you're not sure: let the community help you figure it out! 
22 |     If your problem _is_ a bug or a feature request, you can easily return here and report it. 
23 | 
24 | Before opening a new issue, be sure to [search issues and pull requests](https://github.com/tidyverse/dtplyr/issues) to make sure the bug hasn't been reported and/or already fixed in the development version. 
25 | By default, the search will be pre-populated with `is:issue is:open`. 
26 | You can [edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/)  (e.g. `is:pr`, `is:closed`) as needed. 
27 | For example, you'd simply remove `is:open` to search _all_ issues in the repo, open or closed.
28 | 
29 | ## What happens next?
30 | 
31 | To be as efficient as possible, development of tidyverse packages tends to be very bursty, so you shouldn't worry if you don't get an immediate response.
32 | Typically we don't look at a repo until a sufficient quantity of issues accumulates, then there’s a burst of intense activity as we focus our efforts. 
33 | That makes development more efficient because it avoids expensive context switching between problems, at the cost of taking longer to get back to you. 
34 | This process makes a good reprex particularly important because it might be multiple months between your initial report and when we start working on it. 
35 | If we can’t reproduce the bug, we can’t fix it!
36 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | #
 4 | # NOTE: This workflow is overkill for most R packages and
 5 | # check-standard.yaml is likely a better choice.
 6 | # usethis::use_github_action("check-standard") will install it.
 7 | on:
 8 |   push:
 9 |     branches: [main, master]
10 |   pull_request:
11 |     branches: [main, master]
12 | 
13 | name: R-CMD-check.yaml
14 | 
15 | permissions: read-all
16 | 
17 | jobs:
18 |   R-CMD-check:
19 |     runs-on: ${{ matrix.config.os }}
20 | 
21 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
22 | 
23 |     strategy:
24 |       fail-fast: false
25 |       matrix:
26 |         config:
27 |           - {os: macos-latest,   r: 'release'}
28 | 
29 |           - {os: windows-latest, r: 'release'}
30 |           # use 4.0 or 4.1 to check with rtools40's older compiler
31 |           - {os: windows-latest, r: 'oldrel-4'}
32 | 
33 |           - {os: ubuntu-latest,  r: 'devel', http-user-agent: 'release'}
34 |           - {os: ubuntu-latest,  r: 'release'}
35 |           - {os: ubuntu-latest,  r: 'oldrel-1'}
36 |           - {os: ubuntu-latest,  r: 'oldrel-2'}
37 |           - {os: ubuntu-latest,  r: 'oldrel-3'}
38 |           - {os: ubuntu-latest,  r: 'oldrel-4'}
39 | 
40 |     env:
41 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
42 |       R_KEEP_PKG_SOURCE: yes
43 | 
44 |     steps:
45 |       - uses: actions/checkout@v4
46 | 
47 |       - uses: r-lib/actions/setup-pandoc@v2
48 | 
49 |       - uses: r-lib/actions/setup-r@v2
50 |         with:
51 |           r-version: ${{ matrix.config.r }}
52 |           http-user-agent: ${{ matrix.config.http-user-agent }}
53 |           use-public-rspm: true
54 | 
55 |       - uses: r-lib/actions/setup-r-dependencies@v2
56 |         with:
57 |           extra-packages: any::rcmdcheck
58 |           needs: check
59 | 
60 |       - uses: r-lib/actions/check-r-package@v2
61 |         with:
62 |           upload-snapshots: true
63 |           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
64 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   release:
 9 |     types: [published]
10 |   workflow_dispatch:
11 | 
12 | name: pkgdown.yaml
13 | 
14 | permissions: read-all
15 | 
16 | jobs:
17 |   pkgdown:
18 |     runs-on: ubuntu-latest
19 |     # Only restrict concurrency for non-PR jobs
20 |     concurrency:
21 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
22 |     env:
23 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
24 |     permissions:
25 |       contents: write
26 |     steps:
27 |       - uses: actions/checkout@v4
28 | 
29 |       - uses: r-lib/actions/setup-pandoc@v2
30 | 
31 |       - uses: r-lib/actions/setup-r@v2
32 |         with:
33 |           use-public-rspm: true
34 | 
35 |       - uses: r-lib/actions/setup-r-dependencies@v2
36 |         with:
37 |           extra-packages: any::pkgdown, local::.
38 |           needs: website
39 | 
40 |       - name: Build site
41 |         run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
42 |         shell: Rscript {0}
43 | 
44 |       - name: Deploy to GitHub pages 🚀
45 |         if: github.event_name != 'pull_request'
46 |         uses: JamesIves/github-pages-deploy-action@v4.5.0
47 |         with:
48 |           clean: false
49 |           branch: gh-pages
50 |           folder: docs
51 | 


--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 | 
 7 | name: pr-commands.yaml
 8 | 
 9 | permissions: read-all
10 | 
11 | jobs:
12 |   document:
13 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
14 |     name: document
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     permissions:
19 |       contents: write
20 |     steps:
21 |       - uses: actions/checkout@v4
22 | 
23 |       - uses: r-lib/actions/pr-fetch@v2
24 |         with:
25 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
26 | 
27 |       - uses: r-lib/actions/setup-r@v2
28 |         with:
29 |           use-public-rspm: true
30 | 
31 |       - uses: r-lib/actions/setup-r-dependencies@v2
32 |         with:
33 |           extra-packages: any::roxygen2
34 |           needs: pr-document
35 | 
36 |       - name: Document
37 |         run: roxygen2::roxygenise()
38 |         shell: Rscript {0}
39 | 
40 |       - name: commit
41 |         run: |
42 |           git config --local user.name "$GITHUB_ACTOR"
43 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
44 |           git add man/\* NAMESPACE
45 |           git commit -m 'Document'
46 | 
47 |       - uses: r-lib/actions/pr-push@v2
48 |         with:
49 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
50 | 
51 |   style:
52 |     if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
53 |     name: style
54 |     runs-on: ubuntu-latest
55 |     env:
56 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
57 |     permissions:
58 |       contents: write
59 |     steps:
60 |       - uses: actions/checkout@v4
61 | 
62 |       - uses: r-lib/actions/pr-fetch@v2
63 |         with:
64 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
65 | 
66 |       - uses: r-lib/actions/setup-r@v2
67 | 
68 |       - name: Install dependencies
69 |         run: install.packages("styler")
70 |         shell: Rscript {0}
71 | 
72 |       - name: Style
73 |         run: styler::style_pkg()
74 |         shell: Rscript {0}
75 | 
76 |       - name: commit
77 |         run: |
78 |           git config --local user.name "$GITHUB_ACTOR"
79 |           git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
80 |           git add \*.R
81 |           git commit -m 'Style'
82 | 
83 |       - uses: r-lib/actions/pr-push@v2
84 |         with:
85 |           repo-token: ${{ secrets.GITHUB_TOKEN }}
86 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: test-coverage.yaml
10 | 
11 | permissions: read-all
12 | 
13 | jobs:
14 |   test-coverage:
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v4
21 | 
22 |       - uses: r-lib/actions/setup-r@v2
23 |         with:
24 |           use-public-rspm: true
25 | 
26 |       - uses: r-lib/actions/setup-r-dependencies@v2
27 |         with:
28 |           extra-packages: any::covr, any::xml2
29 |           needs: coverage
30 | 
31 |       - name: Test coverage
32 |         run: |
33 |           cov <- covr::package_coverage(
34 |             quiet = FALSE,
35 |             clean = FALSE,
36 |             install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
37 |           )
38 |           covr::to_cobertura(cov)
39 |         shell: Rscript {0}
40 | 
41 |       - uses: codecov/codecov-action@v4
42 |         with:
43 |           fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
44 |           file: ./cobertura.xml
45 |           plugin: noop
46 |           disable_search: true
47 |           token: ${{ secrets.CODECOV_TOKEN }}
48 | 
49 |       - name: Show testthat output
50 |         if: always()
51 |         run: |
52 |           ## --------------------------------------------------------------------
53 |           find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
54 |         shell: bash
55 | 
56 |       - name: Upload test results
57 |         if: failure()
58 |         uses: actions/upload-artifact@v4
59 |         with:
60 |           name: coverage-test-failures
61 |           path: ${{ runner.temp }}/package
62 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | docs/
5 | inst/doc
6 | *.swp
7 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: dtplyr
 2 | Title: Data Table Back-End for 'dplyr'
 3 | Version: 1.3.1.9000
 4 | Authors@R: c(
 5 |     person("Hadley", "Wickham", , "hadley@posit.co", role = c("cre", "aut")),
 6 |     person("Maximilian", "Girlich", role = "aut"),
 7 |     person("Mark", "Fairbanks", role = "aut"),
 8 |     person("Ryan", "Dickerson", role = "aut"),
 9 |     person("Posit Software, PBC", role = c("cph", "fnd"))
10 |   )
11 | Description: Provides a data.table backend for 'dplyr'. The goal of
12 |     'dtplyr' is to allow you to write 'dplyr' code that is automatically
13 |     translated to the equivalent, but usually much faster, data.table
14 |     code.
15 | License: MIT + file LICENSE
16 | URL: https://dtplyr.tidyverse.org, https://github.com/tidyverse/dtplyr
17 | BugReports: https://github.com/tidyverse/dtplyr/issues
18 | Depends:
19 |     R (>= 4.0)
20 | Imports:
21 |     cli (>= 3.4.0),
22 |     data.table (>= 1.13.0),
23 |     dplyr (>= 1.1.0),
24 |     glue,
25 |     lifecycle,
26 |     rlang (>= 1.0.4),
27 |     tibble,
28 |     tidyselect (>= 1.2.0),
29 |     vctrs (>= 0.4.1)
30 | Suggests:
31 |     bench,
32 |     covr,
33 |     knitr,
34 |     rmarkdown,
35 |     testthat (>= 3.1.2),
36 |     tidyr (>= 1.1.0),
37 |     waldo (>= 0.3.1)
38 | VignetteBuilder:
39 |     knitr
40 | Config/Needs/website: tidyverse/tidytemplate
41 | Config/testthat/edition: 3
42 | Encoding: UTF-8
43 | Roxygen: {library(tidyr); list(markdown = TRUE)}
44 | RoxygenNote: 7.3.2
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: dtplyr authors
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2023 dtplyr authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/R/by.R:
--------------------------------------------------------------------------------
 1 | # Resolve a tidyselect `by` specification into a `dtplyr_by` object.
 2 | #
 3 | # `by` is captured as a quosure, checked with `check_by()`, and evaluated
 4 | # against `data` by `eval_select_by()`.
 5 | # `...` must be empty.
 6 | compute_by <- function(by,
 7 |                        data,
 8 |                        ...,
 9 |                        by_arg = "by",
10 |                        data_arg = "data",
11 |                        error_call = caller_env()) {
12 |   check_dots_empty0(...)
13 | 
14 |   by_quo <- enquo(by)
15 |   check_by(by_quo, data, by_arg = by_arg, data_arg = data_arg, error_call = error_call)
16 | 
17 |   selected <- eval_select_by(by_quo, data, error_call = error_call)
18 | 
19 |   # `uses_by` records whether the selection picked at least one column.
20 |   new_by(uses_by = length(selected) > 0, names = selected)
21 | }
22 | 
23 | is_grouped_dt <- function(data) {
24 |   length(group_vars(data)) > 0 # grouped iff there is at least one grouping variable
25 | }
26 | 
27 | # Abort when `by` is supplied together with an already-grouped `data`.
28 | # Returns `NULL` invisibly otherwise; `...` must be empty.
29 | check_by <- function(by,
30 |                      data,
31 |                      ...,
32 |                      by_arg = "by",
33 |                      data_arg = "data",
34 |                      error_call = caller_env()) {
35 |   check_dots_empty0(...)
36 | 
37 |   # Only a non-NULL `by` combined with grouped data is an error.
38 |   if (quo_is_null(by) || !is_grouped_dt(data)) {
39 |     return(invisible(NULL))
40 |   }
41 | 
42 |   msg <- paste0(
43 |     "Can't supply {.arg {by_arg}} when ",
44 |     "{.arg {data_arg}} is a grouped data frame."
45 |   )
46 |   # The `{by_arg}` / `{data_arg}` placeholders are filled in by cli.
47 |   cli::cli_abort(msg, call = error_call)
48 | }
49 | 
50 | # Evaluate the tidyselect expression `by` against `data` (renaming is not
51 | # allowed) and return the names of the selected columns.
52 | eval_select_by <- function(by, data, error_call = caller_env()) {
53 |   selection <- tidyselect::eval_select(
54 |     expr = by,
55 |     data = data,
56 |     allow_rename = FALSE,
57 |     error_call = error_call
58 |   )
59 |   names(selection)
60 | }
61 | 
62 | # Construct a `dtplyr_by` record: a classed list holding `uses_by` and `names`.
63 | new_by <- function(uses_by = FALSE, names = character()) {
64 |   fields <- list(uses_by = uses_by, names = names)
65 |   structure(fields, class = "dtplyr_by")
66 | }
67 | 
68 | 


--------------------------------------------------------------------------------
/R/complete.R:
--------------------------------------------------------------------------------
 1 | #' Complete a data frame with missing combinations of data
 2 | #'
 3 | #' @description
 4 | #' This is a method for the tidyr `complete()` generic. This is a wrapper
 5 | #' around `dtplyr` translations for `expand()`, `full_join()`, and `replace_na()`
 6 | #' that's useful for completing missing combinations of data.
 7 | #'
 8 | #' @param data A [lazy_dt()].
 9 | #' @inheritParams tidyr::complete
10 | #' @examples
11 | #' library(tidyr)
12 | #' tbl <- tibble(x = 1:2, y = 1:2, z = 3:4)
13 | #' dt <- lazy_dt(tbl)
14 | #'
15 | #' dt %>%
16 | #'   complete(x, y)
17 | #'
18 | #' dt %>%
19 | #'   complete(x, y, fill = list(z = 10L))
20 | # exported onLoad
21 | complete.dtplyr_step <- function(data, ..., fill = list()) {
22 |   # Drop `NULL` expressions; with nothing left there is nothing to complete.
23 |   exprs <- enquos(...)
24 |   exprs <- exprs[!map_lgl(exprs, quo_is_null)]
25 |   if (length(exprs) == 0) {
26 |     return(data)
27 |   }
28 | 
29 |   combos <- tidyr::expand(data, !!!exprs)
30 |   joined <- dplyr::full_join(combos, data, by = combos$vars)
31 |   tidyr::replace_na(joined, replace = fill)
32 | }
33 | 


--------------------------------------------------------------------------------
/R/count.R:
--------------------------------------------------------------------------------
 1 | #' Count observations by group
 2 | #'
 3 | #' This is a method for the dplyr [count()] generic. It is translated using
 4 | #' `.N` in the `j` argument, and supplying groups to `keyby` as appropriate.
 5 | #'
 6 | #' @param x A [lazy_dt()]
 7 | #' @inheritParams dplyr::count
 8 | #' @importFrom dplyr count
 9 | #' @export
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #'
13 | #' dt <- lazy_dt(dplyr::starwars)
14 | #' dt %>% count(species)
15 | #' dt %>% count(species, sort = TRUE)
16 | #' dt %>% count(species, wt = mass, sort = TRUE)
17 | count.dtplyr_step <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
18 |   # No count variables: tally `x` as-is with `.groups = "keep"`.
19 |   # Otherwise: add them as groups and tally with `.groups = "drop"`.
20 |   if (missing(...)) {
21 |     grouped <- x
22 |     groups_arg <- "keep"
23 |   } else {
24 |     grouped <- group_by(x, ..., .add = TRUE)
25 |     groups_arg <- "drop"
26 |   }
27 | 
28 |   tally_count(grouped, {{ wt }}, sort, name, groups_arg)
29 | }
30 | 
31 | #' @importFrom dplyr add_count
32 | #' @export
add_count.dtplyr_step <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
  # Remember the incoming grouping so it can be reinstated at the end.
  original_groups <- group_vars(x)

  counted <- x
  if (!missing(...)) {
    counted <- group_by(x, ..., .add = TRUE)
  }

  counted <- dplyr::add_tally(counted, wt = !!enquo(wt), sort = sort, name = name)
  group_by(counted, !!!syms(original_groups))
}
43 | 
44 | #' @importFrom dplyr tally
45 | #' @export
tally.dtplyr_step <- function(x, wt = NULL, sort = FALSE, name = NULL) {
  # Forward the weight expression unevaluated; the result is summarised with
  # `.groups = "drop_last"`.
  tally_count(x, wt = {{ wt }}, sort = sort, name = name, .groups = "drop_last")
}
49 | 
50 | # Helpers -----------------------------------------------------------------
51 | 
# Shared engine for `count()` and `tally()`: summarise to a single count
# column, optionally sorted in descending order of that column.
tally_count <- function(.data, wt = NULL, sort = FALSE, name = NULL, .groups = "drop_last") {
  wt <- enquo(wt)
  # Unweighted counts use `n()`; weighted counts sum the weights, ignoring NAs.
  n <- if (quo_is_null(wt)) expr(n()) else expr(sum(!!wt, na.rm = TRUE))

  name <- check_name(name, .data$groups)
  counted <- summarise(.data, !!name := !!n, .groups = .groups)

  if (sort) arrange(counted, desc(!!sym(name))) else counted
}
69 | 
# Validate a user-supplied count column name, or derive a default that does
# not clash with any of the names in `vars`.
check_name <- function(name, vars) {
  if (!is.null(name)) {
    if (!is_string(name)) {
      abort("`name` must be a string")
    }
    return(name)
  }

  name <- n_name(vars)
  # Tell the user when the default "n" had to be lengthened to avoid a clash.
  if (name != "n") {
    inform(c(
      glue::glue("Storing counts in `{name}`, as `n` already present in input"),
      i = "Use `name = \"new_name\"` to pick a new name."
    ))
  }

  name
}
86 | 
# Return the shortest name of the form "n", "nn", "nnn", ... that does not
# already occur in `x`.
n_name <- function(x) {
  candidate <- "n"
  repeat {
    if (!(candidate %in% x)) {
      return(candidate)
    }
    candidate <- paste0("n", candidate)
  }
}
95 | 
96 | 


--------------------------------------------------------------------------------
/R/dtplyr-package.R:
--------------------------------------------------------------------------------
 1 | #' @import rlang
 2 | #' @importFrom data.table data.table as.data.table is.data.table
 3 | #' @importFrom lifecycle deprecated
 4 | #' @importFrom glue glue
 5 | #' @keywords internal
 6 | "_PACKAGE"
 7 | 
 8 | #' dtplyr is data.table aware
 9 | #'
10 | #' @keywords internal
11 | #' @export
.datatable.aware <- TRUE

# Register these names with `globalVariables()`; presumably this silences
# R CMD check notes about symbols that are only used through non-standard
# evaluation -- TODO confirm.
globalVariables(c(".SD", ".N", ".BY", ".I", "desc"))
15 | 


--------------------------------------------------------------------------------
/R/fill.R:
--------------------------------------------------------------------------------
 1 | #' Fill in missing values with previous or next value
 2 | #'
 3 | #' @description
 4 | #' This is a method for the tidyr `fill()` generic. It is translated to
 5 | #' [data.table::nafill()]. Note that `data.table::nafill()` currently only
 6 | #' works for integer and double columns.
 7 | #'
 8 | #' @inheritParams tidyr::fill
 9 | #' @examples
10 | #' library(tidyr)
11 | #'
12 | #' # Value (year) is recorded only when it changes
13 | #' sales <- lazy_dt(tibble::tribble(
14 | #'   ~quarter, ~year, ~sales,
15 | #'   "Q1",    2000,    66013,
16 | #'   "Q2",      NA,    69182,
17 | #'   "Q3",      NA,    53175,
18 | #'   "Q4",      NA,    21001,
19 | #'   "Q1",    2001,    46036,
20 | #'   "Q2",      NA,    58842,
21 | #'   "Q3",      NA,    44568,
22 | #'   "Q4",      NA,    50197,
23 | #'   "Q1",    2002,    39113,
24 | #'   "Q2",      NA,    41668,
25 | #'   "Q3",      NA,    30144,
26 | #'   "Q4",      NA,    52897,
27 | #'   "Q1",    2004,    32129,
28 | #'   "Q2",      NA,    67686,
29 | #'   "Q3",      NA,    31768,
30 | #'   "Q4",      NA,    49094
31 | #' ))
32 | #'
33 | #' # `fill()` defaults to replacing missing data from top to bottom
34 | #' sales %>% fill(year)
35 | #'
36 | #' # Value (n_squirrels) is missing above and below within a group
37 | #' squirrels <- lazy_dt(tibble::tribble(
38 | #'   ~group,    ~name,     ~role,     ~n_squirrels,
39 | #'   1,      "Sam",    "Observer",   NA,
40 | #'   1,     "Mara", "Scorekeeper",    8,
41 | #'   1,    "Jesse",    "Observer",   NA,
42 | #'   1,      "Tom",    "Observer",   NA,
43 | #'   2,     "Mike",    "Observer",   NA,
44 | #'   2,  "Rachael",    "Observer",   NA,
45 | #'   2,  "Sydekea", "Scorekeeper",   14,
46 | #'   2, "Gabriela",    "Observer",   NA,
47 | #'   3,  "Derrick",    "Observer",   NA,
48 | #'   3,     "Kara", "Scorekeeper",    9,
49 | #'   3,    "Emily",    "Observer",   NA,
50 | #'   3, "Danielle",    "Observer",   NA
51 | #' ))
52 | #'
53 | #' # The values are inconsistently missing by position within the group
54 | #' # Use .direction = "downup" to fill missing values in both directions
55 | #' squirrels %>%
56 | #'   dplyr::group_by(group) %>%
57 | #'   fill(n_squirrels, .direction = "downup") %>%
58 | #'   dplyr::ungroup()
59 | #'
60 | #' # Using `.direction = "updown"` accomplishes the same goal in this example
61 | # exported onLoad
fill.dtplyr_step <- function(data, ..., .direction = c("down", "up", "downup", "updown")) {
  # Each direction maps onto one or two `nafill()` passes:
  # "down" uses type "locf", "up" uses type "nocb", and the combined
  # directions apply both types in the order given by their name.
  dots <- enquos(...)
  .direction <- arg_match(.direction)

  single_pass <- .direction == "down" || .direction == "up"
  if (single_pass) {
    type <- if (.direction == "down") "locf" else "nocb"
    return(mutate(data, dplyr::across(c(!!!dots), nafill, type)))
  }

  downup <- .direction == "downup"
  type1 <- if (downup) "locf" else "nocb"
  type2 <- if (downup) "nocb" else "locf"

  mutate(data, dplyr::across(c(!!!dots), ~ nafill(nafill(.x, type1), type2)))
}
83 | 


--------------------------------------------------------------------------------
/R/reframe.R:
--------------------------------------------------------------------------------
 1 | #' Transform each group to an arbitrary number of rows
 2 | #'
 3 | #' This is a method for the dplyr [reframe()] generic. It is translated to
 4 | #' the `j` argument of `[.data.table`.
 5 | #'
 6 | #' @param .data A [lazy_dt()].
 7 | #' @inheritParams dplyr::reframe
 8 | #' @importFrom dplyr reframe
 9 | #' @export
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #'
13 | #' dt <- lazy_dt(mtcars)
14 | #'
15 | #' dt %>%
16 | #'   reframe(qs = quantile(disp, c(0.25, 0.75)),
17 | #'           prob = c(0.25, 0.75),
18 | #'           .by = cyl)
19 | #'
20 | #' dt %>%
21 | #'   group_by(cyl) %>%
22 | #'   reframe(qs = quantile(disp, c(0.25, 0.75)),
23 | #'           prob = c(0.25, 0.75))
reframe.dtplyr_step <- function(.data, ..., .by = NULL) {
  # Delegate the computation to `summarise()`, then strip whatever grouping
  # the summarised step still carries.
  summarised <- summarise(.data, ..., .by = {{ .by }})
  ungroup(summarised)
}
28 | 


--------------------------------------------------------------------------------
/R/replace_na.R:
--------------------------------------------------------------------------------
 1 | #' Replace NAs with specified values
 2 | #'
 3 | #' @description
 4 | #' This is a method for the tidyr `replace_na()` generic. It is translated to
 5 | #' [data.table::fcoalesce()].
 6 | #'
 7 | #' Note that unlike `tidyr::replace_na()`, `data.table::fcoalesce()` cannot
 8 | #' replace `NULL` values in lists.
 9 | #'
10 | #' @inheritParams tidyr::replace_na
11 | #' @param data A [lazy_dt()].
12 | #' @examples
13 | #' library(tidyr)
14 | #'
15 | #' # Replace NAs in a data frame
16 | #' dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b")))
17 | #' dt %>% replace_na(list(x = 0, y = "unknown"))
18 | #'
19 | #' # Replace NAs using `dplyr::mutate()`
20 | #' dt %>% dplyr::mutate(x = replace_na(x, 0))
21 | # exported onLoad
replace_na.dtplyr_step <- function(data, replace = list()) {
  # Translate `replace` into one `fcoalesce(col, value)` call per column and
  # add those calls to `data` as a mutate step.
  #
  # data:    a lazy data.table step.
  # replace: named list of length-1 replacement values. Names that are not
  #          columns of `data` are ignored.
  # Returns `data` unchanged when `replace` is empty.
  stopifnot(is.list(replace))
  if (length(replace) == 0) {
    return(data)
  }

  sim_data <- simulate_vars(data)
  replace_vars <- intersect(names(replace), names(sim_data))

  replace_calls <- vector("list", length(replace_vars))
  names(replace_calls) <- replace_vars

  for (i in seq_along(replace_vars)) {
    var <- replace_vars[[i]]
    # Look the value up by name: `replace_vars` is a filtered subset of
    # `names(replace)`, so its positions do not line up with `replace`.
    value <- replace[[var]]
    check_replacement(value, var)
    replace_calls[[i]] <- call2("fcoalesce", sym(var), value)
  }

  mutate(data, !!!replace_calls)
}
43 | 
# Abort unless the replacement `x` supplied for column `var` has exactly one
# element; returns NULL otherwise.
check_replacement <- function(x, var) {
  n <- length(x)
  if (n != 1) {
    abort(glue::glue("Replacement for `{var}` is length {n}, not length 1"), call = caller_env())
  }

  return()
}
52 | 


--------------------------------------------------------------------------------
/R/step-assign.R:
--------------------------------------------------------------------------------
 # Build a `dtplyr_step_assign` step on top of `parent`.
 #
 # parent: an existing step.
 # locals: list of local objects, merged over the parent's locals.
 # name:   string stored as the step's name.
 step_locals <- function(parent, locals, name) {
   stopifnot(is_step(parent))
   stopifnot(is.list(locals))
   stopifnot(is_string(name))

   # No trailing comma after the last argument: it would pass an extra empty
   # argument to `new_step()`.
   new_step(
     parent = parent,
     locals = utils::modifyList(parent$locals, locals),
     implicit_copy = TRUE,
     needs_copy = FALSE,
     name = name,
     class = "dtplyr_step_assign"
   )
 }
15 | 
16 | #' @export
dt_call.dtplyr_step_assign <- function(x, needs_copy = FALSE) {
  # The step is referred to purely by its name; `needs_copy` is not used.
  step_name <- x$name
  sym(step_name)
}
20 | 


--------------------------------------------------------------------------------
/R/step-colorder-relocate.R:
--------------------------------------------------------------------------------
 1 | #' Relocate variables using their names
 2 | #'
 3 | #' This is a method for the dplyr [relocate()] generic. It is translated to
 4 | #' the `j` argument of `[.data.table`.
 5 | #'
 6 | #' @param .data A [lazy_dt()].
 7 | #' @inheritParams dplyr::relocate
 8 | #' @importFrom dplyr relocate
 9 | #' @export
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #'
13 | #' dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
14 | #'
15 | #' dt %>% relocate(z)
16 | #' dt %>% relocate(y, .before = x)
17 | #' dt %>% relocate(x, .after = y)
relocate.dtplyr_step <- function(.data, ..., .before = NULL, .after = NULL) {
  # Let tidyselect resolve the relocation, reorder the columns by the
  # resulting names, then reapply the original grouping.
  before <- enquo(.before)
  after <- enquo(.after)

  relocated <- tidyselect::eval_relocate(
    expr(c(...)),
    .data,
    before = before,
    after = after,
    before_arg = ".before",
    after_arg = ".after"
  )

  reordered <- step_colorder(.data, names(relocated))
  step_group(reordered, .data$groups)
}
30 | 


--------------------------------------------------------------------------------
/R/step-colorder.R:
--------------------------------------------------------------------------------
 # Add a `setcolorder()` step that moves `col_order` to the front of `x`.
 #
 # x:         an existing step.
 # col_order: unique column names (character) or positions (integer). It may
 #            list only some of the columns.
 # Returns `x` unchanged when the requested columns already lead in that
 # order. Aborts on duplicated entries, or on a name that matches more than
 # one column of `x`.
 step_colorder <- function(x, col_order) {
   stopifnot(is_step(x))
   stopifnot(is.character(col_order) || is.integer(col_order))

   if (any(duplicated(col_order))) {
     abort("Every element of `col_order` must be unique.")
   }

   col_order <- unname(col_order)
   if (is.integer(col_order)) {
     if (identical(col_order, seq_along(col_order))) {
       return(x)
     }
     moved <- x$vars[col_order]
     rest <- x$vars[-col_order]
   } else {
     vars_selected <- x$vars[x$vars %in% col_order]
     vars_count <- vctrs::vec_count(vars_selected)
     vars_problematic <- vars_count$key[vars_count$count != 1]
     if (!is_empty(vars_problematic)) {
       vars_error <- paste0(vars_problematic, collapse = ", ")
       msg <- paste0("The column(s) ", vars_error, " do not uniquely match a column in `x`.")
       abort(msg)
     }

     if (identical(col_order, x$vars[seq_along(col_order)])) {
       return(x)
     }
     moved <- col_order
     rest <- x$vars[!x$vars %in% col_order]
   }

   # `setcolorder()` with a partial order moves the listed columns to the
   # front and keeps the others after them, so the recorded `vars` must keep
   # the unlisted columns too. For a complete order `rest` is empty.
   vars <- c(moved, rest)

   step_call(x,
     "setcolorder",
     args = list(col_order),
     vars = vars,
     in_place = !x$implicit_copy
   )
 }
38 | 


--------------------------------------------------------------------------------
/R/step-first.R:
--------------------------------------------------------------------------------
  1 | #' Create a "lazy" data.table for use with dplyr verbs
  2 | #'
  3 | #' @description
  4 | #' A lazy data.table captures the intent of dplyr verbs, only actually
  5 | #' performing computation when requested (with [collect()], [pull()],
  6 | #' [as.data.frame()], [data.table::as.data.table()], or [tibble::as_tibble()]).
  7 | #' This allows dtplyr to convert dplyr verbs into as few data.table expressions
  8 | #' as possible, which leads to a high performance translation.
  9 | #'
 10 | #' See `vignette("translation")` for the details of the translation.
 11 | #'
 12 | #' @param x A data table (or something that can be coerced to a data table).
 13 | #' @param immutable If `TRUE`, `x` is treated as immutable and will never
 14 | #'   be modified by any code generated by dtplyr. Alternatively, you can set
 15 | #'   `immutable = FALSE` to allow dtplyr to modify the input object.
 16 | #' @param name Optionally, supply a name to be used in generated expressions.
 17 | #'   For expert use only.
 18 | #' @param key_by Set keys for data frame, using [select()] semantics (e.g.
 19 | #'   `key_by = c(key1, key2)`).
 20 | #'
 21 | #'   This uses [data.table::setkey()] to sort the table and build an index.
 22 | #'   This will considerably improve performance for subsets, summaries, and
 23 | #'   joins that use the keys.
 24 | #'
 25 | #'   See `vignette("datatable-keys-fast-subset")` for more details.
 26 | #' @export
 27 | #' @aliases tbl_dt grouped_dt
 28 | #' @examples
 29 | #' library(dplyr, warn.conflicts = FALSE)
 30 | #'
 31 | #' mtcars2 <- lazy_dt(mtcars)
 32 | #' mtcars2
 33 | #' mtcars2 %>% select(mpg:cyl)
 34 | #' mtcars2 %>% select(x = mpg, y = cyl)
 35 | #' mtcars2 %>% filter(cyl == 4) %>% select(mpg)
 36 | #' mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
 37 | #' mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
 38 | #' mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
 39 | #' mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
 40 | #'
 41 | #' # Learn more about translation in vignette("translation")
 42 | #' by_cyl <- mtcars2 %>% group_by(cyl)
 43 | #' by_cyl %>% summarise(mpg = mean(mpg))
 44 | #' by_cyl %>% mutate(mpg = mean(mpg))
 45 | #' by_cyl %>%
 46 | #'   filter(mpg < mean(mpg)) %>%
 47 | #'   summarise(hp = mean(hp))
 lazy_dt <- function(x, name = NULL, immutable = TRUE, key_by = NULL) {
   # `x` may have an `as.data.table()` method without a `group_vars()` method,
   # so a failing group lookup is treated as "no groups".
   groups <- tryCatch(group_vars(x), error = function(e) character())

   # Anything that is not yet a data.table is converted, which counts as a
   # copy; an existing data.table is used as is.
   copied <- !is.data.table(x)
   if (copied) {
     if (!immutable) {
       abort("`immutable` must be `TRUE` when `x` is not already a data table.")
     }
     x <- as.data.table(x)
   }

   key_by <- enquo(key_by)
   key_vars <- unname(tidyselect::vars_select(names(x), !!key_by))
   if (length(key_vars) > 0) {
     # Copy a caller-owned table before keys are set on it when it must be
     # treated as immutable.
     if (immutable && !copied) {
       x <- data.table::copy(x)
     }
     data.table::setkeyv(x, key_vars)
   }

   step_first(x, name = name, groups = groups, immutable = immutable, env = caller_env())
 }
 73 | 
 74 | #' @export
 dim.dtplyr_step_first <- function(x) {
   # A first step stores its source table in `parent`; report its dimensions.
   source_table <- x$parent
   dim(source_table)
 }
 78 | 
 # Wrap a data.table as the first step of a lazy pipeline.
 step_first <- function(parent, name = NULL, groups = character(),
                        immutable = TRUE, env = caller_env()) {
   stopifnot(is.data.table(parent))

   # Fall back to a generated name when none is supplied.
   step_name <- if (is.null(name)) unique_name() else name

   new_step(
     parent,
     vars = names(parent),
     groups = groups,
     locals = list(),
     implicit_copy = !immutable,
     needs_copy = FALSE,
     name = sym(step_name),
     env = env,
     class = "dtplyr_step_first"
   )
 }
 98 | 
 99 | #' @export
dt_call.dtplyr_step_first <- function(x, needs_copy = FALSE) {
  # Refer to the source by name, wrapped in `copy()` when a copy is required.
  if (!needs_copy) {
    return(x$name)
  }

  expr(copy(!!x$name))
}
107 | 
108 | #' @export
dt_sources.dtplyr_step_first <- function(x) {
  # One-element list holding the source table, named after the step.
  sources <- list(x$parent)
  names(sources) <- as.character(x$name)
  sources
}
112 | 
113 | #' @export
dt_has_computation.dtplyr_step_first <- function(x) {
  # Always FALSE for a first step, regardless of `x`.
  return(FALSE)
}
117 | 
# Generator for table names "_DT1", "_DT2", ...; the counter lives in the
# closure created by `local()` and is advanced on every call.
unique_name <- local({
  counter <- 0
  function() {
    counter <<- counter + 1
    paste0("_DT", counter)
  }
})
125 | 


--------------------------------------------------------------------------------
/R/step-group.R:
--------------------------------------------------------------------------------
  # Create a grouping step on top of `parent`, or return `parent` itself when
  # `can_step_group_return_early()` reports that nothing would change.
  step_group <- function(parent, groups = parent$groups, arrange = parent$arrange) {
    unchanged <- can_step_group_return_early(parent, groups, arrange)
    if (unchanged) {
      return(parent)
    }

    new_step(
      parent,
      vars = parent$vars,
      groups = groups,
      arrange = arrange,
      name = parent$name,
      class = "dtplyr_step_group"
    )
  }
 15 | 
 16 | #' @export
 dt_has_computation.dtplyr_step_group <- function(x) {
   # The answer for a grouping step is whatever its parent step reports.
   parent_step <- x$parent
   dt_has_computation(parent_step)
 }
 20 | 
 21 | 
 # Attach the step's groups to `call` as a `keyby` or `by` argument.
 add_grouping_param <- function(call, step, arrange = step$arrange) {
   # Ungrouped steps need no grouping argument.
   if (length(step$groups) == 0) {
     return(call)
   }

   # A NULL `arrange` is treated as TRUE, which selects `keyby`.
   sorted <- isTRUE(arrange %||% TRUE)
   arg_name <- if (sorted) "keyby" else "by"

   call[[arg_name]] <- call2(".", !!!syms(step$groups))
   call
 }
 33 | 
 34 | # dplyr methods -----------------------------------------------------------
 35 | 
 36 | #' Group and ungroup
 37 | #'
 38 | #' These are methods for dplyr's [group_by()] and [ungroup()] generics.
 39 | #' Grouping is translated to either the `keyby` or the `by` argument of
 40 | #' `[.data.table` depending on the value of the `arrange` argument.
 41 | #'
 42 | #' @inheritParams dplyr::group_by
 43 | #' @param .data A [lazy_dt()]
 44 | #' @param arrange If `TRUE`, will automatically arrange the output of
 45 | #'   subsequent grouped operations by group. If `FALSE`, output order will be
 46 | #'   left unchanged. In the generated data.table code this switches between
 47 | #'   using the `keyby` (`TRUE`) and `by` (`FALSE`) arguments.
 48 | #' @param .add,add When `FALSE`, the default, `group_by()` will
 49 | #'   override existing groups. To add to the existing groups, use
 50 | #'   `.add = TRUE`.
 51 | #'
 52 | #'   This argument was previously called `add`, but that prevented
 53 | #'   creating a new grouping variable called `add`, and conflicts with
 54 | #'   our naming conventions.
 55 | #' @importFrom dplyr group_by
 56 | #' @export
 57 | #' @examples
 58 | #' library(dplyr, warn.conflicts = FALSE)
 59 | #' dt <- lazy_dt(mtcars)
 60 | #'
 61 | #' # group_by() is usually translated to `keyby` so that the groups
 62 | #' # are ordered in the output
 63 | #' dt %>%
 64 | #'  group_by(cyl) %>%
 65 | #'  summarise(mpg = mean(mpg))
 66 | #'
 67 | #' # use `arrange = FALSE` to instead use `by` so the original order
 68 | #' # of groups is preserved
 69 | #' dt %>%
 70 | #'  group_by(cyl, arrange = FALSE) %>%
 71 | #'  summarise(mpg = mean(mpg))
 group_by.dtplyr_step <- function(.data, ..., .add = FALSE, arrange = TRUE) {
   dots <- capture_dots(.data, ..., .j = TRUE)
   is_null_dot <- map_lgl(dots, is.null)
   dots <- dots[!is_null_dot]

   # need `eval(expr(...))` to trigger warning for `add`
   groups <- eval(expr(dplyr::group_by_prepare(.data, !!!dots, .add = .add)))

   # When the input already has an `arrange` setting, both it and the
   # requested one must be TRUE for the result to be arranged.
   prior <- .data$arrange
   arranged <- if (is.null(prior)) arrange else prior && arrange

   step_group(groups$data, as.character(groups$group_names), arranged)
 }
 82 | 
 # TRUE when grouping `parent` by `groups` with `arrange` needs no new step.
 can_step_group_return_early <- function(parent, groups, arrange) {
   # Ungrouping is a no-op only when the parent is already ungrouped.
   if (is_empty(groups)) {
     return(is_empty(parent$groups))
   }

   if (!identical(groups, parent$groups)) {
     return(FALSE)
   }

   is_false(arrange) || identical(arrange, parent$arrange)
 }
 92 | 
 93 | #' @importFrom dplyr ungroup
 94 | #' @export
 95 | #' @rdname group_by.dtplyr_step
 ungroup.dtplyr_step <- function(x, ...) {
   # No selection: remove all grouping.
   if (missing(...)) {
     return(step_group(x, groups = character()))
   }

   # Otherwise remove only the selected variables from the current groups.
   to_remove <- tidyselect::vars_select(x$vars, ...)
   remaining <- setdiff(group_vars(x), to_remove)
   step_group(x, groups = remaining)
 }
106 | 
107 | 


--------------------------------------------------------------------------------
/R/step-modify.R:
--------------------------------------------------------------------------------
 # Record `fun` and its extra `args` on a new `dtplyr_step_modify` step that
 # keeps the parent's groups and `arrange` setting.
 step_modify <- function(parent, fun, args) {
   new_step(
     parent,
     fun = fun,
     args = args,
     groups = parent$groups,
     arrange = parent$arrange,
     implicit_copy = TRUE,
     class = "dtplyr_step_modify"
   )
 }
12 | 
13 | #' @export
dt_call.dtplyr_step_modify <- function(x, needs_copy = x$needs_copy) {
  # `j` calls the stored function on `.SD` and `.BY` plus the extra arguments;
  # the empty argument in the `[` call leaves `i` unset.
  parent_call <- dt_call(x$parent, needs_copy)
  j <- call2(x$fun, quote(.SD), quote(.BY), !!!x$args)
  subset_call <- call2("[", parent_call, , j)

  add_grouping_param(subset_call, x, arrange = FALSE)
}
20 | 
21 | # dplyr methods -----------------------------------------------------------
22 | 
23 | #' Apply a function to each group
24 | #'
25 | #' These are methods for the dplyr [group_map()] and [group_modify()] generics.
26 | #' They are both translated to `[.data.table`.
27 | #'
28 | #' @param .data A [lazy_dt()]
29 | #' @param .f The name of a two argument function. The first argument is passed
30 | #'   `.SD`, the data.table representing the current group; the second argument
31 | #'   is passed `.BY`, a list giving the current values of the grouping
32 | #'   variables. The function should return a list or data.table.
33 | #' @param ... Additional arguments passed to `.f`
34 | #' @param keep Not supported for [lazy_dt].
35 | #' @returns `group_map()` applies `.f` to each group, returning a list.
36 | #'   `group_modify()` replaces each group with the results of `.f`, returning a
37 | #'   modified [lazy_dt()].
38 | #' @importFrom dplyr group_modify
39 | #' @export
40 | #' @examples
41 | #' library(dplyr)
42 | #'
43 | #' dt <- lazy_dt(mtcars)
44 | #'
45 | #' dt %>%
46 | #'   group_by(cyl) %>%
47 | #'   group_modify(head, n = 2L)
48 | #'
49 | #' dt %>%
50 | #'   group_by(cyl) %>%
51 | #'   group_map(head, n = 2L)
group_modify.dtplyr_step <- function(.data, .f, ..., keep = FALSE) {
  # Supplying `keep` at all is rejected, whatever its value.
  if (!missing(keep)) {
    abort("`keep` is not supported for lazy data tables")
  }

  # The function is captured as a symbol and the extra arguments as quosures.
  step_modify(.data, fun = ensym(.f), args = enquos(...))
}
62 | 
63 | #' @importFrom dplyr group_map
64 | #' @rdname group_modify.dtplyr_step
65 | #' @export
group_map.dtplyr_step <- function(.data, .f, ..., keep = FALSE) {
  # Materialise the lazy table, call `.f` once per group and return the
  # per-group results as a list. `keep` is accepted but not used here.
  .f <- as_function(.f, caller_env())

  materialised <- as.data.table(.data)
  per_group <- materialised[, list(list(.f(.SD, .BY, ...))), by = eval(.data$groups)]
  per_group$V1
}
72 | 


--------------------------------------------------------------------------------
/R/step-nest.R:
--------------------------------------------------------------------------------
 1 | #' Nest
 2 | #'
 3 | #' @description
 4 | #' This is a method for the tidyr [tidyr::nest()] generic. It is translated
 5 | #' using the non-nested variables in the `by` argument and `.SD` in the `j`
 6 | #' argument.
 7 | #'
 8 | #' @inheritParams tidyr::nest
 9 | #' @param ... <[`tidy-select`][tidyr::tidyr_tidy_select]> Columns to nest, specified
10 | #'   using name-variable pairs of the form `new_col = c(col1, col2, col3)`.
11 | #'   The right hand side can be any valid tidy select expression.
12 | #' @param .key Not supported.
13 | #' @param .data A [lazy_dt()].
14 | #' @examples
15 | #' if (require("tidyr", quietly = TRUE)) {
16 | #'   dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b")))
17 | #'   dt %>% nest(data = y)
18 | #'
19 | #'   dt %>% dplyr::group_by(x) %>% nest()
20 | #' }
21 | # exported onLoad
nest.dtplyr_step <- function(.data, ..., .names_sep = NULL, .key = deprecated()) {
  if (lifecycle::is_present(.key)) {
    abort(c(
      "`nest()` for lazy data.tables doesn't support the `.key` argument.",
      i = "Use a name in the `...` argument instead."
    ))
  }

  # Named list: output column name -> columns nested inside it.
  cols <- lapply(eval_nest_dots(.data, ...), set_names)
  strip <- !is.null(.names_sep)
  if (strip) {
    cols <- imap(cols, strip_names, .names_sep)
  }

  if (length(cols) == 1 && !strip) {
    # use `.SD` as it is shorter and faster
    nm <- names(cols)
    j_exprs <- exprs(!!nm := .(.SD))
  } else {
    build_nested <- function(x, name) {
      x <- simplify_names(x)
      expr(.(data.table(!!!syms(x))))
    }
    j_exprs <- imap(cols, build_nested)
  }

  # Columns that are not nested are kept as is and used as the groups of the
  # subset step.
  asis <- setdiff(.data$vars, unlist(cols))
  out <- step_subset_j(
    .data,
    vars = c(asis, names(cols)),
    j = expr(.(!!!j_exprs)),
    groups = asis,
    arrange = FALSE
  )

  # Regroup by the original grouping variables that are still present.
  groups <- intersect(out$vars, group_vars(.data))
  group_by(out, !!!syms(groups))
}
63 | 
# Resolve the `...` of `nest()` into a named list of column-name vectors.
# With no `...`, every non-grouping column is nested under the name `data`.
eval_nest_dots <- function(.data, ...) {
  if (!missing(...)) {
    cols <- enquos(...)
    return(lapply(cols, function(.x) names(tidyselect::eval_select(.x, .data))))
  }

  groups <- group_vars(.data)
  if (is_empty(groups)) {
    warn(paste0(
      "`...` must not be empty for ungrouped data frames.\n",
      "Did you want `data = everything()`?"
    ))
  }

  list(data = setdiff(.data$vars, groups))
}
81 | 


--------------------------------------------------------------------------------
/R/step-set.R:
--------------------------------------------------------------------------------
 # Build a `dtplyr_step_set` step combining two steps with a set operation.
 #
 # x, y:  existing steps; `y` is stored as `parent2`.
 # style: character name of the set operation.
 # The locals of `x` are merged with those of `y`.
 step_set <- function(x, y, style) {
   stopifnot(is_step(x))
   stopifnot(is_step(y))
   stopifnot(is.character(style))

   # No trailing comma after the last argument: it would pass an extra empty
   # argument to `new_step()`.
   new_step(
     parent = x,
     parent2 = y,
     locals = utils::modifyList(x$locals, y$locals),
     style = style,
     class = "dtplyr_step_set"
   )
 }
14 | 
15 | #' @export
dt_sources.dtplyr_step_set <- function(x) {
  # Delegates to the join method, which is called directly on `x`.
  sources <- dt_sources.dtplyr_step_join(x)
  sources
}
19 | 
20 | #' @export
dt_call.dtplyr_step_set <- function(x, needs_copy = x$needs_copy) {
  # Translate the stored set operation into the matching data.table call.
  # Only the left-hand side is generated with `needs_copy`.
  lhs <- dt_call(x$parent, needs_copy)
  rhs <- dt_call(x$parent2)

  switch(x$style,
    intersect = call2("fintersect", lhs, rhs),
    union = call2("funion", lhs, rhs),
    union_all = call2("funion", lhs, rhs, all = TRUE),
    setdiff = call2("fsetdiff", lhs, rhs),
    # Without a default, `switch()` would silently return NULL here.
    abort(paste0("Unknown set operation style: \"", x$style, "\"."))
  )
}
34 | 
35 | # dplyr verbs -------------------------------------------------------------
36 | 
37 | #' Set operations
38 | #'
39 | #' These are methods for the dplyr generics [intersect()], [union()],
40 | #' [union_all()], and [setdiff()]. They are translated to
41 | #' [data.table::fintersect()], [data.table::funion()], and
42 | #' [data.table::fsetdiff()].
43 | #'
44 | #' @importFrom dplyr intersect
45 | #' @param x,y A pair of [lazy_dt()]s.
46 | #' @param ... Ignored
47 | #' @examples
48 | #' dt1 <- lazy_dt(data.frame(x = 1:4))
49 | #' dt2 <- lazy_dt(data.frame(x = c(2, 4, 6)))
50 | #'
51 | #' intersect(dt1, dt2)
52 | #' union(dt1, dt2)
53 | #' setdiff(dt1, dt2)
54 | #'
55 | # Exported onload
intersect.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  rhs <- if (is_step(y)) y else lazy_dt(y)
  step_set(x, rhs, style = "intersect")
}
62 | 
63 | #' @importFrom dplyr union
64 | #' @rdname intersect.dtplyr_step
65 | # Exported onload
union.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  rhs <- if (is_step(y)) y else lazy_dt(y)
  step_set(x, rhs, style = "union")
}
72 | 
73 | #' @importFrom dplyr union_all
74 | #' @rdname intersect.dtplyr_step
75 | #' @export
union_all.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  rhs <- if (is_step(y)) y else lazy_dt(y)
  step_set(x, rhs, style = "union_all")
}
82 | 
83 | #' @importFrom dplyr setdiff
84 | #' @rdname intersect.dtplyr_step
85 | # Exported onload
setdiff.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  rhs <- if (is_step(y)) y else lazy_dt(y)
  step_set(x, rhs, style = "setdiff")
}
92 | 


--------------------------------------------------------------------------------
/R/step-setnames.R:
--------------------------------------------------------------------------------
 # Add a `setnames()` step renaming columns `old` (names or positions) to
 # `new`. Pairs that would not change a name are dropped; when none remain,
 # `x` is returned unchanged.
 step_setnames <- function(x, old, new, in_place, rename_groups = FALSE) {
   stopifnot(is_step(x))
   stopifnot(is.character(old) || is.integer(old))
   stopifnot(is.character(new))
   stopifnot(length(old) == length(new))
   stopifnot(is_bool(in_place))
   stopifnot(is_bool(rename_groups))

   # Positions of the columns being renamed.
   locs <- if (is.integer(old)) old else vctrs::vec_match(old, x$vars)

   changed <- x$vars[locs] != new
   old <- old[changed]
   new <- new[changed]
   locs <- locs[changed]

   if (length(old) == 0) {
     return(x)
   }

   new_vars <- replace(x$vars, locs, new)
   out <- step_call(x,
     "setnames",
     args = list(old, new),
     vars = new_vars,
     in_place = in_place
   )

   if (!rename_groups) {
     return(out)
   }

   # The call below resolves to the `rename_groups()` function: R skips the
   # logical argument of the same name when looking up a function to call.
   groups <- rename_groups(x$groups, set_names(old, new))
   step_group(out, groups)
 }
40 | 


--------------------------------------------------------------------------------
/R/step-subset-arrange.R:
--------------------------------------------------------------------------------
 1 | #' Arrange rows by column values
 2 | #'
 3 | #' This is a method for dplyr generic [arrange()]. It is translated to
 4 | #' an [order()] call in the `i` argument of `[.data.table`.
 5 | #'
 6 | #' @param .data A [lazy_dt()].
 7 | #' @inheritParams dplyr::arrange
 8 | #' @importFrom dplyr arrange
 9 | #' @export
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #'
13 | #' dt <- lazy_dt(mtcars)
14 | #' dt %>% arrange(vs, cyl)
15 | #' dt %>% arrange(desc(vs), cyl)
16 | #' dt %>% arrange(across(mpg:disp))
arrange.dtplyr_step <- function(.data, ..., .by_group = FALSE) {
  dots <- capture_dots(.data, ..., .j = FALSE)
  if (.by_group) {
    # Sort by the grouping variables first.
    dots <- c(syms(.data$groups), dots)
  }

  if (length(dots) == 0) {
    return(.data)
  }

  # TRUE when every sort key is a bare column or a negated bare column.
  simple <- all(map_lgl(dots, is_simple_arrange))
  dots <- set_names(dots, NULL)

  # Order without grouping then restore
  use_setorder <- is_copied(.data) && simple
  if (use_setorder) {
    step <- step_call(.data, "setorder", c(dots, na.last = TRUE))
  } else {
    step <- step_subset(.data, i = call2("order", !!!dots), groups = character())
  }

  step_group(step, groups = .data$groups)
}
38 | 
# TRUE when the step is flagged with `implicit_copy` or `needs_copy`.
is_copied <- function(x) {
  if (x$implicit_copy) {
    return(TRUE)
  }

  # Mirror `||`: the right-hand flag must be a single TRUE/FALSE.
  FALSE || x$needs_copy
}
42 | 
# TRUE for a bare symbol, or for a unary minus applied to a bare symbol.
is_simple_arrange <- function(x) {
  if (is_symbol(x)) {
    return(TRUE)
  }

  if (!is_call(x, "-", 1)) {
    return(FALSE)
  }

  is_symbol(x[[2]])
}
54 | 


--------------------------------------------------------------------------------
/R/step-subset-do.R:
--------------------------------------------------------------------------------
 1 | #' @importFrom dplyr do
 2 | #' @export
 do.dtplyr_step <- function(.data, ...) {
   # This is a partial implementation, because I don't think that many
   # people are likely to use it, given that do() is marked as questioning
   # Problems:
   # * doesn't handle unnamed case
   # * doesn't set .SDcols so `.SD` will only refer to non-groups
   # * can duplicate group vars (#5)

   dots <- capture_dots(.data, ...)

   unnamed <- names2(dots) == ""
   if (any(unnamed)) {
     # I can't see any way to figure out what the variables are
     abort("Unnamed do() not supported by dtplyr")
   }

   # Each expression is wrapped in `.()`, and the wrapped expressions are
   # spliced into an outer `.()` call used as `j`.
   wrapped <- lapply(dots, function(x) call2(".", x))

   step_subset_j(
     .data,
     vars = union(.data$vars, names(dots)),
     j = call2(".", !!!wrapped)
   )
 }
25 | 


--------------------------------------------------------------------------------
/R/step-subset-expand.R:
--------------------------------------------------------------------------------
  1 | #' Expand data frame to include all possible combinations of values.
  2 | #'
  3 | #' @description
  4 | #' This is a method for the tidyr `expand()` generic. It is translated to
  5 | #' [data.table::CJ()].
  6 | #'
  7 | #' @param ... Specification of columns to expand. Columns can be atomic vectors
  8 | #'   or lists.
  9 | #'
 10 | #'   * To find all unique combinations of `x`, `y` and `z`, including those not
 11 | #'     present in the data, supply each variable as a separate argument:
 12 | #'     `expand(df, x, y, z)`.
 13 | #'   * To find only the combinations that occur in the
 14 | #'     data, use `nesting`: `expand(df, nesting(x, y, z))`.
 15 | #'   * You can combine the two forms. For example,
 16 | #'     `expand(df, nesting(school_id, student_id), date)` would produce
 17 | #'     a row for each present school-student combination for all possible
 18 | #'     dates.
 19 | #'
 20 | #'   Unlike the data.frame method, this method does not use the full set of
 21 | #'   levels, just those that appear in the data.
 22 | #'
 23 | #'   When used with continuous variables, you may need to fill in values
 24 | #'   that do not appear in the data: to do so use expressions like
 25 | #'   `year = 2010:2020` or `year = full_seq(year,1)`.
 26 | #' @param data A [lazy_dt()].
 27 | #' @inheritParams tidyr::expand
 28 | #' @examples
 29 | #' library(tidyr)
 30 | #'
 31 | #' fruits <- lazy_dt(tibble(
 32 | #'   type   = c("apple", "orange", "apple", "orange", "orange", "orange"),
 33 | #'   year   = c(2010, 2010, 2012, 2010, 2010, 2012),
 34 | #'   size  =  factor(
 35 | #'     c("XS", "S",  "M", "S", "S", "M"),
 36 | #'     levels = c("XS", "S", "M", "L")
 37 | #'   ),
 38 | #'   weights = rnorm(6, as.numeric(size) + 2)
 39 | #' ))
 40 | #'
 41 | #' # All possible combinations ---------------------------------------
 42 | #' # Note that only present levels of the factor variable `size` are retained.
 43 | #' fruits %>% expand(type)
 44 | #' fruits %>% expand(type, size)
 45 | #'
 46 | #' # This is different from the data frame behaviour:
 47 | #' fruits %>% dplyr::collect() %>% expand(type, size)
 48 | #'
 49 | #' # Other uses -------------------------------------------------------
 50 | #' fruits %>% expand(type, size, 2010:2012)
 51 | #'
 52 | #' # Use `anti_join()` to determine which observations are missing
 53 | #' all <- fruits %>% expand(type, size, year)
 54 | #' all
 55 | #' all %>% dplyr::anti_join(fruits)
 56 | #'
 57 | #' # Use with `right_join()` to fill in missing rows
 58 | #' fruits %>% dplyr::right_join(all)
 59 | # exported onLoad
 60 | expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") {
 61 |   dots <- capture_dots(data, ..., .j = FALSE)
 62 |   dots <- dots[!map_lgl(dots, is_null)]
 63 |   if (length(dots) == 0) {
 64 |     return(data)
 65 |   }
 66 | 
 67 |   named_dots <- have_name(dots)
 68 |   if (any(!named_dots)) {
 69 |     # Auto-names generated by enquos() don't always work with the CJ() step
 70 |       ## Ex: `1:3`
 71 |     # Replicates the "V" naming convention data.table uses
 72 |     symbol_dots <- map_lgl(dots, is_symbol)
 73 |     needs_v_name <- !symbol_dots & !named_dots
 74 |     v_names <- paste0("V", 1:length(dots))
 75 |     names(dots)[needs_v_name] <- v_names[needs_v_name]
 76 |     names(dots)[symbol_dots] <- lapply(dots[symbol_dots], as_name)
 77 |   }
 78 |   names(dots) <- vctrs::vec_as_names(names(dots), repair = .name_repair)
 79 |   dots_names <- names(dots)
 80 | 
 81 |   out <- step_subset_j(
 82 |     data,
 83 |     vars = union(data$groups, dots_names),
 84 |     j = expr(CJ(!!!dots, unique = TRUE))
 85 |   )
 86 | 
 87 |   # Delete duplicate columns if group vars are expanded
 88 |   if (any(dots_names %in% out$groups)) {
 89 |     group_vars <- out$groups
 90 |     expanded_group_vars <- dots_names[dots_names %in% group_vars]
 91 | 
 92 |     out <- step_subset(
 93 |       out, groups = character(), j = expr(!!expanded_group_vars := NULL)
 94 |     )
 95 |     out <- group_by(out, !!!syms(group_vars))
 96 |   }
 97 | 
 98 |   out
 99 | }
100 | 


--------------------------------------------------------------------------------
/R/step-subset-filter.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Subset rows using column values
 3 | #'
 4 | #' This is a method for the dplyr [filter()] generic. It is translated to
 5 | #' the `i` argument of `[.data.table`
 6 | #'
 7 | #' @param .data A [lazy_dt()].
 8 | #' @param .preserve Ignored
 9 | #' @inheritParams dplyr::filter
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #'
13 | #' dt <- lazy_dt(mtcars)
14 | #' dt %>% filter(cyl == 4)
15 | #' dt %>% filter(vs, am)
16 | #'
17 | #' dt %>%
18 | #'   group_by(cyl) %>%
19 | #'   filter(mpg > mean(mpg))
20 | #' @importFrom dplyr filter
21 | # exported onLoad
filter.dtplyr_step <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  # Validate named inputs (e.g. `x = 1` typed instead of `x == 1`) up front.
  check_filter(...)
  by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data")
  conditions <- capture_dots(.data, ..., .j = FALSE, .by = by)

  i <- if (filter_by_lgl_col(conditions)) {
    # Suppress data.table warning when filtering with a logical variable
    call2("(", conditions[[1]])
  } else {
    # Fold every condition into a single `cond1 & cond2 & ...` expression.
    Reduce(function(lhs, rhs) call2("&", lhs, rhs), conditions)
  }

  step_subset_i(.data, i, by)
}
36 | 
# Does the filter consist of exactly one condition that is either a bare
# column (`x`) or its negation (`!x`)?
#
# `dots` is the list of captured filter expressions. Returns a single
# logical. Any input that does not hold exactly one expression (including an
# empty list) yields FALSE rather than being indexed.
filter_by_lgl_col <- function(dots) {
  if (length(dots) != 1) {
    return(FALSE)
  }

  dot <- dots[[1]]
  if (is_symbol(dot)) {
    return(TRUE)
  }

  # catch expressions of form `!x`
  is_call(dot, name = "!", n = 1) && is_symbol(dot[[2]])
}
50 | 
# Abort when `filter()` receives a named input whose expression is not a
# logical value.
check_filter <- function(...) {
  quos <- enquos(...)
  named_idx <- which(have_name(quos))

  for (i in named_idx) {
    # only allow named logical vectors, anything else
    # is suspicious
    expr <- quo_get_expr(quos[[i]])
    if (is.logical(expr)) {
      next
    }

    # `i`, `name` and `expr` are interpolated by glue from this frame.
    name <- names(quos)[i]
    abort(c(
      glue::glue("Problem with `filter()` input `..{i}`."),
      x = glue::glue("Input `..{i}` is named."),
      i = glue::glue("This usually means that you've used `=` instead of `==`."),
      i = glue::glue("Did you mean `{name} == {as_label(expr)}`?")
    ), call = caller_env())
  }
}
72 | 


--------------------------------------------------------------------------------
/R/step-subset-select.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Subset columns using their names
  3 | #'
  4 | #' This is a method for the dplyr [select()] generic. It is translated to
  5 | #' the `j` argument of `[.data.table`.
  6 | #'
  7 | #' @param .data A [lazy_dt()].
  8 | #' @inheritParams dplyr::select
  9 | #' @importFrom dplyr select
 10 | #' @export
 11 | #' @examples
 12 | #' library(dplyr, warn.conflicts = FALSE)
 13 | #'
 14 | #' dt <- lazy_dt(data.frame(x1 = 1, x2 = 2, y1 = 3, y2 = 4))
 15 | #'
 16 | #' dt %>% select(starts_with("x"))
 17 | #' dt %>% select(ends_with("2"))
 18 | #' dt %>% select(z1 = x1, z2 = x2)
 19 | select.dtplyr_step <- function(.data, ...) {
 20 |   locs <- tidyselect::eval_select(expr(c(...)), .data)
 21 |   locs <- ensure_group_vars(locs, .data$vars, .data$groups)
 22 | 
 23 |   vars <- set_names(.data$vars[locs], names(locs))
 24 | 
 25 |   if (length(vars) == 0) {
 26 |     j <- 0L
 27 |     groups <- .data$groups
 28 |     is_unnamed <- TRUE
 29 |   } else {
 30 |     groups <- rename_groups(.data$groups, vars)
 31 |     vars <- simplify_names(vars)
 32 | 
 33 |     is_unnamed <- all(!have_name(vars))
 34 |     if (is_unnamed && identical(unname(vars), .data$vars)) {
 35 |       return(.data)
 36 |     }
 37 |     j <- call2(".", !!!syms(vars))
 38 |   }
 39 | 
 40 |   if (is_copied(.data) && is_unnamed && !can_merge_subset(.data)) {
 41 |     # Drop columns by reference if:
 42 |     #  * Data has been copied (implicitly or explicitly)
 43 |     #  * There is no renaming in the select statement
 44 |     #  * The selection can't be combined with a prior `i` step. Ex: dt[x < 7, .(x, y)]
 45 |     vars_drop <- setdiff(.data$vars, vars)
 46 |     out <- remove_vars(.data, vars_drop)
 47 |     out <- step_colorder(out, vars)
 48 |   } else {
 49 |     out <- step_subset_j(.data, vars = names(locs), groups = character(), j = j)
 50 |   }
 51 | 
 52 |   step_group(out, groups)
 53 | }
 54 | 
 55 | #' @importFrom tidyselect tidyselect_data_proxy
 56 | #' @exportS3Method
 57 | tidyselect_data_proxy.dtplyr_step <- function(x) {
 58 |   simulate_vars(x)
 59 | }
 60 | 
 61 | #' @importFrom tidyselect tidyselect_data_has_predicates
 62 | #' @exportS3Method
 63 | tidyselect_data_has_predicates.dtplyr_step <- function(x) {
 64 |   FALSE
 65 | }
 66 | 
 67 | simulate_vars <- function(x, drop_groups = FALSE) {
 68 |   if (drop_groups) {
 69 |     vars <- setdiff(x$vars, x$groups)
 70 |   } else {
 71 |     vars <- x$vars
 72 |   }
 73 | 
 74 |   as_tibble(rep_named(vars, list(logical())), .name_repair = "minimal")
 75 | }
 76 | 
 77 | ensure_group_vars <- function(loc, names, groups) {
 78 |   group_loc <- match(groups, names)
 79 |   missing <- setdiff(group_loc, loc)
 80 | 
 81 |   if (length(missing) > 0) {
 82 |     vars <- names[missing]
 83 |     inform(paste0(
 84 |       "Adding missing grouping variables: ",
 85 |       paste0("`", names[missing], "`", collapse = ", ")
 86 |     ))
 87 |     loc <- c(set_names(missing, vars), loc)
 88 |   }
 89 | 
 90 |   loc
 91 | }
 92 | 
 93 | rename_groups <- function(groups, vars) {
 94 |   old2new <- set_names(names(vars), vars)
 95 |   groups[groups %in% names(old2new)] <- old2new[groups]
 96 |   groups
 97 | }
 98 | 
 99 | simplify_names <- function(vars) {
100 |   names(vars)[vars == names(vars)] <- ""
101 |   vars
102 | }
103 | 
# Remove the columns named in `vars` via a `:= NULL` step, then re-apply the
# grouping of `.data`. Returns `.data` unchanged when `vars` is empty.
remove_vars <- function(.data, vars) {
  if (is_empty(vars)) {
    return(.data)
  }

  remaining <- setdiff(.data$vars, vars)
  dropped <- step_subset(
    .data,
    groups = character(),
    j = expr(!!unique(vars) := NULL),
    vars = remaining
  )

  group_by(dropped, !!!syms(.data$groups))
}
114 | 


--------------------------------------------------------------------------------
/R/step-subset-separate.R:
--------------------------------------------------------------------------------
 1 | #' Separate a character column into multiple columns with a regular
 2 | #' expression or numeric locations
 3 | #'
 4 | #' @description
 5 | #' This is a method for the [tidyr::separate()] generic. It is translated to
 6 | #'   [data.table::tstrsplit()] in the `j` argument of `[.data.table`.
 7 | #'
 8 | #' @param data A [lazy_dt()].
 9 | #' @param col Column name or position.
10 | #'
11 | #'   This argument is passed by expression and supports quasiquotation
12 | #'   (you can unquote column names or column positions).
13 | #' @param into Names of new variables to create as character vector.
14 | #'   Use `NA` to omit the variable in the output.
15 | #' @param sep Separator between columns.
16 | #'   The default value is a regular expression that matches any sequence of non-alphanumeric values.
17 | #' @param remove If TRUE, remove the input column from the output data frame.
18 | #' @param convert If TRUE, will run type.convert() with as.is = TRUE on new columns.
19 | #'   This is useful if the component columns are integer, numeric or logical.
20 | #'
21 | #'   NB: this will cause string "NA"s to be converted to NAs.
22 | #' @param ... Arguments passed on to methods
23 | #' @examples
24 | #' library(tidyr)
25 | #' # If you want to split by any non-alphanumeric value (the default):
26 | #' df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT")
27 | #' df %>% separate(x, c("A", "B"))
28 | #'
29 | #' # If you just want the second variable:
30 | #' df %>% separate(x, c(NA, "B"))
31 | #'
32 | #' # Use regular expressions to separate on multiple characters:
33 | #' df <- lazy_dt(data.frame(x = c(NA, "x?y", "x.z", "y:z")), "DT")
34 | #' df %>% separate(x, c("A","B"), sep = "([.?:])")
35 | #'
36 | #' # convert = TRUE detects column classes:
37 | #' df <- lazy_dt(data.frame(x = c("x:1", "x:2", "y:4", "z", NA)), "DT")
38 | #' df %>% separate(x, c("key","value"), ":") %>% str
39 | #' df %>% separate(x, c("key","value"), ":", convert = TRUE) %>% str
40 | # exported onLoad
separate.dtplyr_step <- function(data, col, into,
                                 sep = "[^[:alnum:]]+",
                                 remove = TRUE,
                                 convert = FALSE,
                                 ...) {
  # Validate argument types before building any calls.
  if (!vctrs::vec_is(into, character())) {
    abort("`into` must be a character vector.")
  }
  if (!vctrs::vec_is(sep, character())) {
    abort("`sep` must be a character vector.")
  }

  col <- sym(tidyselect::vars_pull(data$vars, !!enquo(col)))

  # NA entries in `into` mark pieces to discard; remember which positions are
  # wanted before the NAs are dropped.
  n_pieces <- length(into)
  wanted <- !is.na(into)
  keep <- seq_along(into)[wanted]
  into <- into[wanted]

  split_call <- call2("tstrsplit", col, split = sep)
  if (length(keep) < n_pieces) {
    # Only pass `keep` when some piece is actually discarded.
    split_call$keep <- keep
  }
  if (isTRUE(convert)) {
    split_call$type.convert <- TRUE
  }

  out <- step_subset(
    data,
    vars = union(data$vars, into),
    j = call2(":=", into, split_call),
    needs_copy = data$needs_copy || !data$implicit_copy
  )

  # Drop the input column unless one of the new columns reuses its name.
  col_reused <- as.character(col) %in% into
  if (remove && !col_reused) {
    out <- select(out, -!!col)
  }

  out
}
82 | 


--------------------------------------------------------------------------------
/R/step-subset-summarise.R:
--------------------------------------------------------------------------------
  1 | 
  2 | #' Summarise each group to one row
  3 | #'
  4 | #' This is a method for the dplyr [summarise()] generic. It is translated to
  5 | #' the `j` argument of `[.data.table`.
  6 | #'
  7 | #' @param .data A [lazy_dt()].
  8 | #' @inheritParams dplyr::summarise
  9 | #' @importFrom dplyr summarise
 10 | #' @export
 11 | #' @examples
 12 | #' library(dplyr, warn.conflicts = FALSE)
 13 | #'
 14 | #' dt <- lazy_dt(mtcars)
 15 | #'
 16 | #' dt %>%
 17 | #'   group_by(cyl) %>%
 18 | #'   summarise(vs = mean(vs))
 19 | #'
 20 | #' dt %>%
 21 | #'   group_by(cyl) %>%
 22 | #'   summarise(across(disp:wt, mean))
 23 | summarise.dtplyr_step <- function(.data, ..., .by = NULL, .groups = NULL) {
 24 |   by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data")
 25 |   if (by$uses_by) {
 26 |     group_vars <- by$names
 27 |     .groups <- "drop"
 28 |   } else {
 29 |     group_vars <- .data$groups
 30 |   }
 31 | 
 32 |   dots <- capture_dots(.data, ..., .by = by)
 33 |   check_summarise_vars(dots)
 34 | 
 35 |   if (length(dots) == 0) {
 36 |     if (length(group_vars) == 0) {
 37 |       out <- step_subset_j(.data, vars = character(), j = 0L)
 38 |     } else {
 39 |       # Acts like distinct on grouping vars
 40 |       out <- distinct(.data, !!!syms(group_vars))
 41 |     }
 42 |   } else {
 43 |     out <- step_subset_j(
 44 |       .data,
 45 |       vars = union(group_vars, names(dots)),
 46 |       j = call2(".", !!!dots),
 47 |       by = by
 48 |     )
 49 |   }
 50 | 
 51 |   replaced_group_vars <- intersect(group_vars, names(dots))
 52 |   if (!is_empty(replaced_group_vars)) {
 53 |     out <- step_subset(
 54 |       out,
 55 |       groups = character(),
 56 |       j = expr(!!replaced_group_vars := NULL)
 57 |     )
 58 |   }
 59 | 
 60 |   out_groups <- summarise_groups(.data, .groups, caller_env())
 61 |   step_group(out, groups = out_groups)
 62 | }
 63 | 
 64 | 
 65 | # For each expression, check if it uses any newly created variables
 66 | check_summarise_vars <- function(dots) {
 67 |   for (i in seq_along(dots)) {
 68 |     used_vars <- all_names(get_expr(dots[[i]]))
 69 |     cur_vars <- names(dots)[seq_len(i - 1)]
 70 | 
 71 |     if (any(used_vars %in% cur_vars)) {
 72 |       abort(paste0(
 73 |         "`", names(dots)[[i]], "` ",
 74 |         "refers to a variable created earlier in this summarise().\n",
 75 |         "Do you need an extra mutate() step?"
 76 |       ), call = caller_env())
 77 |     }
 78 |   }
 79 | }
 80 | 
 81 | summarise_groups <- function(.data, .groups, env_caller) {
 82 |   if (!is.null(.groups) && !.groups %in% c("drop_last", "drop", "keep")) {
 83 |     abort(c(
 84 |       paste0(
 85 |         "`.groups` can't be ", as_label(.groups),
 86 |         if (.groups == "rowwise") " in dtplyr"
 87 |       ),
 88 |       i = 'Possible values are NULL (default), "drop_last", "drop", and "keep"'
 89 |     ), call = caller_env())
 90 |   }
 91 | 
 92 |   group_vars <- .data$groups
 93 |   n <- length(group_vars)
 94 | 
 95 |   verbose <- summarise_verbose(.groups, env_caller)
 96 |   if (verbose && n > 1) {
 97 |     new_groups <- glue::glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
 98 |     summarise_inform("has grouped output by {new_groups}")
 99 |   }
100 | 
101 |   .groups <- .groups %||% "drop_last"
102 |   switch(.groups,
103 |     drop_last = group_vars[-n],
104 |     keep = group_vars,
105 |     drop = character()
106 |   )
107 | }
108 | 
# Should summarise() announce the resulting grouping? Only when `.groups` was
# not supplied, the top-level environment of `.env` is the global
# environment, and the `dplyr.summarise.inform` option is not FALSE.
summarise_verbose <- function(.groups, .env) {
  if (!is.null(.groups)) {
    return(FALSE)
  }
  if (!is_reference(topenv(.env), global_env())) {
    return(FALSE)
  }
  !identical(getOption("dplyr.summarise.inform"), FALSE)
}
114 | 
# Emit a "`summarise()` ..." message. `...` is a glue template interpolated
# in `.env`, which defaults to the caller's frame.
summarise_inform <- function(..., .env = parent.frame()) {
  detail <- glue::glue(..., .envir = .env)
  inform(paste0(
    "`summarise()` ", detail, '. You can override using the `.groups` argument.'
  ))
}
120 | 


--------------------------------------------------------------------------------
/R/step-subset-transmute.R:
--------------------------------------------------------------------------------
 1 | #' Create new columns, dropping old
 2 | #'
 3 | #' This is a method for the dplyr [transmute()] generic. It is translated to
 4 | #' the `j` argument of `[.data.table`.
 5 | #'
 6 | #' @param .data A [lazy_dt()].
 7 | #' @inheritParams mutate.dtplyr_step
 8 | #' @importFrom dplyr transmute
 9 | #' @export
10 | #' @examples
11 | #' library(dplyr, warn.conflicts = FALSE)
12 | #'
13 | #' dt <- lazy_dt(dplyr::starwars)
14 | #' dt %>% transmute(name, sh = paste0(species, "/", homeworld))
transmute.dtplyr_step <- function(.data, ...) {
  out <- mutate(.data, ..., .keep = "none")

  # Keep the grouping variables that are not redefined, followed by the new
  # expressions in the order they were given.
  new_cols <- names(capture_new_vars(.data, ...))
  kept_groups <- setdiff(group_vars(.data), new_cols)
  retained <- c(kept_groups, new_cols)

  select(out, any_of(retained))
}
23 | 


--------------------------------------------------------------------------------
/R/step-subset.R:
--------------------------------------------------------------------------------
  1 | step_subset <- function(parent,
  2 |                         vars = parent$vars,
  3 |                         groups = parent$groups,
  4 |                         locals = parent$locals,
  5 |                         arrange = parent$arrange,
  6 |                         i = NULL,
  7 |                         j = NULL,
  8 |                         on = character(),
  9 |                         allow_cartesian = NULL,
 10 |                         needs_copy = FALSE
 11 | ) {
 12 | 
 13 |   stopifnot(is_step(parent))
 14 |   stopifnot(is_expression(i) || is_call(i) || is_step(i))
 15 |   stopifnot(is_expression(j) || is_call(j))
 16 |   stopifnot(is.character(on))
 17 | 
 18 |   new_step(
 19 |     parent = parent,
 20 |     vars = vars,
 21 |     groups = groups,
 22 |     locals = locals,
 23 |     arrange = arrange,
 24 |     i = i,
 25 |     j = j,
 26 |     on = on,
 27 |     allow_cartesian = allow_cartesian,
 28 |     implicit_copy = !is.null(i) || !is.null(j),
 29 |     needs_copy = needs_copy || parent$needs_copy,
 30 |     class = "dtplyr_step_subset"
 31 |   )
 32 | }
 33 | 
 34 | # Grouped i needs an intermediate assignment for maximum efficiency
 35 | step_subset_i <- function(parent, i, by = new_by()) {
 36 |   if (is_empty(i)) {
 37 |     return(parent)
 38 |   }
 39 | 
 40 |   if (by$uses_by) {
 41 |     parent <- step_group(parent, by$names)
 42 |   }
 43 | 
 44 |   if (length(parent$groups) > 0) {
 45 |     parent <- compute(parent)
 46 | 
 47 |     nm <- sym(parent$name)
 48 |     i <- expr((!!nm)[, .I[!!i]])              # dt[, .I[]]
 49 |     i <- add_grouping_param(i, parent, FALSE) # dt[, .I[], by = ()]
 50 |     i <- call("$", i, quote(V1))              # dt[, .I[], by = ()]$V1
 51 |   }
 52 | 
 53 |   if (by$uses_by) {
 54 |     parent <- ungroup(parent)
 55 |   }
 56 | 
 57 |   step_subset(parent, i = i)
 58 | }
 59 | 
 60 | # When adding a subset that contains only j, it may be possible to merge
 61 | # the previous step.
 62 | step_subset_j <- function(parent,
 63 |                           vars = parent$vars,
 64 |                           groups = parent$groups,
 65 |                           arrange = parent$arrange,
 66 |                           j = NULL,
 67 |                           by = new_by()) {
 68 |   if (can_merge_subset(parent)) {
 69 |     i <- parent$i
 70 |     on <- parent$on
 71 |     parent <- parent$parent
 72 |   } else {
 73 |     i <- NULL
 74 |     on <- character()
 75 |   }
 76 | 
 77 |   if (by$uses_by) {
 78 |     parent <- step_group(parent, by$names)
 79 |   }
 80 | 
 81 |   out <- step_subset(
 82 |     parent,
 83 |     vars = vars,
 84 |     groups = groups,
 85 |     arrange = arrange,
 86 |     i = i,
 87 |     j = j,
 88 |     on = on
 89 |   )
 90 | 
 91 |   if (by$uses_by) {
 92 |     out <- ungroup(out)
 93 |   }
 94 | 
 95 |   out
 96 | }
 97 | 
 98 | can_merge_subset <- function(x) {
 99 |   # Can only merge subsets
100 |   if (!inherits(x, "dtplyr_step_subset")) {
101 |     return(FALSE)
102 |   }
103 | 
104 |   # Don't need to check that groups are identical because the only
105 |   # dplyr functions that generate expression in i are
106 |   # filter/slice/sample/arrange/join and don't affect groups
107 | 
108 |   is.null(x$j)
109 | }
110 | 
111 | #' @export
dt_sources.dtplyr_step_subset <- function(x) {
  # TODO: need to throw error if same name refers to different tables.
  parent_sources <- dt_sources(x$parent)

  if (!is_step(x$i)) {
    return(parent_sources)
  }

  # `i` is itself a step, so its sources are merged in as well.
  utils::modifyList(parent_sources, dt_sources(x$i))
}
120 | 
121 | #' @export
dt_call.dtplyr_step_subset <- function(x, needs_copy = x$needs_copy) {
  j <- x$j

  # A step with neither `i` nor `j` adds nothing to the parent's call.
  if (is.null(x$i) && is.null(j)) {
    return(dt_call(x$parent))
  }

  # `i` may itself be a step, in which case its own call is used.
  i <- if (is_step(x$i)) dt_call(x$i) else x$i
  parent <- dt_call(x$parent, needs_copy)

  has_i <- !is.null(i)
  has_j <- !is.null(j)

  if (has_i && has_j) {
    out <- call2("[", parent, i, j)
  } else if (has_i) {
    out <- call2("[", parent, i)
  } else if (has_j) {
    # Empty `i` slot: parent[, j]
    out <- call2("[", parent, , j)
  } else {
    out <- parent
  }

  if (has_j) {
    out <- add_grouping_param(out, x)
  }

  if (length(x$on) > 0) {
    out$on <- call2(".", !!!syms(x$on))
    out$allow.cartesian <- x$allow_cartesian
  }

  out
}
151 | 
152 | 


--------------------------------------------------------------------------------
/R/unite.R:
--------------------------------------------------------------------------------
 1 | #' Unite multiple columns into one by pasting strings together.
 2 | #'
 3 | #' @description
 4 | #' This is a method for the tidyr `unite()` generic.
 5 | #'
 6 | #' @inheritParams tidyr::unite
 7 | #' @examples
 8 | #' library(tidyr)
 9 | #'
10 | #' df <- lazy_dt(expand_grid(x = c("a", NA), y = c("b", NA)))
11 | #' df
12 | #'
13 | #' df %>% unite("z", x:y, remove = FALSE)
14 | #'
15 | #' # Separate is almost the complement of unite
16 | #' df %>%
17 | #'   unite("xy", x:y) %>%
18 | #'   separate(xy, c("x", "y"))
19 | #' # (but note `x` and `y` now contain "NA" not NA)
20 | # exported onLoad
unite.dtplyr_step <- function(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) {
  if (is_true(na.rm)) {
    abort("`na.rm` is not implemented in dtplyr")
  }

  .col <- as_name(enquo(col))

  # With no columns selected, every column is united.
  dots <- enquos(...)
  if (length(dots) > 0) {
    locs <- tidyselect::eval_select(expr(c(!!!dots)), data, allow_rename = FALSE)
    .cols <- data$vars[locs]
  } else {
    .cols <- data$vars
    locs <- seq_along(.cols)
  }

  # The pasted column is created on the ungrouped data; grouping is restored
  # at the end.
  out <- mutate(ungroup(data), !!.col := paste(!!!syms(.cols), sep = sep))

  remove <- is_true(remove)
  if (remove) {
    # Keep the new column even when it shares a name with an input column.
    .drop_cols <- setdiff(.cols, .col)
    out <- select(out, -tidyselect::all_of(.drop_cols))
  }

  # United columns stop being grouping variables once they are removed.
  group_vars <- data$groups
  if (remove && any(.cols %in% group_vars)) {
    group_vars <- setdiff(group_vars, .cols)
  }

  # Place the new column where the first united column was.
  out <- relocate(out, !!.col, .before = min(locs))

  if (length(group_vars) > 0) {
    out <- group_by(out, !!!syms(group_vars))
  }

  out
}
57 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | cat_line <- function(...) cat(paste(..., "\n", collapse = "", sep = ""))
 2 | 
 3 | # nocov start - compat-purrr.R
 4 | 
 5 | imap <- function(.x, .f, ...) {
 6 |   map2(.x, names(.x) %||% seq_along(.x), .f, ...)
 7 | }
 8 | 
 9 | map2 <- function(.x, .y, .f, ...) {
10 |   .f <- as_function(.f, env = global_env())
11 |   out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE)
12 |   if (length(out) == length(.x)) {
13 |     set_names(out, names(.x))
14 |   } else {
15 |     set_names(out, NULL)
16 |   }
17 | }
18 | 
19 | # nocov end
20 | 
21 | # nocov start - compat-tidyr.R
22 | 
# Remove the prefix `paste0(base, names_sep)` from every name of `df` that
# starts with it; other names are left alone.
strip_names <- function(df, base, names_sep) {
  prefix <- paste0(base, names_sep)
  nms <- names(df)

  prefixed <- startsWith(nms, prefix)
  nms[prefixed] <- substr(
    nms[prefixed],
    nchar(prefix) + 1,
    nchar(nms[prefixed])
  )

  set_names(df, nms)
}
32 | 
33 | # nocov end
34 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | # nocov start
 2 | .onLoad <- function(...) {
 3 |   register_s3_method("dplyr", "filter", "dtplyr_step")
 4 |   register_s3_method("dplyr", "intersect", "dtplyr_step")
 5 |   register_s3_method("dplyr", "setdiff", "dtplyr_step")
 6 |   register_s3_method("dplyr", "union", "dtplyr_step")
 7 |   register_s3_method("tidyr", "complete", "dtplyr_step")
 8 |   register_s3_method("tidyr", "drop_na", "dtplyr_step")
 9 |   register_s3_method("tidyr", "expand", "dtplyr_step")
10 |   register_s3_method("tidyr", "fill", "dtplyr_step")
11 |   register_s3_method("tidyr", "pivot_longer", "dtplyr_step")
12 |   register_s3_method("tidyr", "pivot_wider", "dtplyr_step")
13 |   register_s3_method("tidyr", "replace_na", "dtplyr_step")
14 |   register_s3_method("tidyr", "nest", "dtplyr_step")
15 |   register_s3_method("tidyr", "separate", "dtplyr_step")
16 |   register_s3_method("tidyr", "unite", "dtplyr_step")
17 | }
18 | 
# Register `fun` as the S3 method `generic.class` in the namespace of `pkg`.
# When `fun` is NULL the method is looked up by name, starting from the
# caller's frame.
register_s3_method <- function(pkg, generic, class, fun = NULL) {
  stopifnot(is.character(pkg), length(pkg) == 1)
  stopifnot(is.character(generic), length(generic) == 1)
  stopifnot(is.character(class), length(class) == 1)

  if (is.null(fun)) {
    fun <- get(paste0(generic, ".", class), envir = parent.frame())
  } else {
    stopifnot(is.function(fun))
  }

  # Shared by the immediate registration and the load hook.
  register <- function(...) {
    registerS3method(generic, class, fun, envir = asNamespace(pkg))
  }

  if (pkg %in% loadedNamespaces()) {
    register()
  }

  # Always register hook in case package is later unloaded & reloaded
  setHook(packageEvent(pkg, "onLoad"), register)
}
42 | # nocov end
43 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/README-",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # dtplyr <a href='https://dtplyr.tidyverse.org'><img src='man/figures/logo.png' align="right" height="138" /></a>
 17 | 
 18 | <!-- badges: start -->
 19 | [![CRAN status](https://www.r-pkg.org/badges/version/dtplyr)](https://cran.r-project.org/package=dtplyr)
 20 | [![R-CMD-check](https://github.com/tidyverse/dtplyr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/dtplyr/actions/workflows/R-CMD-check.yaml)
 21 | [![Codecov test coverage](https://codecov.io/gh/tidyverse/dtplyr/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/dtplyr)
 22 | <!-- badges: end -->
 23 | 
 24 | ## Overview
 25 | 
 26 | <a href="https://rdatatable-community.github.io/The-Raft/posts/2024-08-01-seal_of_approval-dtplyr/"><img src='man/figures/dt-seal.png' align="right" width="200" height="157" alt="data.table seal of approval"/></a>dtplyr provides a [data.table](http://r-datatable.com/) backend for dplyr. The goal of dtplyr is to allow you to write dplyr code that is automatically translated to the equivalent, but usually much faster, data.table code.
 27 | 
 28 | See `vignette("translation")` for details of the current translations, and  [table.express](https://github.com/asardaes/table.express) and [rqdatatable](https://github.com/WinVector/rqdatatable/) for related work.
 29 | 
 30 | ## Installation
 31 | 
 32 | You can install from CRAN with:
 33 | 
 34 | ```R
 35 | install.packages("dtplyr")
 36 | ```
 37 | 
 38 | Or try the development version from GitHub with:
 39 | 
 40 | ```R
 41 | # install.packages("pak")
 42 | pak::pak("tidyverse/dtplyr")
 43 | ```
 44 | 
 45 | ## Usage
 46 | 
 47 | To use dtplyr, you must at least load dtplyr and dplyr. You may also want to load [data.table](http://r-datatable.com/) so you can access the other goodies that it provides:
 48 | 
 49 | ```{r setup}
 50 | library(data.table)
 51 | library(dtplyr)
 52 | library(dplyr, warn.conflicts = FALSE)
 53 | ```
 54 | 
 55 | Then use `lazy_dt()` to create a "lazy" data table that tracks the operations performed on it.
 56 | 
 57 | ```{r}
 58 | mtcars2 <- lazy_dt(mtcars)
 59 | ```
 60 | 
 61 | You can preview the transformation (including the generated data.table code) by printing the result:
 62 | 
 63 | ```{r}
 64 | mtcars2 %>%
 65 |   filter(wt < 5) %>%
 66 |   mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
 67 |   group_by(cyl) %>%
 68 |   summarise(l100k = mean(l100k))
 69 | ```
 70 | 
 71 | But generally you should reserve this only for debugging, and use `as.data.table()`, `as.data.frame()`, or `as_tibble()` to indicate that you're done with the transformation and want to access the results:
 72 | 
 73 | ```{r}
 74 | mtcars2 %>%
 75 |   filter(wt < 5) %>%
 76 |   mutate(l100k = 235.21 / mpg) %>% # liters / 100 km
 77 |   group_by(cyl) %>%
 78 |   summarise(l100k = mean(l100k)) %>%
 79 |   as_tibble()
 80 | ```
 81 | 
 82 | ## Why is dtplyr slower than data.table?
 83 | 
 84 | There are two primary reasons that dtplyr will always be somewhat slower than data.table:
 85 | 
 86 | * Each dplyr verb must do some work to convert dplyr syntax to data.table
 87 |   syntax. This takes time proportional to the complexity of the input code,
 88 |   not the input _data_, so should be a negligible overhead for large datasets.
 89 |   [Initial benchmarks][benchmark] suggest that the overhead should be under
 90 |   1ms per dplyr call.
 91 | 
 92 | * To match dplyr semantics, `mutate()` does not modify in place by default.
 93 |   This means that most expressions involving `mutate()` must make a copy
 94 |   that would not be necessary if you were using data.table directly.
 95 |   (You can opt out of this behaviour in `lazy_dt()` with `immutable = FALSE`).
 96 | 
 97 | [benchmark]: https://dtplyr.tidyverse.org/articles/translation.html#performance
 98 | 
 99 | ## Code of Conduct
100 | 
101 | Please note that the dtplyr project is released with a [Contributor Code of Conduct](https://dtplyr.tidyverse.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.
102 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url: https://dtplyr.tidyverse.org
 2 | 
 3 | template:
 4 |   package: tidytemplate
 5 |   bootstrap: 5
 6 |   includes:
 7 |     in_header: |
 8 |       <script defer data-domain="dtplyr.tidyverse.org,all.tidyverse.org" src="https://plausible.io/js/plausible.js"></script>
 9 | 
10 | development:
11 |   mode: auto
12 | 
13 | home:
14 |   links:
15 |   - text: Learn more about data.table
16 |     href: https://rdatatable.gitlab.io/data.table/
17 | 
18 | reference:
19 | - title: Getting data in and out
20 |   contents:
21 |   - lazy_dt
22 |   - collect.dtplyr_step
23 | 
24 | - title: Single table verbs
25 |   contents:
26 |   - arrange.dtplyr_step
27 |   - count.dtplyr_step
28 |   - distinct.dtplyr_step
29 |   - filter.dtplyr_step
30 |   - group_by.dtplyr_step
31 |   - group_modify.dtplyr_step
32 |   - head.dtplyr_step
33 |   - mutate.dtplyr_step
34 |   - transmute.dtplyr_step
35 |   - relocate.dtplyr_step
36 |   - rename.dtplyr_step
37 |   - reframe.dtplyr_step
38 |   - select.dtplyr_step
39 |   - slice.dtplyr_step
40 |   - summarise.dtplyr_step
41 | 
42 | - title: Two table verbs
43 |   contents:
44 |   - left_join.dtplyr_step
45 |   - intersect.dtplyr_step
46 | 
47 | - title: tidyr verbs
48 |   contents:
49 |   - complete.dtplyr_step
50 |   - drop_na.dtplyr_step
51 |   - expand.dtplyr_step
52 |   - fill.dtplyr_step
53 |   - nest.dtplyr_step
54 |   - pivot_wider.dtplyr_step
55 |   - pivot_longer.dtplyr_step
56 |   - replace_na.dtplyr_step
57 |   - separate.dtplyr_step
58 |   - unite.dtplyr_step
59 | 
60 | news:
61 |   releases:
62 |   - text: "Version 1.3.0"
63 |     href: https://www.tidyverse.org/blog/2023/02/dtplyr-1-3-0/
64 |   - text: "Version 1.2.0"
65 |     href: https://www.tidyverse.org/blog/2021/12/dtplyr-1-2-0/
66 |   - text: "Version 1.1.0"
67 |     href: https://www.tidyverse.org/blog/2021/02/dplyr-backends/
68 |   - text: "Version 1.0.0"
69 |     href: https://www.tidyverse.org/blog/2019/11/dtplyr-1-0-0/
70 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 | 
3 | 0 errors | 0 warnings | 0 notes
4 | 
5 | ## revdepcheck results
6 | 
7 | I did not check any revdeps as this is a patch release to fix an R CMD check failure.
8 | 


--------------------------------------------------------------------------------
/dtplyr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/man/arrange.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-arrange.R
 3 | \name{arrange.dtplyr_step}
 4 | \alias{arrange.dtplyr_step}
 5 | \title{Arrange rows by column values}
 6 | \usage{
 7 | \method{arrange}{dtplyr_step}(.data, ..., .by_group = FALSE)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables, or
13 | functions of variables. Use \code{\link[dplyr:desc]{desc()}} to sort a variable in descending
14 | order.}
15 | 
16 | \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to
17 | grouped data frames only.}
18 | }
19 | \description{
20 | This is a method for the dplyr \code{\link[=arrange]{arrange()}} generic. It is translated to
21 | an \code{\link[=order]{order()}} call in the \code{i} argument of \verb{[.data.table}.
22 | }
23 | \examples{
24 | library(dplyr, warn.conflicts = FALSE)
25 | 
26 | dt <- lazy_dt(mtcars)
27 | dt \%>\% arrange(vs, cyl)
28 | dt \%>\% arrange(desc(vs), cyl)
29 | dt \%>\% arrange(across(mpg:disp))
30 | }
31 | 


--------------------------------------------------------------------------------
/man/collect.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step.R
 3 | \name{collect.dtplyr_step}
 4 | \alias{collect.dtplyr_step}
 5 | \alias{compute.dtplyr_step}
 6 | \alias{as.data.table.dtplyr_step}
 7 | \alias{as.data.frame.dtplyr_step}
 8 | \alias{as_tibble.dtplyr_step}
 9 | \title{Force computation of a lazy data.table}
10 | \usage{
11 | \method{collect}{dtplyr_step}(x, ...)
12 | 
13 | \method{compute}{dtplyr_step}(x, name = unique_name(), ...)
14 | 
15 | \method{as.data.table}{dtplyr_step}(x, keep.rownames = FALSE, ...)
16 | 
17 | \method{as.data.frame}{dtplyr_step}(x, ...)
18 | 
19 | \method{as_tibble}{dtplyr_step}(x, ..., .name_repair = "check_unique")
20 | }
21 | \arguments{
22 | \item{x}{A \link{lazy_dt}}
23 | 
24 | \item{...}{Arguments used by other methods.}
25 | 
26 | \item{name}{Name of intermediate data.table.}
27 | 
28 | \item{keep.rownames}{Ignored as dplyr never preserves rownames.}
29 | 
30 | \item{.name_repair}{Treatment of problematic column names}
31 | }
32 | \description{
33 | \itemize{
34 | \item \code{collect()} returns a tibble, grouped if needed.
35 | \item \code{compute()} generates an intermediate assignment in the translation.
36 | \item \code{as.data.table()} returns a data.table.
37 | \item \code{as.data.frame()} returns a data frame.
38 | \item \code{as_tibble()} returns a tibble.
39 | }
40 | }
41 | \examples{
42 | library(dplyr, warn.conflicts = FALSE)
43 | 
44 | dt <- lazy_dt(mtcars)
45 | 
46 | # Generate translation
47 | avg_mpg <- dt \%>\%
48 |   filter(am == 1) \%>\%
49 |   group_by(cyl) \%>\%
50 |   summarise(mpg = mean(mpg))
51 | 
52 | # Show translation and temporarily compute result
53 | avg_mpg
54 | 
55 | # compute and return tibble
56 | avg_mpg_tb <- as_tibble(avg_mpg)
57 | avg_mpg_tb
58 | 
59 | # compute and return data.table
60 | avg_mpg_dt <- data.table::as.data.table(avg_mpg)
61 | avg_mpg_dt
62 | 
63 | # modify translation to use intermediate assignment
64 | compute(avg_mpg)
65 | 
66 | }
67 | 


--------------------------------------------------------------------------------
/man/complete.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/complete.R
 3 | \name{complete.dtplyr_step}
 4 | \alias{complete.dtplyr_step}
 5 | \title{Complete a data frame with missing combinations of data}
 6 | \usage{
 7 | \method{complete}{dtplyr_step}(data, ..., fill = list())
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[tidyr:tidyr_data_masking]{data-masking}}> Specification of columns
13 | to expand or complete. Columns can be atomic vectors or lists.
14 | \itemize{
15 | \item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not
16 | present in the data, supply each variable as a separate argument:
17 | \code{expand(df, x, y, z)} or \code{complete(df, x, y, z)}.
18 | \item To find only the combinations that occur in the
19 | data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}.
20 | \item You can combine the two forms. For example,
21 | \code{expand(df, nesting(school_id, student_id), date)} would produce
22 | a row for each present school-student combination for all possible
23 | dates.
24 | }
25 | 
26 | When used with factors, \code{\link[tidyr:expand]{expand()}} and \code{\link[tidyr:complete]{complete()}} use the full set of
27 | levels, not just those that appear in the data. If you want to use only the
28 | values seen in the data, use \code{forcats::fct_drop()}.
29 | 
30 | When used with continuous variables, you may need to fill in values
31 | that do not appear in the data: to do so use expressions like
32 | \code{year = 2010:2020} or \code{year = full_seq(year,1)}.}
33 | 
34 | \item{fill}{A named list that for each variable supplies a single value to
35 | use instead of \code{NA} for missing combinations.}
36 | }
37 | \description{
38 | This is a method for the tidyr \code{complete()} generic. This is a wrapper
39 | around \code{dtplyr} translations for \code{expand()}, \code{full_join()}, and \code{replace_na()}
40 | that's useful for completing missing combinations of data.
41 | }
42 | \examples{
43 | library(tidyr)
44 | tbl <- tibble(x = 1:2, y = 1:2, z = 3:4)
45 | dt <- lazy_dt(tbl)
46 | 
47 | dt \%>\%
48 |   complete(x, y)
49 | 
50 | dt \%>\%
51 |   complete(x, y, fill = list(z = 10L))
52 | }
53 | 


--------------------------------------------------------------------------------
/man/count.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/count.R
 3 | \name{count.dtplyr_step}
 4 | \alias{count.dtplyr_step}
 5 | \title{Count observations by group}
 6 | \usage{
 7 | \method{count}{dtplyr_step}(x, ..., wt = NULL, sort = FALSE, name = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{\link[=lazy_dt]{lazy_dt()}}}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group
13 | by.}
14 | 
15 | \item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights.
16 | Can be \code{NULL} or a variable:
17 | \itemize{
18 | \item If \code{NULL} (the default), counts the number of rows in each group.
19 | \item If a variable, computes \code{sum(wt)} for each group.
20 | }}
21 | 
22 | \item{sort}{If \code{TRUE}, will show the largest groups at the top.}
23 | 
24 | \item{name}{The name of the new column in the output.
25 | 
26 | If omitted, it will default to \code{n}. If there's already a column called \code{n},
27 | it will use \code{nn}. If there's a column called \code{n} and \code{nn}, it'll use
28 | \code{nnn}, and so on, adding \code{n}s until it gets a new name.}
29 | }
30 | \description{
31 | This is a method for the dplyr \code{\link[=count]{count()}} generic. It is translated using
32 | \code{.N} in the \code{j} argument, and supplying groups to \code{keyby} as appropriate.
33 | }
34 | \examples{
35 | library(dplyr, warn.conflicts = FALSE)
36 | 
37 | dt <- lazy_dt(dplyr::starwars)
38 | dt \%>\% count(species)
39 | dt \%>\% count(species, sort = TRUE)
40 | dt \%>\% count(species, wt = mass, sort = TRUE)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/distinct.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-call.R
 3 | \name{distinct.dtplyr_step}
 4 | \alias{distinct.dtplyr_step}
 5 | \title{Subset distinct/unique rows}
 6 | \usage{
 7 | \method{distinct}{dtplyr_step}(.data, ..., .keep_all = FALSE)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to
13 | use when determining uniqueness. If there are multiple rows for a given
14 | combination of inputs, only the first row will be preserved. If omitted,
15 | will use all variables in the data frame.}
16 | 
17 | \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}.
18 | If a combination of \code{...} is not distinct, this keeps the
19 | first row of values.}
20 | }
21 | \description{
22 | This is a method for the dplyr \code{\link[=distinct]{distinct()}} generic. It is translated to
23 | \code{\link[data.table:duplicated]{data.table::unique.data.table()}}.
24 | }
25 | \examples{
26 | library(dplyr, warn.conflicts = FALSE)
27 | df <- lazy_dt(data.frame(
28 |   x = sample(10, 100, replace = TRUE),
29 |   y = sample(10, 100, replace = TRUE)
30 | ))
31 | 
32 | df \%>\% distinct(x)
33 | df \%>\% distinct(x, y)
34 | df \%>\% distinct(x, .keep_all = TRUE)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/dot-datatable.aware.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dtplyr-package.R
 3 | \docType{data}
 4 | \name{.datatable.aware}
 5 | \alias{.datatable.aware}
 6 | \title{dtplyr is data.table aware}
 7 | \format{
 8 | An object of class \code{logical} of length 1.
 9 | }
10 | \usage{
11 | .datatable.aware
12 | }
13 | \description{
14 | dtplyr is data.table aware
15 | }
16 | \keyword{internal}
17 | 


--------------------------------------------------------------------------------
/man/drop_na.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-call.R
 3 | \name{drop_na.dtplyr_step}
 4 | \alias{drop_na.dtplyr_step}
 5 | \title{Drop rows containing missing values}
 6 | \usage{
 7 | \method{drop_na}{dtplyr_step}(data, ...)
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to inspect for
13 | missing values. If empty, all columns are used.}
14 | }
15 | \description{
16 | This is a method for the tidyr \code{drop_na()} generic. It is translated to
17 | \code{data.table::na.omit()}.
18 | }
19 | \examples{
20 | library(dplyr)
21 | library(tidyr)
22 | 
23 | dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b")))
24 | dt \%>\% drop_na()
25 | dt \%>\% drop_na(x)
26 | 
27 | vars <- "y"
28 | dt \%>\% drop_na(x, any_of(vars))
29 | }
30 | 


--------------------------------------------------------------------------------
/man/dtplyr-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dtplyr-package.R
 3 | \docType{package}
 4 | \name{dtplyr-package}
 5 | \alias{dtplyr}
 6 | \alias{dtplyr-package}
 7 | \title{dtplyr: Data Table Back-End for 'dplyr'}
 8 | \description{
 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
10 | 
11 | Provides a data.table backend for 'dplyr'. The goal of 'dtplyr' is to allow you to write 'dplyr' code that is automatically translated to the equivalent, but usually much faster, data.table code.
12 | }
13 | \seealso{
14 | Useful links:
15 | \itemize{
16 |   \item \url{https://dtplyr.tidyverse.org}
17 |   \item \url{https://github.com/tidyverse/dtplyr}
18 |   \item Report bugs at \url{https://github.com/tidyverse/dtplyr/issues}
19 | }
20 | 
21 | }
22 | \author{
23 | \strong{Maintainer}: Hadley Wickham \email{hadley@posit.co}
24 | 
25 | Authors:
26 | \itemize{
27 |   \item Maximilian Girlich
28 |   \item Mark Fairbanks
29 |   \item Ryan Dickerson
30 | }
31 | 
32 | Other contributors:
33 | \itemize{
34 |   \item Posit Software, PBC [copyright holder, funder]
35 | }
36 | 
37 | }
38 | \keyword{internal}
39 | 


--------------------------------------------------------------------------------
/man/expand.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-expand.R
 3 | \name{expand.dtplyr_step}
 4 | \alias{expand.dtplyr_step}
 5 | \title{Expand data frame to include all possible combinations of values.}
 6 | \usage{
 7 | \method{expand}{dtplyr_step}(data, ..., .name_repair = "check_unique")
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{Specification of columns to expand. Columns can be atomic vectors
13 | or lists.
14 | \itemize{
15 | \item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not
16 | present in the data, supply each variable as a separate argument:
17 | \code{expand(df, x, y, z)}.
18 | \item To find only the combinations that occur in the
19 | data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}.
20 | \item You can combine the two forms. For example,
21 | \code{expand(df, nesting(school_id, student_id), date)} would produce
22 | a row for each present school-student combination for all possible
23 | dates.
24 | }
25 | 
26 | Unlike the data.frame method, this method does not use the full set of
27 | levels, just those that appear in the data.
28 | 
29 | When used with continuous variables, you may need to fill in values
30 | that do not appear in the data: to do so use expressions like
31 | \code{year = 2010:2020} or \code{year = full_seq(year,1)}.}
32 | 
33 | \item{.name_repair}{Treatment of problematic column names:
34 | \itemize{
35 | \item \code{"minimal"}: No name repair or checks, beyond basic existence,
36 | \item \code{"unique"}: Make sure names are unique and not empty,
37 | \item \code{"check_unique"}: (default value), no name repair, but check they are
38 | \code{unique},
39 | \item \code{"universal"}: Make the names \code{unique} and syntactic
40 | \item a function: apply custom name repair (e.g., \code{.name_repair = make.names}
41 | for names in the style of base R).
42 | \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}
43 | }
44 | 
45 | This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
46 | See there for more details on these terms and the strategies used
47 | to enforce them.}
48 | }
49 | \description{
50 | This is a method for the tidyr \code{expand()} generic. It is translated to
51 | \code{\link[data.table:J]{data.table::CJ()}}.
52 | }
53 | \examples{
54 | library(tidyr)
55 | 
56 | fruits <- lazy_dt(tibble(
57 |   type   = c("apple", "orange", "apple", "orange", "orange", "orange"),
58 |   year   = c(2010, 2010, 2012, 2010, 2010, 2012),
59 |   size  =  factor(
60 |     c("XS", "S",  "M", "S", "S", "M"),
61 |     levels = c("XS", "S", "M", "L")
62 |   ),
63 |   weights = rnorm(6, as.numeric(size) + 2)
64 | ))
65 | 
66 | # All possible combinations ---------------------------------------
67 | # Note that only present levels of the factor variable `size` are retained.
68 | fruits \%>\% expand(type)
69 | fruits \%>\% expand(type, size)
70 | 
71 | # This is different from the data frame behaviour:
72 | fruits \%>\% dplyr::collect() \%>\% expand(type, size)
73 | 
74 | # Other uses -------------------------------------------------------
75 | fruits \%>\% expand(type, size, 2010:2012)
76 | 
77 | # Use `anti_join()` to determine which observations are missing
78 | all <- fruits \%>\% expand(type, size, year)
79 | all
80 | all \%>\% dplyr::anti_join(fruits)
81 | 
82 | # Use with `right_join()` to fill in missing rows
83 | fruits \%>\% dplyr::right_join(all)
84 | }
85 | 


--------------------------------------------------------------------------------
/man/figures/dt-seal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/man/figures/dt-seal.png


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/man/figures/logo.png


--------------------------------------------------------------------------------
/man/fill.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fill.R
 3 | \name{fill.dtplyr_step}
 4 | \alias{fill.dtplyr_step}
 5 | \title{Fill in missing values with previous or next value}
 6 | \usage{
 7 | \method{fill}{dtplyr_step}(data, ..., .direction = c("down", "up", "downup", "updown"))
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to fill.}
13 | 
14 | \item{.direction}{Direction in which to fill missing values. Currently
15 | either "down" (the default), "up", "downup" (i.e. first down and then up)
16 | or "updown" (first up and then down).}
17 | }
18 | \description{
19 | This is a method for the tidyr \code{fill()} generic. It is translated to
20 | \code{\link[data.table:nafill]{data.table::nafill()}}. Note that \code{data.table::nafill()} currently only
21 | works for integer and double columns.
22 | }
23 | \examples{
24 | library(tidyr)
25 | 
26 | # Value (year) is recorded only when it changes
27 | sales <- lazy_dt(tibble::tribble(
28 |   ~quarter, ~year, ~sales,
29 |   "Q1",    2000,    66013,
30 |   "Q2",      NA,    69182,
31 |   "Q3",      NA,    53175,
32 |   "Q4",      NA,    21001,
33 |   "Q1",    2001,    46036,
34 |   "Q2",      NA,    58842,
35 |   "Q3",      NA,    44568,
36 |   "Q4",      NA,    50197,
37 |   "Q1",    2002,    39113,
38 |   "Q2",      NA,    41668,
39 |   "Q3",      NA,    30144,
40 |   "Q4",      NA,    52897,
41 |   "Q1",    2004,    32129,
42 |   "Q2",      NA,    67686,
43 |   "Q3",      NA,    31768,
44 |   "Q4",      NA,    49094
45 | ))
46 | 
47 | # `fill()` defaults to replacing missing data from top to bottom
48 | sales \%>\% fill(year)
49 | 
50 | # Value (n_squirrels) is missing above and below within a group
51 | squirrels <- lazy_dt(tibble::tribble(
52 |   ~group,    ~name,     ~role,     ~n_squirrels,
53 |   1,      "Sam",    "Observer",   NA,
54 |   1,     "Mara", "Scorekeeper",    8,
55 |   1,    "Jesse",    "Observer",   NA,
56 |   1,      "Tom",    "Observer",   NA,
57 |   2,     "Mike",    "Observer",   NA,
58 |   2,  "Rachael",    "Observer",   NA,
59 |   2,  "Sydekea", "Scorekeeper",   14,
60 |   2, "Gabriela",    "Observer",   NA,
61 |   3,  "Derrick",    "Observer",   NA,
62 |   3,     "Kara", "Scorekeeper",    9,
63 |   3,    "Emily",    "Observer",   NA,
64 |   3, "Danielle",    "Observer",   NA
65 | ))
66 | 
67 | # The values are inconsistently missing by position within the group
68 | # Use .direction = "downup" to fill missing values in both directions
69 | squirrels \%>\%
70 |   dplyr::group_by(group) \%>\%
71 |   fill(n_squirrels, .direction = "downup") \%>\%
72 |   dplyr::ungroup()
73 | 
74 | # Using `.direction = "updown"` accomplishes the same goal in this example
75 | }
76 | 


--------------------------------------------------------------------------------
/man/filter.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-filter.R
 3 | \name{filter.dtplyr_step}
 4 | \alias{filter.dtplyr_step}
 5 | \title{Subset rows using column values}
 6 | \usage{
 7 | \method{filter}{dtplyr_step}(.data, ..., .by = NULL, .preserve = FALSE)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that
13 | return a logical value, and are defined in terms of the variables in
14 | \code{.data}. If multiple expressions are included, they are combined with the
15 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are
16 | kept.}
17 | 
18 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
19 | 
20 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
21 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
22 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
23 | 
24 | \item{.preserve}{Ignored}
25 | }
26 | \description{
27 | This is a method for the dplyr \code{\link[=filter]{filter()}} generic. It is translated to
28 | the \code{i} argument of \verb{[.data.table}.
29 | }
30 | \examples{
31 | library(dplyr, warn.conflicts = FALSE)
32 | 
33 | dt <- lazy_dt(mtcars)
34 | dt \%>\% filter(cyl == 4)
35 | dt \%>\% filter(vs, am)
36 | 
37 | dt \%>\%
38 |   group_by(cyl) \%>\%
39 |   filter(mpg > mean(mpg))
40 | }
41 | 


--------------------------------------------------------------------------------
/man/group_by.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-group.R
 3 | \name{group_by.dtplyr_step}
 4 | \alias{group_by.dtplyr_step}
 5 | \alias{ungroup.dtplyr_step}
 6 | \title{Group and ungroup}
 7 | \usage{
 8 | \method{group_by}{dtplyr_step}(.data, ..., .add = FALSE, arrange = TRUE)
 9 | 
10 | \method{ungroup}{dtplyr_step}(x, ...)
11 | }
12 | \arguments{
13 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
14 | 
15 | \item{...}{In \code{group_by()}, variables or computations to group by.
16 | Computations are always done on the ungrouped data frame.
17 | To perform computations on the grouped data, you need to use
18 | a separate \code{mutate()} step before the \code{group_by()}.
19 | Computations are not allowed in \code{nest_by()}.
20 | In \code{ungroup()}, variables to remove from the grouping.}
21 | 
22 | \item{.add, add}{When \code{FALSE}, the default, \code{group_by()} will
23 | override existing groups. To add to the existing groups, use
24 | \code{.add = TRUE}.
25 | 
26 | This argument was previously called \code{add}, but that prevented
27 | creating a new grouping variable called \code{add}, and conflicts with
28 | our naming conventions.}
29 | 
30 | \item{arrange}{If \code{TRUE}, will automatically arrange the output of
31 | subsequent grouped operations by group. If \code{FALSE}, output order will be
32 | left unchanged. In the generated data.table code this switches between
33 | using the \code{keyby} (\code{TRUE}) and \code{by} (\code{FALSE}) arguments.}
34 | 
35 | \item{x}{A \code{\link[dplyr:tbl]{tbl()}}}
36 | }
37 | \description{
38 | These are methods for dplyr's \code{\link[=group_by]{group_by()}} and \code{\link[=ungroup]{ungroup()}} generics.
39 | Grouping is translated to either the \code{keyby} or \code{by} argument of
40 | \verb{[.data.table} depending on the value of the \code{arrange} argument.
41 | }
42 | \examples{
43 | library(dplyr, warn.conflicts = FALSE)
44 | dt <- lazy_dt(mtcars)
45 | 
46 | # group_by() is usually translated to `keyby` so that the groups
47 | # are ordered in the output
48 | dt \%>\%
49 |  group_by(cyl) \%>\%
50 |  summarise(mpg = mean(mpg))
51 | 
52 | # use `arrange = FALSE` to instead use `by` so the original order
53 | # of groups is preserved
54 | dt \%>\%
55 |  group_by(cyl, arrange = FALSE) \%>\%
56 |  summarise(mpg = mean(mpg))
57 | }
58 | 


--------------------------------------------------------------------------------
/man/group_modify.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-modify.R
 3 | \name{group_modify.dtplyr_step}
 4 | \alias{group_modify.dtplyr_step}
 5 | \alias{group_map.dtplyr_step}
 6 | \title{Apply a function to each group}
 7 | \usage{
 8 | \method{group_modify}{dtplyr_step}(.data, .f, ..., keep = FALSE)
 9 | 
10 | \method{group_map}{dtplyr_step}(.data, .f, ..., keep = FALSE)
11 | }
12 | \arguments{
13 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
14 | 
15 | \item{.f}{The name of a two-argument function. The first argument is passed
16 | \code{.SD}, the data.table representing the current group; the second argument
17 | is passed \code{.BY}, a list giving the current values of the grouping
18 | variables. The function should return a list or data.table.}
19 | 
20 | \item{...}{Additional arguments passed to \code{.f}}
21 | 
22 | \item{keep}{Not supported for \link{lazy_dt}.}
23 | }
24 | \value{
25 | \code{group_map()} applies \code{.f} to each group, returning a list.
26 | \code{group_modify()} replaces each group with the results of \code{.f}, returning a
27 | modified \code{\link[=lazy_dt]{lazy_dt()}}.
28 | }
29 | \description{
30 | These are methods for the dplyr \code{\link[=group_map]{group_map()}} and \code{\link[=group_modify]{group_modify()}} generics.
31 | They are both translated to \verb{[.data.table}.
32 | }
33 | \examples{
34 | library(dplyr)
35 | 
36 | dt <- lazy_dt(mtcars)
37 | 
38 | dt \%>\%
39 |   group_by(cyl) \%>\%
40 |   group_modify(head, n = 2L)
41 | 
42 | dt \%>\%
43 |   group_by(cyl) \%>\%
44 |   group_map(head, n = 2L)
45 | }
46 | 


--------------------------------------------------------------------------------
/man/head.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-call.R
 3 | \name{head.dtplyr_step}
 4 | \alias{head.dtplyr_step}
 5 | \alias{tail.dtplyr_step}
 6 | \title{Subset first or last rows}
 7 | \usage{
 8 | \method{head}{dtplyr_step}(x, n = 6L, ...)
 9 | 
10 | \method{tail}{dtplyr_step}(x, n = 6L, ...)
11 | }
12 | \arguments{
13 | \item{x}{A \code{\link[=lazy_dt]{lazy_dt()}}}
14 | 
15 | \item{n}{Number of rows to select. Can use a negative number to instead
16 | drop rows from the other end.}
17 | 
18 | \item{...}{Passed on to \code{\link[=head]{head()}}/\code{\link[=tail]{tail()}}.}
19 | }
20 | \description{
21 | These are methods for the base generics \code{\link[=head]{head()}} and \code{\link[=tail]{tail()}}. They
22 | are not translated.
23 | }
24 | \examples{
25 | library(dplyr, warn.conflicts = FALSE)
26 | dt <- lazy_dt(data.frame(x = 1:10))
27 | 
28 | # first three rows
29 | head(dt, 3)
30 | # last three rows
31 | tail(dt, 3)
32 | 
33 | # drop first three rows
34 | tail(dt, -3)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/intersect.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-set.R
 3 | \name{intersect.dtplyr_step}
 4 | \alias{intersect.dtplyr_step}
 5 | \alias{union.dtplyr_step}
 6 | \alias{union_all.dtplyr_step}
 7 | \alias{setdiff.dtplyr_step}
 8 | \title{Set operations}
 9 | \usage{
10 | \method{intersect}{dtplyr_step}(x, y, ...)
11 | 
12 | \method{union}{dtplyr_step}(x, y, ...)
13 | 
14 | \method{union_all}{dtplyr_step}(x, y, ...)
15 | 
16 | \method{setdiff}{dtplyr_step}(x, y, ...)
17 | }
18 | \arguments{
19 | \item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.}
20 | 
21 | \item{...}{Ignored}
22 | }
23 | \description{
24 | These are methods for the dplyr generics \code{\link[=intersect]{intersect()}}, \code{\link[=union]{union()}},
25 | \code{\link[=union_all]{union_all()}}, and \code{\link[=setdiff]{setdiff()}}. They are translated to
26 | \code{\link[data.table:setops]{data.table::fintersect()}}, \code{\link[data.table:setops]{data.table::funion()}}, and
27 | \code{\link[data.table:setops]{data.table::fsetdiff()}}.
28 | }
29 | \examples{
30 | dt1 <- lazy_dt(data.frame(x = 1:4))
31 | dt2 <- lazy_dt(data.frame(x = c(2, 4, 6)))
32 | 
33 | intersect(dt1, dt2)
34 | union(dt1, dt2)
35 | setdiff(dt1, dt2)
36 | 
37 | }
38 | 


--------------------------------------------------------------------------------
/man/lazy_dt.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-first.R
 3 | \name{lazy_dt}
 4 | \alias{lazy_dt}
 5 | \alias{tbl_dt}
 6 | \alias{grouped_dt}
 7 | \title{Create a "lazy" data.table for use with dplyr verbs}
 8 | \usage{
 9 | lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL)
10 | }
11 | \arguments{
12 | \item{x}{A data table (or something that can be coerced to a data table).}
13 | 
14 | \item{name}{Optionally, supply a name to be used in generated expressions.
15 | For expert use only.}
16 | 
17 | \item{immutable}{If \code{TRUE}, \code{x} is treated as immutable and will never
18 | be modified by any code generated by dtplyr. Alternatively, you can set
19 | \code{immutable = FALSE} to allow dtplyr to modify the input object.}
20 | 
21 | \item{key_by}{Set keys for data frame, using \code{\link[=select]{select()}} semantics (e.g.
22 | \code{key_by = c(key1, key2)}).
23 | 
24 | This uses \code{\link[data.table:setkey]{data.table::setkey()}} to sort the table and build an index.
25 | This will considerably improve performance for subsets, summaries, and
26 | joins that use the keys.
27 | 
28 | See \code{vignette("datatable-keys-fast-subset")} for more details.}
29 | }
30 | \description{
31 | A lazy data.table captures the intent of dplyr verbs, only actually
32 | performing computation when requested (with \code{\link[=collect]{collect()}}, \code{\link[=pull]{pull()}},
33 | \code{\link[=as.data.frame]{as.data.frame()}}, \code{\link[data.table:as.data.table]{data.table::as.data.table()}}, or \code{\link[tibble:as_tibble]{tibble::as_tibble()}}).
34 | This allows dtplyr to convert dplyr verbs into as few data.table expressions
35 | as possible, which leads to a high performance translation.
36 | 
37 | See \code{vignette("translation")} for the details of the translation.
38 | }
39 | \examples{
40 | library(dplyr, warn.conflicts = FALSE)
41 | 
42 | mtcars2 <- lazy_dt(mtcars)
43 | mtcars2
44 | mtcars2 \%>\% select(mpg:cyl)
45 | mtcars2 \%>\% select(x = mpg, y = cyl)
46 | mtcars2 \%>\% filter(cyl == 4) \%>\% select(mpg)
47 | mtcars2 \%>\% select(mpg, cyl) \%>\% filter(cyl == 4)
48 | mtcars2 \%>\% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
49 | mtcars2 \%>\% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
50 | mtcars2 \%>\% filter(cyl == 8) \%>\% mutate(cyl2 = cyl * 2)
51 | 
52 | # Learn more about translation in vignette("translation")
53 | by_cyl <- mtcars2 \%>\% group_by(cyl)
54 | by_cyl \%>\% summarise(mpg = mean(mpg))
55 | by_cyl \%>\% mutate(mpg = mean(mpg))
56 | by_cyl \%>\%
57 |   filter(mpg < mean(mpg)) \%>\%
58 |   summarise(hp = mean(hp))
59 | }
60 | 


--------------------------------------------------------------------------------
/man/left_join.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-join.R
 3 | \name{left_join.dtplyr_step}
 4 | \alias{left_join.dtplyr_step}
 5 | \title{Join data tables}
 6 | \usage{
 7 | \method{left_join}{dtplyr_step}(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y"))
 8 | }
 9 | \arguments{
10 | \item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.}
11 | 
12 | \item{...}{Other parameters passed onto methods.}
13 | 
14 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
15 | vector of variables to join by.
16 | 
17 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
18 | variables in common across \code{x} and \code{y}. A message lists the variables so
19 | that you can check they're correct; suppress the message by supplying \code{by}
20 | explicitly.
21 | 
22 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
23 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
24 | 
25 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
26 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match
27 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
28 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like
29 | \code{join_by(a, c)}.
30 | 
31 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
32 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
33 | these types of joins.
34 | 
35 | For simple equality joins, you can alternatively specify a character vector
36 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
37 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
38 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
39 | 
40 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
41 | \code{\link[dplyr:cross_join]{cross_join()}}.}
42 | 
43 | \item{copy}{If \code{x} and \code{y} are not from the same data source,
44 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
45 | same src as \code{x}.  This allows you to join tables across srcs, but
46 | it is a potentially expensive operation so you must opt into it.}
47 | 
48 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and
49 | \code{y}, these suffixes will be added to the output to disambiguate them.
50 | Should be a character vector of length 2.}
51 | }
52 | \description{
53 | These are methods for the dplyr generics \code{\link[=left_join]{left_join()}}, \code{\link[=right_join]{right_join()}},
54 | \code{\link[=inner_join]{inner_join()}}, \code{\link[=full_join]{full_join()}}, \code{\link[=anti_join]{anti_join()}}, and \code{\link[=semi_join]{semi_join()}}. Left, right,
55 | inner, and anti join are translated to the \verb{[.data.table} equivalent,
56 | full joins to \code{\link[data.table:merge]{data.table::merge.data.table()}}.
57 | Left, right, and full joins are in some cases followed by calls to
58 | \code{\link[data.table:setcolorder]{data.table::setcolorder()}} and \code{\link[data.table:setattr]{data.table::setnames()}} to ensure that column
59 | order and names match dplyr conventions.
60 | Semi-joins don't have a direct data.table equivalent.
61 | }
62 | \examples{
63 | library(dplyr, warn.conflicts = FALSE)
64 | 
65 | band_dt <- lazy_dt(dplyr::band_members)
66 | instrument_dt <- lazy_dt(dplyr::band_instruments)
67 | 
68 | band_dt \%>\% left_join(instrument_dt)
69 | band_dt \%>\% right_join(instrument_dt)
70 | band_dt \%>\% inner_join(instrument_dt)
71 | band_dt \%>\% full_join(instrument_dt)
72 | 
73 | band_dt \%>\% semi_join(instrument_dt)
74 | band_dt \%>\% anti_join(instrument_dt)
75 | }
76 | 


--------------------------------------------------------------------------------
/man/mutate.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-mutate.R
 3 | \name{mutate.dtplyr_step}
 4 | \alias{mutate.dtplyr_step}
 5 | \title{Create and modify columns}
 6 | \usage{
 7 | \method{mutate}{dtplyr_step}(
 8 |   .data,
 9 |   ...,
10 |   .by = NULL,
11 |   .keep = c("all", "used", "unused", "none"),
12 |   .before = NULL,
13 |   .after = NULL
14 | )
15 | }
16 | \arguments{
17 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
18 | 
19 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs.
20 | The name gives the name of the column in the output.
21 | 
22 | The value can be:
23 | \itemize{
24 | \item A vector of length 1, which will be recycled to the correct length.
25 | \item A vector the same length as the current group (or the whole data frame
26 | if ungrouped).
27 | \item \code{NULL}, to remove the column.
28 | \item A data frame or tibble, to create multiple columns in the output.
29 | }}
30 | 
31 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
32 | 
33 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
34 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
35 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
36 | 
37 | \item{.keep}{Control which columns from \code{.data} are retained in the output. Grouping
38 | columns and columns created by \code{...} are always kept.
39 | \itemize{
40 | \item \code{"all"} retains all columns from \code{.data}. This is the default.
41 | \item \code{"used"} retains only the columns used in \code{...} to create new
42 | columns. This is useful for checking your work, as it displays inputs
43 | and outputs side-by-side.
44 | \item \code{"unused"} retains only the columns \emph{not} used in \code{...} to create new
45 | columns. This is useful if you generate new columns, but no longer need
46 | the columns used to generate them.
47 | \item \code{"none"} doesn't retain any extra columns from \code{.data}. Only the grouping
48 | variables and columns created by \code{...} are kept.
49 | }
50 | 
51 | Note: With dtplyr, \code{.keep} will only work with column names passed as symbols, and won't
52 | work with other workflows (e.g. \code{eval(parse(text = "x + 1"))}).}
53 | 
54 | \item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns
55 | should appear (the default is to add to the right hand side). See
56 | \code{\link[dplyr:relocate]{relocate()}} for more details.}
57 | }
58 | \description{
59 | This is a method for the dplyr \code{\link[=mutate]{mutate()}} generic. It is translated to
60 | the \code{j} argument of \verb{[.data.table}, using \verb{:=} to modify "in place". If
61 | \code{.before} or \code{.after} is provided, the new columns are relocated with a call
62 | to \code{\link[data.table:setcolorder]{data.table::setcolorder()}}.
63 | }
64 | \examples{
65 | library(dplyr, warn.conflicts = FALSE)
66 | 
67 | dt <- lazy_dt(data.frame(x = 1:5, y = 5:1))
68 | dt \%>\%
69 |   mutate(a = (x + y) / 2, b = sqrt(x^2 + y^2))
70 | 
71 | # It uses a more sophisticated translation when newly created variables
72 | # are used in the same expression
73 | dt \%>\%
74 |   mutate(x1 = x + 1, x2 = x1 + 1)
75 | }
76 | 


--------------------------------------------------------------------------------
/man/nest.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-nest.R
 3 | \name{nest.dtplyr_step}
 4 | \alias{nest.dtplyr_step}
 5 | \title{Nest}
 6 | \usage{
 7 | \method{nest}{dtplyr_step}(.data, ..., .names_sep = NULL, .key = deprecated())
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest, specified
13 | using name-variable pairs of the form \code{new_col = c(col1, col2, col3)}.
14 | The right hand side can be any valid tidy select expression.}
15 | 
16 | \item{.names_sep}{If \code{NULL}, the default, the inner names will come from
17 | the former outer names. If a string, the new inner names will use the
18 | outer names with \code{names_sep} automatically stripped. This makes
19 | \code{names_sep} roughly symmetric between nesting and unnesting.}
20 | 
21 | \item{.key}{Not supported.}
22 | 
23 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
24 | }
25 | \description{
26 | This is a method for the tidyr \code{\link[tidyr:nest]{tidyr::nest()}} generic. It is translated
27 | using the non-nested variables in the \code{by} argument and \code{.SD} in the \code{j}
28 | argument.
29 | }
30 | \examples{
31 | if (require("tidyr", quietly = TRUE)) {
32 |   dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b")))
33 |   dt \%>\% nest(data = y)
34 | 
35 |   dt \%>\% dplyr::group_by(x) \%>\% nest()
36 | }
37 | }
38 | 


--------------------------------------------------------------------------------
/man/reframe.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reframe.R
 3 | \name{reframe.dtplyr_step}
 4 | \alias{reframe.dtplyr_step}
 5 | \title{Transform each group to an arbitrary number of rows}
 6 | \usage{
 7 | \method{reframe}{dtplyr_step}(.data, ..., .by = NULL)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}>
13 | 
14 | Name-value pairs of functions. The name will be the name of the variable in
15 | the result. The value can be a vector of any length.
16 | 
17 | Unnamed data frame values add multiple columns from a single expression.}
18 | 
19 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
20 | 
21 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
22 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
23 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
24 | }
25 | \description{
26 | This is a method for the dplyr \code{\link[=reframe]{reframe()}} generic. It is translated to
27 | the \code{j} argument of \verb{[.data.table}.
28 | }
29 | \examples{
30 | library(dplyr, warn.conflicts = FALSE)
31 | 
32 | dt <- lazy_dt(mtcars)
33 | 
34 | dt \%>\%
35 |   reframe(qs = quantile(disp, c(0.25, 0.75)),
36 |           prob = c(0.25, 0.75),
37 |           .by = cyl)
38 | 
39 | dt \%>\%
40 |   group_by(cyl) \%>\%
41 |   reframe(qs = quantile(disp, c(0.25, 0.75)),
42 |           prob = c(0.25, 0.75))
43 | }
44 | 


--------------------------------------------------------------------------------
/man/relocate.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-colorder-relocate.R
 3 | \name{relocate.dtplyr_step}
 4 | \alias{relocate.dtplyr_step}
 5 | \title{Relocate variables using their names}
 6 | \usage{
 7 | \method{relocate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to move.}
13 | 
14 | \item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of
15 | columns selected by \code{...}. Supplying neither will move columns to the
16 | left-hand side; specifying both is an error.}
17 | }
18 | \description{
19 | This is a method for the dplyr \code{\link[=relocate]{relocate()}} generic. It is translated to
20 | the \code{j} argument of \verb{[.data.table}.
21 | }
22 | \examples{
23 | library(dplyr, warn.conflicts = FALSE)
24 | 
25 | dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
26 | 
27 | dt \%>\% relocate(z)
28 | dt \%>\% relocate(y, .before = x)
29 | dt \%>\% relocate(y, .after = z)
30 | }
31 | 


--------------------------------------------------------------------------------
/man/rename.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-call.R
 3 | \name{rename.dtplyr_step}
 4 | \alias{rename.dtplyr_step}
 5 | \alias{rename_with.dtplyr_step}
 6 | \title{Rename columns using their names}
 7 | \usage{
 8 | \method{rename}{dtplyr_step}(.data, ...)
 9 | 
10 | \method{rename_with}{dtplyr_step}(.data, .fn, .cols = everything(), ...)
11 | }
12 | \arguments{
13 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}}
14 | 
15 | \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use
16 | \code{new_name = old_name} to rename selected variables.
17 | 
18 | For \code{rename_with()}: additional arguments passed onto \code{.fn}.}
19 | 
20 | \item{.fn}{A function used to transform the selected \code{.cols}. Should
21 | return a character vector the same length as the input.}
22 | 
23 | \item{.cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to rename;
24 | defaults to all columns.}
25 | }
26 | \description{
27 | These are methods for the dplyr generics \code{\link[=rename]{rename()}} and \code{\link[=rename_with]{rename_with()}}.
28 | They are both translated to \code{\link[data.table:setattr]{data.table::setnames()}}.
29 | }
30 | \examples{
31 | library(dplyr, warn.conflicts = FALSE)
32 | dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
33 | dt \%>\% rename(new_x = x, new_y = y)
34 | dt \%>\% rename_with(toupper)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/replace_na.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/replace_na.R
 3 | \name{replace_na.dtplyr_step}
 4 | \alias{replace_na.dtplyr_step}
 5 | \title{Replace NAs with specified values}
 6 | \usage{
 7 | \method{replace_na}{dtplyr_step}(data, replace = list())
 8 | }
 9 | \arguments{
10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{replace}{If \code{data} is a data frame, \code{replace} takes a named list of
13 | values, with one value for each column that has missing values to be
14 | replaced. Each value in \code{replace} will be cast to the type of the column
15 | in \code{data} that it is being used as a replacement in.
16 | 
17 | If \code{data} is a vector, \code{replace} takes a single value. This single value
18 | replaces all of the missing values in the vector. \code{replace} will be cast
19 | to the type of \code{data}.}
20 | }
21 | \description{
22 | This is a method for the tidyr \code{replace_na()} generic. It is translated to
23 | \code{\link[data.table:coalesce]{data.table::fcoalesce()}}.
24 | 
25 | Note that unlike \code{tidyr::replace_na()}, \code{data.table::fcoalesce()} cannot
26 | replace \code{NULL} values in lists.
27 | }
28 | \examples{
29 | library(tidyr)
30 | 
31 | # Replace NAs in a data frame
32 | dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b")))
33 | dt \%>\% replace_na(list(x = 0, y = "unknown"))
34 | 
35 | # Replace NAs using `dplyr::mutate()`
36 | dt \%>\% dplyr::mutate(x = replace_na(x, 0))
37 | }
38 | 


--------------------------------------------------------------------------------
/man/select.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-select.R
 3 | \name{select.dtplyr_step}
 4 | \alias{select.dtplyr_step}
 5 | \title{Subset columns using their names}
 6 | \usage{
 7 | \method{select}{dtplyr_step}(.data, ...)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted
13 | expressions separated by commas. Variable names can be used as if they
14 | were positions in the data frame, so expressions like \code{x:y} can
15 | be used to select a range of variables.}
16 | }
17 | \description{
18 | This is a method for the dplyr \code{\link[=select]{select()}} generic. It is translated to
19 | the \code{j} argument of \verb{[.data.table}.
20 | }
21 | \examples{
22 | library(dplyr, warn.conflicts = FALSE)
23 | 
24 | dt <- lazy_dt(data.frame(x1 = 1, x2 = 2, y1 = 3, y2 = 4))
25 | 
26 | dt \%>\% select(starts_with("x"))
27 | dt \%>\% select(ends_with("2"))
28 | dt \%>\% select(z1 = x1, z2 = x2)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/separate.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-separate.R
 3 | \name{separate.dtplyr_step}
 4 | \alias{separate.dtplyr_step}
 5 | \title{Separate a character column into multiple columns with a regular
 6 | expression or numeric locations}
 7 | \usage{
 8 | \method{separate}{dtplyr_step}(
 9 |   data,
10 |   col,
11 |   into,
12 |   sep = "[^[:alnum:]]+",
13 |   remove = TRUE,
14 |   convert = FALSE,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
20 | 
21 | \item{col}{Column name or position.
22 | 
23 | This argument is passed by expression and supports quasiquotation
24 | (you can unquote column names or column positions).}
25 | 
26 | \item{into}{Names of new variables to create as character vector.
27 | Use \code{NA} to omit the variable in the output.}
28 | 
29 | \item{sep}{Separator between columns.
30 | The default value is a regular expression that matches any sequence of non-alphanumeric values.}
31 | 
32 | \item{remove}{If TRUE, remove the input column from the output data frame.}
33 | 
34 | \item{convert}{If TRUE, will run type.convert() with as.is = TRUE on new columns.
35 | This is useful if the component columns are integer, numeric or logical.
36 | 
37 | NB: this will cause string "NA"s to be converted to NAs.}
38 | 
39 | \item{...}{Arguments passed on to methods}
40 | }
41 | \description{
42 | This is a method for the \code{\link[tidyr:separate]{tidyr::separate()}} generic. It is translated to
43 | \code{\link[data.table:tstrsplit]{data.table::tstrsplit()}} in the \code{j} argument of \verb{[.data.table}.
44 | }
45 | \examples{
46 | library(tidyr)
47 | # If you want to split by any non-alphanumeric value (the default):
48 | df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT")
49 | df \%>\% separate(x, c("A", "B"))
50 | 
51 | # If you just want the second variable:
52 | df \%>\% separate(x, c(NA, "B"))
53 | 
54 | # Use regular expressions to separate on multiple characters:
55 | df <- lazy_dt(data.frame(x = c(NA, "x?y", "x.z", "y:z")), "DT")
56 | df \%>\% separate(x, c("A","B"), sep = "([.?:])")
57 | 
58 | # convert = TRUE detects column classes:
59 | df <- lazy_dt(data.frame(x = c("x:1", "x:2", "y:4", "z", NA)), "DT")
60 | df \%>\% separate(x, c("key","value"), ":") \%>\% str
61 | df \%>\% separate(x, c("key","value"), ":", convert = TRUE) \%>\% str
62 | }
63 | 


--------------------------------------------------------------------------------
/man/slice.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-slice.R
 3 | \name{slice.dtplyr_step}
 4 | \alias{slice.dtplyr_step}
 5 | \alias{slice_head.dtplyr_step}
 6 | \alias{slice_tail.dtplyr_step}
 7 | \alias{slice_min.dtplyr_step}
 8 | \alias{slice_max.dtplyr_step}
 9 | \title{Subset rows using their positions}
10 | \usage{
11 | \method{slice}{dtplyr_step}(.data, ..., .by = NULL)
12 | 
13 | \method{slice_head}{dtplyr_step}(.data, ..., n, prop, by = NULL)
14 | 
15 | \method{slice_tail}{dtplyr_step}(.data, ..., n, prop, by = NULL)
16 | 
17 | \method{slice_min}{dtplyr_step}(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE)
18 | 
19 | \method{slice_max}{dtplyr_step}(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE)
20 | }
21 | \arguments{
22 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
23 | 
24 | \item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}>
25 | Integer row values.
26 | 
27 | Provide either positive values to keep, or negative values to drop.
28 | The values provided must be either all positive or all negative.
29 | Indices beyond the number of rows in the input are silently ignored.
30 | 
31 | For \verb{slice_*()}, these arguments are passed on to methods.}
32 | 
33 | \item{.by, by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
34 | 
35 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
36 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
37 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
38 | 
39 | \item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the
40 | proportion of rows to select. If neither are supplied, \code{n = 1} will be
41 | used. If \code{n} is greater than the number of rows in the group
42 | (or \code{prop > 1}), the result will be silently truncated to the group size.
43 | \code{prop} will be rounded towards zero to generate an integer number of
44 | rows.
45 | 
46 | A negative value of \code{n} or \code{prop} will be subtracted from the group
47 | size. For example, \code{n = -2} with a group of 5 rows will select 5 - 2 = 3
48 | rows; \code{prop = -0.25} with 8 rows will select 8 * (1 - 0.25) = 6 rows.}
49 | 
50 | \item{order_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variable or
51 | function of variables to order by. To order by multiple variables, wrap
52 | them in a data frame or tibble.}
53 | 
54 | \item{with_ties}{Should ties be kept together? The default, \code{TRUE},
55 | may return more rows than you request. Use \code{FALSE} to ignore ties,
56 | and return the first \code{n} rows.}
57 | }
58 | \description{
59 | These are methods for the dplyr \code{\link[=slice]{slice()}}, \code{slice_head()}, \code{slice_tail()},
60 | \code{slice_min()}, \code{slice_max()} and \code{slice_sample()} generics. They are
61 | translated to the \code{i} argument of \verb{[.data.table}.
62 | 
63 | Unlike dplyr, \code{slice()} (and \code{slice()} alone) returns the same number of
64 | rows per group, regardless of whether or not the indices appear in each
65 | group.
66 | }
67 | \examples{
68 | library(dplyr, warn.conflicts = FALSE)
69 | 
70 | dt <- lazy_dt(mtcars)
71 | dt \%>\% slice(1, 5, 10)
72 | dt \%>\% slice(-(1:4))
73 | 
74 | # First and last rows based on existing order
75 | dt \%>\% slice_head(n = 5)
76 | dt \%>\% slice_tail(n = 5)
77 | 
78 | # Rows with minimum and maximum values of a variable
79 | dt \%>\% slice_min(mpg, n = 5)
80 | dt \%>\% slice_max(mpg, n = 5)
81 | 
82 | # slice_min() and slice_max() may return more rows than requested
83 | # in the presence of ties. Use with_ties = FALSE to suppress
84 | dt \%>\% slice_min(cyl, n = 1)
85 | dt \%>\% slice_min(cyl, n = 1, with_ties = FALSE)
86 | 
87 | # slice_sample() allows you to randomly select with or without replacement
88 | dt \%>\% slice_sample(n = 5)
89 | dt \%>\% slice_sample(n = 5, replace = TRUE)
90 | 
91 | # you can optionally weight by a variable - this code weights by the
92 | # physical weight of the cars, so heavy cars are more likely to get
93 | # selected
94 | dt \%>\% slice_sample(weight_by = wt, n = 5)
95 | }
96 | 


--------------------------------------------------------------------------------
/man/summarise.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-summarise.R
 3 | \name{summarise.dtplyr_step}
 4 | \alias{summarise.dtplyr_step}
 5 | \title{Summarise each group to one row}
 6 | \usage{
 7 | \method{summarise}{dtplyr_step}(.data, ..., .by = NULL, .groups = NULL)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of
13 | summary functions. The name will be the name of the variable in the result.
14 | 
15 | The value can be:
16 | \itemize{
17 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
18 | \item A data frame, to add multiple columns from a single expression.
19 | }
20 | 
21 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was
22 | deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.}
23 | 
24 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
25 | 
26 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
27 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
28 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}
29 | 
30 | \item{.groups}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Grouping structure of the
31 | result.
32 | \itemize{
33 | \item "drop_last": dropping the last level of grouping. This was the
34 | only supported option before version 1.0.0.
35 | \item "drop": All levels of grouping are dropped.
36 | \item "keep": Same grouping structure as \code{.data}.
37 | \item "rowwise": Each row is its own group. Not supported by dtplyr.
38 | }
39 | 
40 | When \code{.groups} is not specified, it is chosen
41 | based on the number of rows of the results:
42 | \itemize{
43 | \item If all the results have 1 row, you get "drop_last".
44 | \item If the number of rows varies, you get "keep" (note that returning a
45 | variable number of rows was deprecated in favor of \code{\link[dplyr:reframe]{reframe()}}, which
46 | also unconditionally drops all levels of grouping).
47 | }
48 | 
49 | In addition, a message informs you of that choice, unless the result is ungrouped,
50 | the option "dplyr.summarise.inform" is set to \code{FALSE},
51 | or when \code{summarise()} is called from a function in a package.}
52 | }
53 | \description{
54 | This is a method for the dplyr \code{\link[=summarise]{summarise()}} generic. It is translated to
55 | the \code{j} argument of \verb{[.data.table}.
56 | }
57 | \examples{
58 | library(dplyr, warn.conflicts = FALSE)
59 | 
60 | dt <- lazy_dt(mtcars)
61 | 
62 | dt \%>\%
63 |   group_by(cyl) \%>\%
64 |   summarise(vs = mean(vs))
65 | 
66 | dt \%>\%
67 |   group_by(cyl) \%>\%
68 |   summarise(across(disp:wt, mean))
69 | }
70 | 


--------------------------------------------------------------------------------
/man/transmute.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/step-subset-transmute.R
 3 | \name{transmute.dtplyr_step}
 4 | \alias{transmute.dtplyr_step}
 5 | \title{Create new columns, dropping old}
 6 | \usage{
 7 | \method{transmute}{dtplyr_step}(.data, ...)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.}
11 | 
12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs.
13 | The name gives the name of the column in the output.
14 | 
15 | The value can be:
16 | \itemize{
17 | \item A vector of length 1, which will be recycled to the correct length.
18 | \item A vector the same length as the current group (or the whole data frame
19 | if ungrouped).
20 | \item \code{NULL}, to remove the column.
21 | \item A data frame or tibble, to create multiple columns in the output.
22 | }}
23 | }
24 | \description{
25 | This is a method for the dplyr \code{\link[=transmute]{transmute()}} generic. It is translated to
26 | the \code{j} argument of \verb{[.data.table}.
27 | }
28 | \examples{
29 | library(dplyr, warn.conflicts = FALSE)
30 | 
31 | dt <- lazy_dt(dplyr::starwars)
32 | dt \%>\% transmute(name, sh = paste0(species, "/", homeworld))
33 | }
34 | 


--------------------------------------------------------------------------------
/man/unite.dtplyr_step.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/unite.R
 3 | \name{unite.dtplyr_step}
 4 | \alias{unite.dtplyr_step}
 5 | \title{Unite multiple columns into one by pasting strings together.}
 6 | \usage{
 7 | \method{unite}{dtplyr_step}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE)
 8 | }
 9 | \arguments{
10 | \item{data}{A data frame.}
11 | 
12 | \item{col}{The name of the new column, as a string or symbol.
13 | 
14 | This argument is passed by expression and supports
15 | \link[rlang:topic-inject]{quasiquotation} (you can unquote strings
16 | and symbols). The name is captured from the expression with
17 | \code{\link[rlang:defusing-advanced]{rlang::ensym()}} (note that this kind of interface where
18 | symbols do not represent actual objects is now discouraged in the
19 | tidyverse; we support it here for backward compatibility).}
20 | 
21 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite}
22 | 
23 | \item{sep}{Separator to use between values.}
24 | 
25 | \item{remove}{If \code{TRUE}, remove input columns from output data frame.}
26 | 
27 | \item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting
28 | each value. Note that \code{na.rm} is not implemented in dtplyr.}
29 | }
30 | \description{
31 | This is a method for the tidyr \code{unite()} generic.
32 | }
33 | \examples{
34 | library(tidyr)
35 | 
36 | df <- lazy_dt(expand_grid(x = c("a", NA), y = c("b", NA)))
37 | df
38 | 
39 | df \%>\% unite("z", x:y, remove = FALSE)
40 | 
41 | # Separate is almost the complement of unite
42 | df \%>\%
43 |   unite("xy", x:y) \%>\%
44 |   separate(xy, c("x", "y"))
45 | # (but note that `x` and `y` now contain the string "NA", not NA)
46 | }
47 | 


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | data.sqlite
6 | *.html
7 | cloud.noindex
8 | 


--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
1 | # Revdeps
2 | 
3 | ## New problems (1)
4 | 
5 | |package |version |error  |warning |note |
6 | |:-------|:-------|:------|:-------|:----|
7 | |[rFIA](problems.md#rfia)|1.0.0   |__+1__ |        |     |
8 | 
9 | 


--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
 1 | ## revdepcheck results
 2 | 
 3 | We checked 11 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
 4 | 
 5 |  * We saw 1 new problems
 6 |  * We failed to check 0 packages
 7 | 
 8 | Issues with CRAN packages are summarised below.
 9 | 
10 | ### New problems
11 | (This reports the first line of each new failure)
12 | 
13 | * rFIA
14 |   checking examples ... ERROR
15 | 
16 | 


--------------------------------------------------------------------------------
/revdep/email.yml:
--------------------------------------------------------------------------------
1 | release_date: Feb 19
2 | rel_release_date: two weeks
3 | my_news_url: https://github.com/tidyverse/dtplyr/blob/master/NEWS.md
4 | release_version: 1.1.0
5 | release_details: >
6 |   This is a relatively short deadline, but it looks like the problems are
7 |   relatively small, and I've contributed PRs where needed.
8 | 
9 | 


--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
 1 | # rFIA
 2 | 
 3 | <details>
 4 | 
 5 | * Version: 1.0.0
 6 | * GitHub: https://github.com/hunter-stanke/rFIA
 7 | * Source code: https://github.com/cran/rFIA
 8 | * Date/Publication: 2021-12-15 18:10:02 UTC
 9 | * Number of recursive dependencies: 84
10 | 
11 | Run `revdepcheck::cloud_details(, "rFIA")` for more info
12 | 
13 | </details>
14 | 
15 | ## Newly broken
16 | 
17 | *   checking examples ... ERROR
18 |     ```
19 |     Running examples in ‘rFIA-Ex.R’ failed
20 |     The error most likely occurred in:
21 |     
22 |     > ### Name: area
23 |     > ### Title: Estimate land area from FIADB
24 |     > ### Aliases: area
25 |     > 
26 |     > ### ** Examples
27 |     > 
28 |     > ## Load data from the rFIA package
29 |     ...
30 |      14. │             └─tidyselect:::walk_data_tree(new, data_mask, context_mask)
31 |      15. │               └─tidyselect:::as_indices_sel_impl(...)
32 |      16. │                 └─tidyselect:::as_indices_impl(...)
33 |      17. │                   └─tidyselect:::chr_as_locations(x, vars, call = call, arg = arg)
34 |      18. │                     └─vctrs::vec_as_location(...)
35 |      19. └─vctrs (local) `<fn>`()
36 |      20.   └─vctrs:::stop_subscript_oob(...)
37 |      21.     └─vctrs:::stop_subscript(...)
38 |      22.       └─rlang::abort(...)
39 |     Execution halted
40 |     ```
41 | 
42 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(dtplyr)
3 | 
4 | test_check("dtplyr")  # run the testthat suite of the dtplyr package
5 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/count.md:
--------------------------------------------------------------------------------
1 | # can control name
2 | 
3 |     Code
4 |       dt %>% count(name = 10) %>% collect()
5 |     Condition
6 |       Error in `check_name()`:
7 |       ! `name` must be a string
8 | 
9 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-call-pivot_longer.md:
--------------------------------------------------------------------------------
 1 | # can pivot to multiple measure cols
 2 | 
 3 |     Code
 4 |       show_query(step)
 5 |     Output
 6 |       melt(DT, measure.vars = list(c("x1", "x2", "x3", "x4"), c("y1", 
 7 |       "y2", "y3", "y4")), variable.name = "set", value.name = c("x", 
 8 |       "y"), variable.factor = FALSE)[, `:=`(set = c("1", "1", "2", 
 9 |       "2", "3", "3", "4", "4"))]
10 | 
11 | # errors on unbalanced datasets
12 | 
13 |     Code
14 |       pivot_longer(dt, everything(), names_to = c(".value", "id"), names_sep = "_")
15 |     Condition
16 |       Error in `pivot_longer()`:
17 |       ! `data.table::melt()` doesn't currently support melting of unbalanced datasets.
18 | 
19 | # informative errors on unsupported features
20 | 
21 |     Code
22 |       dt %>% pivot_longer(names_ptypes = list())
23 |     Condition
24 |       Error in `pivot_longer()`:
25 |       ! `names_ptypes` is not supported by dtplyr
26 |     Code
27 |       dt %>% pivot_longer(names_transform = list())
28 |     Condition
29 |       Error in `pivot_longer()`:
30 |       ! `names_transform` is not supported by dtplyr
31 |     Code
32 |       dt %>% pivot_longer(values_ptypes = list())
33 |     Condition
34 |       Error in `pivot_longer()`:
35 |       ! `values_ptypes` is not supported by dtplyr
36 |     Code
37 |       dt %>% pivot_longer(values_transform = list())
38 |     Condition
39 |       Error in `pivot_longer()`:
40 |       ! `values_transform` is not supported by dtplyr
41 | 
42 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-call-pivot_wider.md:
--------------------------------------------------------------------------------
 1 | # names_glue affects output names
 2 | 
 3 |     Code
 4 |       show_query(step)
 5 |     Output
 6 |       setnames(dcast(DT, formula = "..." ~ x + y, value.var = c("a", 
 7 |       "b"))[, `:=`(".", NULL)], c("a_X_1", "a_Y_2", "b_X_1", "b_Y_2"
 8 |       ), c("X1_a", "Y2_a", "X1_b", "Y2_b"))
 9 | 
10 | # can sort column names
11 | 
12 |     Code
13 |       show_query(step)
14 |     Output
15 |       setcolorder(dcast(DT, formula = "..." ~ chr, value.var = "int")[, 
16 |           `:=`(".", NULL)], c("Mon", "Tue", "Wed"))
17 | 
18 | # can sort column names with id
19 | 
20 |     Code
21 |       show_query(step)
22 |     Output
23 |       setcolorder(dcast(DT, formula = id ~ chr, value.var = "int"), 
24 |           c("id", "Mon", "Tue", "Wed"))
25 | 
26 | # can repair names if requested
27 | 
28 |     Code
29 |       pivot_wider(df, names_from = lab, values_from = val)
30 |     Condition
31 |       Error in `step_repair()`:
32 |       ! Names must be unique.
33 |       x These names are duplicated:
34 |         * "x" at locations 1 and 2.
35 |     Code
36 |       pivot_wider(df, names_from = lab, values_from = val, names_repair = "unique")
37 |     Message
38 |       New names:
39 |       * `x` -> `x...1`
40 |       * `x` -> `x...2`
41 |     Output
42 |       Source: local data table [1 x 2]
43 |       Call:   setnames(dcast(copy(DT), formula = x ~ lab, value.var = "val"), 
44 |           1:2, c("x...1", "x...2"))
45 |       
46 |         x...1 x...2
47 |         <dbl> <dbl>
48 |       1     1     2
49 |       
50 |       # Use as.data.table()/as.data.frame()/as_tibble() to access results
51 | 
52 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-call.md:
--------------------------------------------------------------------------------
 1 | # but not with anything else
 2 | 
 3 |     Code
 4 |       dt %>% rename_with(1)
 5 |     Condition
 6 |       Error in `rename_with()`:
 7 |       ! `.fn` must be a function name or formula
 8 | 
 9 | # rename_with generates minimal spec
10 | 
11 |     Code
12 |       dt %>% rename_with(toupper) %>% show_query()
13 |     Output
14 |       setnames(copy(DT), toupper)
15 |     Code
16 |       dt %>% rename_with(toupper, 1:3) %>% show_query()
17 |     Output
18 |       setnames(copy(DT), c("a", "b", "c"), toupper)
19 | 
20 | # can compute distinct computed variables
21 | 
22 |     Code
23 |       dt %>% distinct(z = x + y) %>% show_query()
24 |     Output
25 |       unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)])
26 | 
27 | # errors are raised
28 | 
29 |     Code
30 |       collect(drop_na(dt, "z"))
31 |     Condition
32 |       Error in `drop_na()`:
33 |       ! Can't select columns that don't exist.
34 |       x Column `z` doesn't exist.
35 | 
36 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-colorder-relocate.md:
--------------------------------------------------------------------------------
1 | # can only supply one of .before and .after
2 | 
3 |     Code
4 |       relocate(dt, y, .before = x, .after = x)
5 |     Condition
6 |       Error in `relocate()`:
7 |       ! Can't supply both `.before` and `.after`.
8 | 
9 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-colorder.md:
--------------------------------------------------------------------------------
 1 | # can handle duplicate column names
 2 | 
 3 |     The column(s) x do not uniquely match a column in `x`.
 4 | 
 5 | # checks col_order
 6 | 
 7 |     Every element of `col_order` must be unique.
 8 | 
 9 | ---
10 | 
11 |     Every element of `col_order` must be unique.
12 | 
13 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-group.md:
--------------------------------------------------------------------------------
 1 | # can add groups if requested
 2 | 
 3 |     Code
 4 |       . <- dt %>% group_by(x) %>% group_by(y, add = TRUE)
 5 |     Condition
 6 |       Warning:
 7 |       The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0.
 8 |       i Please use the `.add` argument instead.
 9 | 
10 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-join.md:
--------------------------------------------------------------------------------
 1 | # performs cross join
 2 | 
 3 |     Code
 4 |       left_join(dt1, dt2, by = character())
 5 |     Output
 6 |       Source: local data table [4 x 3]
 7 |       Call:   setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 
 8 |           `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 
 9 |           c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"]
10 |       
11 |           x.x y       x.y
12 |         <int> <chr> <int>
13 |       1     1 a         3
14 |       2     1 a         4
15 |       3     2 a         3
16 |       4     2 a         4
17 |       
18 |       # Use as.data.table()/as.data.frame()/as_tibble() to access results
19 | 
20 | ---
21 | 
22 |     Code
23 |       right_join(dt1, dt2, by = character())
24 |     Output
25 |       Source: local data table [4 x 3]
26 |       Call:   setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 
27 |           `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 
28 |           c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"]
29 |       
30 |           x.x y       x.y
31 |         <int> <chr> <int>
32 |       1     1 a         3
33 |       2     1 a         4
34 |       3     2 a         3
35 |       4     2 a         4
36 |       
37 |       # Use as.data.table()/as.data.frame()/as_tibble() to access results
38 | 
39 | ---
40 | 
41 |     Code
42 |       full_join(dt1, dt2, by = character())
43 |     Output
44 |       Source: local data table [4 x 3]
45 |       Call:   setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 
46 |           `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 
47 |           c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"]
48 |       
49 |           x.x y       x.y
50 |         <int> <chr> <int>
51 |       1     1 a         3
52 |       2     1 a         4
53 |       3     2 a         3
54 |       4     2 a         4
55 |       
56 |       # Use as.data.table()/as.data.frame()/as_tibble() to access results
57 | 
58 | ---
59 | 
60 |     Code
61 |       inner_join(dt1, dt2, by = character())
62 |     Output
63 |       Source: local data table [4 x 3]
64 |       Call:   setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 
65 |           `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 
66 |           c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"]
67 |       
68 |           x.x y       x.y
69 |         <int> <chr> <int>
70 |       1     1 a         3
71 |       2     1 a         4
72 |       3     2 a         3
73 |       4     2 a         4
74 |       
75 |       # Use as.data.table()/as.data.frame()/as_tibble() to access results
76 | 
77 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-mutate.md:
--------------------------------------------------------------------------------
1 | # unnamed arguments matching column names are ignored
2 | 
3 |     Code
4 |       mutate(dt, y)
5 |     Condition
6 |       Error:
7 |       ! object 'y' not found
8 | 
9 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-subset-filter.md:
--------------------------------------------------------------------------------
 1 | # errors for named input
 2 | 
 3 |     Code
 4 |       filter(dt, x = 1)
 5 |     Condition
 6 |       Error in `filter()`:
 7 |       ! Problem with `filter()` input `..1`.
 8 |       x Input `..1` is named.
 9 |       i This usually means that you've used `=` instead of `==`.
10 |       i Did you mean `x == 1`?
11 | 
12 | ---
13 | 
14 |     Code
15 |       filter(dt, y > 1, x = 1)
16 |     Condition
17 |       Error in `filter()`:
18 |       ! Problem with `filter()` input `..2`.
19 |       x Input `..2` is named.
20 |       i This usually means that you've used `=` instead of `==`.
21 |       i Did you mean `x == 1`?
22 | 
23 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-subset-select.md:
--------------------------------------------------------------------------------
 1 | # empty select returns no columns
 2 | 
 3 |     Code
 4 |       out <- lz %>% group_by(x) %>% select()
 5 |     Message
 6 |       Adding missing grouping variables: `x`
 7 | 
 8 | # copied data: empty select returns no columns
 9 | 
10 |     Code
11 |       out <- lz %>% group_by(x) %>% select()
12 |     Message
13 |       Adding missing grouping variables: `x`
14 | 
15 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-subset-separate.md:
--------------------------------------------------------------------------------
 1 | # checks type of `into` and `sep`
 2 | 
 3 |     Code
 4 |       separate(dt, x, "x", FALSE)
 5 |     Condition
 6 |       Error in `separate()`:
 7 |       ! `sep` must be a character vector.
 8 | 
 9 | ---
10 | 
11 |     Code
12 |       separate(dt, x, FALSE)
13 |     Condition
14 |       Error in `separate()`:
15 |       ! `into` must be a character vector.
16 | 
17 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-subset-slice.md:
--------------------------------------------------------------------------------
 1 | # slice_*() checks for empty ...
 2 | 
 3 |     Code
 4 |       slice_head(dt, 5)
 5 |     Condition
 6 |       Error in `slice_head()`:
 7 |       ! `n` must be explicitly named.
 8 |       i Did you mean `slice_head(n = 5)`?
 9 |     Code
10 |       slice_tail(dt, 5)
11 |     Condition
12 |       Error in `slice_tail()`:
13 |       ! `n` must be explicitly named.
14 |       i Did you mean `slice_tail(n = 5)`?
15 |     Code
16 |       slice_min(dt, x, 5)
17 |     Condition
18 |       Error in `slice_min()`:
19 |       ! `n` must be explicitly named.
20 |       i Did you mean `slice_min(n = 5)`?
21 |     Code
22 |       slice_max(dt, x, 5)
23 |     Condition
24 |       Error in `slice_max()`:
25 |       ! `n` must be explicitly named.
26 |       i Did you mean `slice_max(n = 5)`?
27 |     Code
28 |       slice_sample(dt, 5)
29 |     Condition
30 |       Error in `slice_sample()`:
31 |       ! `n` must be explicitly named.
32 |       i Did you mean `slice_sample(n = 5)`?
33 | 
34 | ---
35 | 
36 |     Code
37 |       slice_min(dt)
38 |     Condition
39 |       Error in `slice_min()`:
40 |       ! `order_by` is absent but must be supplied.
41 |     Code
42 |       slice_max(dt)
43 |     Condition
44 |       Error in `slice_max()`:
45 |       ! `order_by` is absent but must be supplied.
46 | 
47 | # check_slice_catches common errors
48 | 
49 |     Code
50 |       slice_head(dt, n = 1, prop = 1)
51 |     Condition
52 |       Error in `slice_head()`:
53 |       ! Must supply exactly one of `n` and `prop` arguments.
54 |     Code
55 |       slice_head(dt, n = "a")
56 |     Condition
57 |       Error in `slice_head()`:
58 |       ! `n` must be a single number.
59 |     Code
60 |       slice_head(dt, prop = "a")
61 |     Condition
62 |       Error in `slice_head()`:
63 |       ! `prop` must be a single number.
64 |     Code
65 |       slice_head(dt, n = NA)
66 |     Condition
67 |       Error in `slice_head()`:
68 |       ! `n` must be a single number.
69 |     Code
70 |       slice_head(dt, prop = NA)
71 |     Condition
72 |       Error in `slice_head()`:
73 |       ! `prop` must be a single number.
74 | 
75 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/step-subset-summarise.md:
--------------------------------------------------------------------------------
 1 | # summarise(.groups=)
 2 | 
 3 |     Code
 4 |       eval_bare(expr(lazy_dt(data.frame(x = 1, y = 2), "DT") %>% group_by(x, y) %>%
 5 |         dplyr::summarise() %>% show_query()), env(global_env()))
 6 |     Message
 7 |       `summarise()` has grouped output by 'x'. You can override using the `.groups` argument.
 8 |     Output
 9 |       unique(DT)
10 | 
11 | ---
12 | 
13 |     `.groups` can't be "rowwise" in dtplyr
14 |     i Possible values are NULL (default), "drop_last", "drop", and "keep"
15 | 
16 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/tidyeval-across.md:
--------------------------------------------------------------------------------
 1 | # across() does not support formulas with dots
 2 | 
 3 |     Code
 4 |       (expect_error(capture_across(dt, across(a:b, ~ log(.x, base = .y), base = 2))))
 5 |     Output
 6 |       <error/rlang_error>
 7 |       Error in `across_fun()`:
 8 |       ! `dtplyr::across()` does not support `...` when a purrr-style lambda is used in `.fns`.
 9 |       i Use a lambda instead.
10 |       i Or inline them via a purrr-style lambda.
11 |     Code
12 |       (expect_error(capture_across(dt, across(a:b, list(~ log(.x, base = .y)), base = 2)))
13 |       )
14 |     Output
15 |       <error/rlang_error>
16 |       Error in `FUN()`:
17 |       ! `dtplyr::across()` does not support `...` when a purrr-style lambda is used in `.fns`.
18 |       i Use a lambda instead.
19 |       i Or inline them via a purrr-style lambda.
20 | 
21 | # across() gives informative errors
22 | 
23 |     Code
24 |       capture_across(dt, across(a, 1))
25 |     Condition
26 |       Error in `across_funs()`:
27 |       ! `.fns` argument to dtplyr::across() must be a NULL, a function, formula, or list
28 |     Code
29 |       capture_across(dt, across(a, list(1)))
30 |     Condition
31 |       Error in `FUN()`:
32 |       ! .fns argument to dtplyr::across() must contain a function or a formula
33 |       x Problem with 1
34 | 
35 | # if_all() gives informative errors
36 | 
37 |     Code
38 |       capture_if_all(dt, if_all(a, 1))
39 |     Condition
40 |       Error in `across_funs()`:
41 |       ! `.fns` argument to dtplyr::across() must be a NULL, a function, formula, or list
42 |     Code
43 |       capture_if_all(dt, if_all(a, list(1)))
44 |     Condition
45 |       Error in `FUN()`:
46 |       ! .fns argument to dtplyr::across() must contain a function or a formula
47 |       x Problem with 1
48 | 
49 | # if_all() cannot rename variables
50 | 
51 |     Code
52 |       (expect_error(capture_if_all(dt, if_all(c(a = x, b = y)))))
53 |     Output
54 |       <error/tidyselect:::error_disallowed_rename>
55 |       Error in `if_all()`:
56 |       ! Can't rename variables in this context.
57 | 
58 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/tidyeval.md:
--------------------------------------------------------------------------------
 1 | # translates lag()/lead()
 2 | 
 3 |     The `order_by` argument of `lag()` is not supported by dtplyr
 4 | 
 5 | # errors when `where()` is used, #271/#368
 6 | 
 7 |     This tidyselect interface doesn't support predicates.
 8 | 
 9 | ---
10 | 
11 |     This tidyselect interface doesn't support predicates.
12 | 
13 | # desc() checks the number of arguments
14 | 
15 |     Code
16 |       capture_dot(df, desc(a, b))
17 |     Condition
18 |       Error in `check_one_arg()`:
19 |       ! `desc()` expects exactly one argument.
20 | 
21 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/unite.md:
--------------------------------------------------------------------------------
1 | # errors on na.rm
2 | 
3 |     `na.rm` is not implemented in dtplyr
4 | 
5 | 


--------------------------------------------------------------------------------
/tests/testthat/helpers-library.R:
--------------------------------------------------------------------------------
1 | library(dplyr, warn.conflicts = FALSE)
2 | library(tidyr, warn.conflicts = FALSE)
3 | 


--------------------------------------------------------------------------------
/tests/testthat/test-complete.R:
--------------------------------------------------------------------------------
 1 | test_that("complete with no variables returns data as is", {
 2 |   mtcars_dt <- lazy_dt(mtcars, "DT")  # lazy data table over mtcars, referred to as "DT"
 3 |   expect_equal(complete(mtcars_dt), mtcars_dt)  # no columns supplied: result must equal the input
 4 | })
 5 | 
 6 | test_that("basic invocation works", {
 7 |   tbl <- tibble(x = 1:2, y = 1:2, z = 3:4)  # only the (1, 1) and (2, 2) pairs of x/y are present
 8 |   dt <- lazy_dt(tbl, "DT")
 9 |   out <- dt %>% complete(x, y) %>% collect()  # collect() materialises the lazy result
10 | 
11 |   expect_equal(nrow(out), 4)  # 2 values of x times 2 values of y
12 |   expect_equal(out$z, c(3, NA, NA, 4))  # z is NA for the two pairs missing from the input
13 | })
14 | 
15 | test_that("empty expansion returns original", {
16 |   tbl <- tibble(x = character())  # case 1: zero-row input
17 |   dt <- lazy_dt(tbl, "DT")
18 |   out <- dt %>% complete(y = NULL) %>% collect()  # the only expansion spec is NULL
19 |   expect_equal(out, tbl)  # data must come back unchanged
20 | 
21 |   tbl <- tibble(x = 1:4)  # case 2: same check with a non-empty input
22 |   dt <- lazy_dt(tbl, "DT")
23 |   out <- dt %>% complete(y = NULL) %>% collect()
24 |   expect_equal(out, tbl)
25 | })
26 | 


--------------------------------------------------------------------------------
/tests/testthat/test-count.R:
--------------------------------------------------------------------------------
  1 | 
  2 | test_that("can be used grouped or ungrouped", {
  3 |   dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT")  # x == 1 three times, x == 2 once
  4 | 
  5 |   expect_equal(
  6 |     dt %>% count(x) %>% collect(),  # counting column passed directly to count()
  7 |     tibble(x = c(1, 2), n = c(3, 1))
  8 |   )
  9 |   expect_equal(
 10 |     dt %>% group_by(x) %>% count() %>% collect(),  # grouping established beforehand
 11 |     tibble(x = c(1, 2), n = c(3, 1)) %>% group_by(x)  # expected result stays grouped by x
 12 |   )
 13 | })
 14 | 
 15 | test_that("can control name", {
 16 |   dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT")
 17 | 
 18 |   expect_equal(
 19 |     dt %>% count(x, name = "y") %>% collect(),  # count column is named "y" instead of "n"
 20 |     tibble(x = c(1, 2), y = c(3, 1))
 21 |   )
 22 |   expect_snapshot(
 23 |     dt %>% count(name = 10) %>% collect(),
 24 |     error = TRUE  # a non-string name must fail; the error message is recorded in the snapshot
 25 |   )
 26 | })
 27 | 
 28 | test_that("name can match existing group var", {
 29 |   dt <- lazy_dt(data.table(a = 2))
 30 | 
 31 |   expect_equal(
 32 |     dt %>% group_by(a) %>% tally(name = 'a') %>% collect(),
 33 |     tibble(a = 1)
 34 |   )
 35 |   expect_equal(
 36 |     dt %>% count(a, name = 'a') %>% collect(),
 37 |     tibble(a = 1)
 38 |   )
 39 | })
 40 | 
 41 | 
 42 | test_that("can weight", {
 43 |   dt <- lazy_dt(data.table(x = c(1, 1, 2), y = c(1, 2, 10)), "DT")
 44 |   expect_equal(
 45 |     dt %>% count(x, wt = y) %>% collect(),
 46 |     tibble(x = c(1, 2), n = c(3, 10))
 47 |   )
 48 |   expect_equal(
 49 |     dt %>% add_count(x, wt = y) %>% collect(),
 50 |     dt %>% mutate(n = c(3, 3, 10)) %>% collect()
 51 |   )
 52 | })
 53 | 
 54 | test_that("can sort", {
 55 |   dt <- lazy_dt(data.table(x = c(1, 1, 2), y = c(1, 2, 10)), "DT")
 56 |   expect_equal(
 57 |     dt %>% count(x, wt = y, sort = TRUE) %>% collect(),
 58 |     tibble(x = c(2, 1), n = c(10, 3))
 59 |   )
 60 |   expect_equal(
 61 |     dt %>% add_count(x, wt = y, sort = TRUE) %>% collect(),
 62 |     tibble(x = c(2, 1, 1), y = c(10, 1, 2), n = c(10, 3, 3))
 63 |   )
 64 | })
 65 | 
 66 | test_that("tally works", {
 67 |   dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT")
 68 |   expect_equal(
 69 |     dt %>% group_by(x) %>% tally() %>% collect(),
 70 |     tibble(x = c(1, 2), n = c(3, 1))
 71 |   )
 72 | })
 73 | 
 74 | test_that("informs if n column already present, unless overridden", {
 75 |   dt <- lazy_dt(data.frame(n = c(1, 1, 2, 2, 2)))
 76 |   expect_message(out <- count(dt, n), "already present")
 77 |   expect_named(as_tibble(out), c("n", "nn"))
 78 | 
 79 |   # not a good idea, but supported
 80 |   expect_message(out <- count(dt, n, name = "n"), NA)
 81 |   expect_named(as_tibble(out), "n")
 82 | 
 83 |   expect_message(out <- count(dt, n, name = "nn"), NA)
 84 |   expect_named(as_tibble(out), c("n", "nn"))
 85 | 
 86 |   dt <- lazy_dt(data.frame(n = c(1, 1, 2, 2, 2), nn = 1:5))
 87 |   expect_message(out <- count(dt, n), "already present")
 88 |   expect_named(as_tibble(out), c("n", "nn"))
 89 | 
 90 |   expect_message(out <- count(dt, n, nn), "already present")
 91 |   expect_named(as_tibble(out), c("n", "nn", "nnn"))
 92 | })
 93 | 
 94 | test_that("name must be string", {
 95 |   dt <- lazy_dt(data.frame(x = c(1, 2)))
 96 |   expect_error(count(dt, x, name = 1), "string")
 97 |   expect_error(count(dt, x, name = letters), "string")
 98 | })
 99 | 
100 | # add_count ---------------------------------------------------------------
101 | 
102 | test_that("add_count() gives expected calls and groups", {
103 |   dt <- lazy_dt(data.frame(g = c(1, 2, 2, 2)), "DT")
104 | 
105 |   res <- dt %>% add_count(g)
106 |   expect_equal(show_query(res), expr(copy(DT)[, `:=`(n = .N), by = .(g)]))
107 |   expect_equal(res$groups, character())
108 | 
109 |   res <- dt %>% group_by(g) %>% add_count()
110 |   expect_equal(show_query(res), expr(copy(DT)[, `:=`(n = .N), by = .(g)]))
111 |   expect_equal(res$groups, "g")
112 | })
113 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fill.R:
--------------------------------------------------------------------------------
 1 | # Every `.direction` translates to nafill(): the default uses "locf", "up"
 2 | # uses "nocb", and the two-way directions nest both calls in matching order.
 3 | test_that("missings are filled correctly & translations are correct", {
 4 |   lazy <- lazy_dt(tibble(x = c(NA, 1, NA, 2, NA, NA)), "DT")
 5 | 
 6 |   down <- fill(lazy, x)
 7 |   expect_equal(show_query(down), expr(copy(DT)[, `:=`(x = nafill(x, "locf"))]))
 8 |   expect_equal(collect(down)$x, c(NA, 1, 1, 2, 2, 2))
 9 | 
10 |   up <- fill(lazy, x, .direction = "up")
11 |   expect_equal(show_query(up), expr(copy(DT)[, `:=`(x = nafill(x, "nocb"))]))
12 |   expect_equal(collect(up)$x, c(1, 1, 2, 2, NA, NA))
13 | 
14 |   down_up <- fill(lazy, x, .direction = 'downup')
15 |   expect_equal(show_query(down_up), expr(copy(DT)[, `:=`(x = nafill(nafill(x, "locf"), "nocb"))]))
16 |   expect_equal(collect(down_up)$x, c(1, 1, 1, 2, 2, 2))
17 | 
18 |   up_down <- fill(lazy, x, .direction = 'updown')
19 |   expect_equal(show_query(up_down), expr(copy(DT)[, `:=`(x = nafill(nafill(x, "nocb"), "locf"))]))
20 |   expect_equal(collect(up_down)$x, c(1, 1, 2, 2, 2, 2))
21 | })
22 | 
23 | # A bare data.table can be handed straight to fill().
24 | test_that("auto-conversion to lazy_dt works as intended", {
25 |   raw <- data.table(x = c(NA, 1, NA, 2, NA, NA))
26 | 
27 |   filled <- fill(raw, x)
28 |   expect_equal(collect(filled)$x, c(NA, 1, 1, 2, 2, 2))
29 | })
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-reframe.R:
--------------------------------------------------------------------------------
 1 | test_that("`reframe()` allows summaries", {
 2 |   df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5))
 3 | 
 4 |   # one row overall ...
 5 |   expect_identical(
 6 |     collect(reframe(df, x = mean(x))),
 7 |     tibble(x = 3)
 8 |   )
 9 |   # ... and one row per group when `.by` is given
10 |   expect_identical(
11 |     collect(reframe(df, x = mean(x), .by = g)),
12 |     tibble(g = c(1, 2), x = c(2, 4.5))
13 |   )
14 | })
15 | 
16 | test_that("`reframe()` allows size 0 results", {
17 |   df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5))
18 |   gdf <- group_by(df, g)
19 | 
20 |   # no x exceeds 5, so every variant yields zero rows with typed columns
21 |   expect_identical(
22 |     collect(reframe(df, x = which(x > 5))),
23 |     tibble(x = integer())
24 |   )
25 |   expect_identical(
26 |     collect(reframe(df, x = which(x > 5), .by = g)),
27 |     tibble(g = double(), x = integer())
28 |   )
29 |   expect_identical(
30 |     collect(reframe(gdf, x = which(x > 5))),
31 |     tibble(g = double(), x = integer())
32 |   )
33 | })
34 | 
35 | test_that("`reframe()` allows size >1 results", {
36 |   df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5))
37 |   gdf <- group_by(df, g)
38 | 
39 |   expect_identical(
40 |     collect(reframe(df, x = which(x > 2))),
41 |     tibble(x = 3:5)
42 |   )
43 |   # per group, which() returns positions within that group
44 |   expect_identical(
45 |     collect(reframe(df, x = which(x > 2), .by = g)),
46 |     tibble(g = c(1, 2, 2), x = c(3L, 1L, 2L))
47 |   )
48 |   expect_identical(
49 |     collect(reframe(gdf, x = which(x > 2))),
50 |     tibble(g = c(1, 2, 2), x = c(3L, 1L, 2L))
51 |   )
52 | })
53 | 
54 | test_that("`reframe()` ungroups output", {
55 |   df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5))
56 |   gdf <- group_by(df, g, x)
57 |   res <- reframe(gdf, row_num = row_number())
58 | 
59 |   # expect_length() reports the actual length on failure, which a bare
60 |   # expect_true(length(...) == 0) cannot do
61 |   expect_length(group_vars(res), 0)
62 | })
63 | 


--------------------------------------------------------------------------------
/tests/testthat/test-replace_na.R:
--------------------------------------------------------------------------------
 1 | # lazy data.tables -----------------------------------------------------------
 2 | 
 3 | test_that("empty call does nothing", {
 4 |   tbl <- tibble(x = c(1, NA))
 5 |   dt <- lazy_dt(tbl, "DT")
 6 |   out <- collect(replace_na(dt))
 7 |   expect_equal(out, tbl)
 8 | })
 9 | 
10 | test_that("missing values are replaced", {
11 |   tbl <- tibble(x = c(1, NA))
12 |   dt <- lazy_dt(tbl, "DT")
13 |   step <- replace_na(dt, list(x = 0))
14 |   out <- collect(step)
15 |   # the replacement is translated to an fcoalesce() call
16 |   expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = fcoalesce(x, 0))]))
17 |   expect_equal(out$x, c(1, 0))
18 | })
19 | 
20 | test_that("don't complain about variables that don't exist", {
21 |   tbl <- tibble(a = c(1, NA))
22 |   dt <- lazy_dt(tbl, "DT")
23 |   # `b` is not a column of `tbl`; the result is expected to contain only `a`
24 |   out <- collect(replace_na(dt, list(a = 100, b = 0)))
25 |   expect_equal(out, tibble(a = c(1, 100)))
26 | })
27 | 
28 | # Inside mutate() -----------------------------------------------------------
29 | 
30 | # The description names the mutate() path explicitly so that a failure
31 | # report is not confused with the data-frame-level replace_na() test.
32 | test_that("missing values are replaced inside mutate()", {
33 |   tbl <- tibble(x = c(1, NA))
34 |   dt <- lazy_dt(tbl, "DT")
35 |   step <- mutate(dt, x = replace_na(x, 0))
36 |   out <- collect(step)
37 |   expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = fcoalesce(x, 0))]))
38 |   expect_equal(out$x, c(1, 0))
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-colorder-relocate.R:
--------------------------------------------------------------------------------
 1 | # Every relocate() below is expected to translate to setcolorder() on a copy.
 2 | test_that(".before and .after relocate individual cols", {
 3 |   dt <- lazy_dt(data.table(x = 1, y = 1), "DT")
 4 | 
 5 |   expect_equal(
 6 |     dt %>% relocate(x, .after = y) %>% show_query(),
 7 |     expr(setcolorder(copy(DT), !!c("y", "x")))
 8 |   )
 9 |   expect_equal(
10 |     dt %>% relocate(y, .before = x) %>% show_query(),
11 |     expr(setcolorder(copy(DT), !!c("y", "x")))
12 |   )
13 | })
14 | 
15 | test_that("can move blocks of variables", {
16 |   dt <- lazy_dt(data.table(x = 1, a = 1, y = 1, b = 1), "DT")
17 | 
18 |   expect_equal(
19 |     dt %>% relocate(y, b, .before = a) %>% show_query(),
20 |     expr(setcolorder(copy(DT), !!c("x", "y", "b", "a")))
21 |   )
22 |   expect_equal(
23 |     dt %>% relocate(any_of(c("y", "b")), .before = a) %>% show_query(),
24 |     expr(setcolorder(copy(DT), !!c("x", "y", "b", "a")))
25 |   )
26 | })
27 | 
28 | test_that("All columns move before (after) columns in .before (.after)", {
29 |   dt <- lazy_dt(data.table(x = 1, a = 1, y = 1, b = 1), "DT")
30 | 
31 |   expect_equal(
32 |     dt %>% relocate(y, b, .before = c(x, a)) %>% show_query(),
33 |     expr(setcolorder(copy(DT), !!c("y", "b", "x", "a")))
34 |   )
35 |   expect_equal(
36 |     dt %>% relocate(x, a, .after = c(y, b)) %>% show_query(),
37 |     expr(setcolorder(copy(DT), !!c("y", "b", "x", "a")))
38 |   )
39 | })
40 | 
41 | test_that("extra variables in .before/.after unaffected", {
42 |   dt <- lazy_dt(data.table(a = 1, b = 1, c = 1, d = 1, e = 1), "DT")
43 | 
44 |   expect_equal(
45 |     dt %>% relocate(b, .after = c(a, c, e)) %>% show_query(),
46 |     expr(setcolorder(copy(DT), !!c("a", "c", "d", "e", "b")))
47 |   )
48 |   expect_equal(
49 |     dt %>% relocate(e, .before = c(b, d)) %>% show_query(),
50 |     expr(setcolorder(copy(DT), !!c("a", "e", "b", "c", "d")))
51 |   )
52 | })
53 | 
54 | test_that("no .before/.after moves to front", {
55 |   dt <- lazy_dt(data.table(x = 1, y = 2), "DT")
56 | 
57 |   expect_equal(
58 |     dt %>% relocate(y) %>% show_query(),
59 |     expr(setcolorder(copy(DT), !!c("y", "x")))
60 |   )
61 | })
62 | 
63 | test_that("can only supply one of .before and .after", {
64 |   dt <- lazy_dt(data.table(x = 1, y = 1), "DT")
65 | 
66 |   expect_snapshot(relocate(dt, y, .before = x, .after = x), error = TRUE)
67 | })
68 | 
69 | test_that("relocate() respects order specified by ...", {
70 |   dt <- lazy_dt(data.table(a = 1, x = 1, b = 1, z = 1, y = 1), "DT")
71 | 
72 |   expect_equal(
73 |     dt %>% relocate(x, y, z, .before = x) %>% show_query(),
74 |     expr(setcolorder(copy(DT), !!c("a", "x", "y", "z", "b")))
75 |   )
76 |   expect_equal(
77 |     dt %>% relocate(x, y, z, .after = last_col()) %>% show_query(),
78 |     expr(setcolorder(copy(DT), !!c("a", "b", "x", "y", "z")))
79 |   )
80 |   expect_equal(
81 |     dt %>% relocate(x, a, z) %>% show_query(),
82 |     expr(setcolorder(copy(DT), !!c("x", "a", "z", "b", "y")))
83 |   )
84 | })
85 | 
86 | # Moving a grouping column must leave the recorded groups (and their order)
87 | # as they were set by group_by().
88 | test_that("relocate() does not alter grouping", {
89 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
90 | 
91 |   expect_equal(
92 |     dt %>% group_by(x, y) %>% relocate(y, .before = x) %>% .$groups,
93 |     c("x", "y")
94 |   )
95 | })
96 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-colorder.R:
--------------------------------------------------------------------------------
 1 | # Reordering is expressed as setcolorder() on a copy, by name or by position.
 2 | test_that("can reorder columns", {
 3 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1), "DT")
 4 | 
 5 |   by_name <- step_colorder(dt, c("y", "x"))
 6 |   expect_equal(show_query(by_name), expr(setcolorder(copy(DT), !!c("y", "x"))))
 7 |   expect_named(collect(by_name), c("y", "x"))
 8 | 
 9 |   by_position <- step_colorder(dt, c(2L, 1L))
10 |   expect_equal(show_query(by_position), expr(setcolorder(copy(DT), !!c(2L, 1L))))
11 |   expect_named(collect(by_position), c("y", "x"))
12 | })
13 | 
14 | test_that("can handle duplicate column names", {
15 |   dt <- lazy_dt(data.table(x = 3, x = 2, y = 1), "DT")
16 | 
17 |   # ordering by a duplicated name is an error (message is snapshotted) ...
18 |   expect_snapshot_error(dt %>% step_colorder(c("y", "x")))
19 | 
20 |   # ... while ordering by position is accepted
21 |   by_position <- step_colorder(dt, c(3L, 2L))
22 |   expect_equal(show_query(by_position), expr(setcolorder(copy(DT), !!c(3L, 2L))))
23 |   expect_equal(as.data.table(by_position), data.table(y = 1, x = 2, x = 3))
24 | })
25 | 
26 | test_that("checks col_order", {
27 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1), "DT")
28 | 
29 |   # repeated entries are rejected for names and for positions
30 |   expect_snapshot_error(dt %>% step_colorder(c("y", "y")))
31 |   expect_snapshot_error(dt %>% step_colorder(c(1L, 1L)))
32 | })
33 | 
34 | test_that("works for empty input", {
35 |   lazy <- lazy_dt(data.frame(x = 1), "DT")
36 | 
37 |   expect_equal(step_colorder(lazy, character()), lazy)
38 |   expect_equal(step_colorder(lazy, integer()), lazy)
39 | })
40 | 
41 | # An order that matches the current leading columns returns the input as is.
42 | test_that("doesn't add step if not necessary", {
43 |   lazy <- lazy_dt(data.frame(x = 1, y = 2), "DT")
44 | 
45 |   expect_equal(step_colorder(lazy, c("x", "y")), lazy)
46 |   expect_equal(step_colorder(lazy, "x"), lazy)
47 | 
48 |   expect_equal(step_colorder(lazy, 1:2), lazy)
49 |   expect_equal(step_colorder(lazy, 1L), lazy)
50 | })
51 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-first.R:
--------------------------------------------------------------------------------
 1 | # Defaults of a step built directly from a data.table.
 2 | test_that("constructor has sensible defaults", {
 3 |   dt <- data.table(x = 1:2, y = 1:2)
 4 |   step <- step_first(dt)
 5 | 
 6 |   expect_s3_class(step, "dtplyr_step_first")
 7 |   expect_equal(step$parent, dt)
 8 |   expect_equal(step$vars, c("x", "y"))
 9 |   expect_equal(step$groups, character())
10 |   # no name was supplied; the generated one is expected to contain "_DT"
11 |   expect_match(as.character(step$name), "_DT")
12 | })
13 | 
14 | 
15 | # mutability --------------------------------------------------------------
16 | 
17 | test_that("doesn't need copy", {
18 |   dt <- lazy_dt(mtcars)
19 |   expect_false(dt$needs_copy)
20 | })
21 | 
22 | test_that("mutable object must be a data table", {
23 |   expect_error(lazy_dt(mtcars, immutable = FALSE), "not already a data table")
24 | })
25 | 
26 | test_that("mutable object never needs copy", {
27 |   dt <- lazy_dt(as.data.table(mtcars), immutable = FALSE)
28 |   expect_false(dt$needs_copy)
29 |   expect_false(dt %>% mutate(x = 1) %>% .$needs_copy)
30 | })
31 | 
32 | test_that("dt_call() copies if requested", {
33 |   dt <- lazy_dt(mtcars, name = "DT")
34 | 
35 |   expect_equal(dt_call(dt, FALSE), quote(DT))
36 |   expect_equal(dt_call(dt, TRUE), quote(copy(DT)))
37 | })
38 | 
39 | test_that("lazy_dt doesn't copy input", {
40 |   dt <- data.table(x = 1)
41 |   lz <- lazy_dt(dt)
42 | 
43 |   # identical addresses mean the step's parent is the very same object
44 |   expect_equal(data.table::address(dt), data.table::address(lz$parent))
45 | })
46 | 
47 | # keys --------------------------------------------------------------------
48 | 
49 | test_that("can set keys", {
50 |   dt <- lazy_dt(mtcars, key_by = cyl)
51 |   expect_equal(data.table::key(dt$parent), "cyl")
52 | })
53 | 
54 | test_that("setting doesn't modify data.table", {
55 |   dt1 <- data.table(x = c(5, 1, 2))
56 |   dt2 <- lazy_dt(dt1, key_by = x)
57 | 
58 |   # `dt1` is the plain input data.table, not a lazy step: it has no `$parent`
59 |   # (that would be NULL and make the check vacuous), so inspect it directly.
60 |   expect_null(data.table::key(dt1))
61 |   expect_equal(data.table::key(dt2$parent), "x")
62 | })
63 | 
64 | # groups ------------------------------------------------------------------
65 | 
66 | test_that("keeps groups", {
67 |   dt <- lazy_dt(group_by(mtcars, cyl))
68 |   expect_equal(group_vars(dt), "cyl")
69 | })
70 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-group.R:
--------------------------------------------------------------------------------
  1 | test_that("grouping and ungrouping adjust groups field", {
  2 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3))
  3 | 
  4 |   expect_equal(dt %>% .$groups, character())
  5 |   expect_equal(dt %>% group_by(x) %>% .$groups, "x")
  6 |   expect_equal(dt %>% group_by(a = x) %>% .$groups, "a")
  7 |   # a second group_by() replaces the earlier grouping
  8 |   expect_equal(dt %>% group_by(x) %>% group_by(y) %>% .$groups, "y")
  9 |   expect_equal(dt %>% group_by(x) %>% ungroup() %>% .$groups, character())
 10 | })
 11 | 
 12 | test_that("ungroup can remove variables from grouping", {
 13 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) %>% group_by(x, y)
 14 | 
 15 |   expect_equal(dt %>% ungroup(y) %>% group_vars(), "x")
 16 | })
 17 | 
 18 | test_that("can use across", {
 19 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3))
 20 |   expect_equal(dt %>% group_by(across(everything())) %>% .$groups, c("x", "y"))
 21 | })
 22 | 
 23 | test_that("can add groups if requested", {
 24 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
 25 |   expect_equal(
 26 |     dt %>% group_by(x) %>% group_by(y, .add = TRUE) %>% .$groups,
 27 |     c("x", "y")
 28 |   )
 29 | 
 30 |   expect_snapshot({
 31 |     . <- dt %>% group_by(x) %>% group_by(y, add = TRUE)
 32 |   })
 33 | })
 34 | 
 35 | test_that("grouping can compute new variables if needed", {
 36 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
 37 | 
 38 |   expect_equal(
 39 |     dt %>% group_by(xy = x + y) %>% show_query(),
 40 |     expr(copy(DT)[, `:=`(xy = x + y)])
 41 |   )
 42 | 
 43 |   # also works when RHS is only a symbol
 44 |   expect_equal(
 45 |     dt %>% group_by(z = x) %>% show_query(),
 46 |     expr(copy(DT)[, `:=`(z = x)])
 47 |   )
 48 | 
 49 |   expect_equal(
 50 |     dt %>% group_by(xy = x + y) %>% summarise(x = mean(x)) %>% show_query(),
 51 |     expr(copy(DT)[, `:=`(xy = x + y)][, .(x = mean(x)), keyby = .(xy)])
 52 |   )
 53 | })
 54 | 
 55 | test_that("vars set correctly", {
 56 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3))
 57 |   expect_equal(dt %>% group_by(x) %>% .$vars, c("x", "y"))
 58 | })
 59 | 
 60 | # The argument exercised here is `arrange`: `arrange = FALSE` is expected to
 61 | # produce `by =`, while the default produces `keyby =`.
 62 | test_that("`arrange` switches between keyby= and by=", {
 63 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
 64 |   dt1 <- lazy_dt(mtcars, "DT1")
 65 | 
 66 |   expect_equal(
 67 |     dt %>% group_by(xy = x + y, arrange = FALSE) %>% summarize(x = mean(x)) %>% show_query(),
 68 |     expr(copy(DT)[, `:=`(xy = x + y)][, .(x = mean(x)), by = .(xy)])
 69 |   )
 70 | 
 71 |   expect_equal(
 72 |     dt1 %>% group_by(cyl, arrange = FALSE) %>% summarize(mean_mpg = mean(mpg)) %>% show_query(),
 73 |     expr(DT1[, .(mean_mpg = mean(mpg)), by = .(cyl)])
 74 |   )
 75 | 
 76 |   expect_equal(
 77 |     dt1 %>% group_by(cyl) %>% summarize(mean_mpg = mean(mpg)) %>% show_query(),
 78 |     expr(DT1[, .(mean_mpg = mean(mpg)), keyby = .(cyl)])
 79 |   )
 80 | })
 81 | 
 82 | test_that("empty and NULL group_by ungroups", {
 83 |   dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x)
 84 |   expect_equal(group_by(dt) %>% group_vars(), character())
 85 |   expect_equal(group_by(dt, NULL) %>% group_vars(), character())
 86 |   expect_equal(group_by(dt, !!!list()) %>% group_vars(), character())
 87 | })
 88 | 
 89 | test_that("only adds step if necessary", {
 90 |   dt <- lazy_dt(data.table(x = 1, y = 1), "DT")
 91 |   expect_equal(dt %>% group_by(), dt)
 92 | 
 93 |   expect_equal(dt %>% ungroup(), dt)
 94 |   expect_equal(dt %>% ungroup(x), dt)
 95 | 
 96 |   # re-grouping by the same variable, or ungrouping a variable that is not
 97 |   # a group, must return an equal object
 98 |   dt_grouped <- dt %>% group_by(x)
 99 |   dt_grouped2 <- dt_grouped %>% group_by(x)
100 |   expect_equal(dt_grouped, dt_grouped2)
101 |   expect_equal(dt_grouped %>% ungroup(y), dt_grouped)
102 | 
103 |   # here a grouping exists, so an empty group_by() has to add a step
104 |   out <- dt_grouped %>% mutate(y = y - mean(y)) %>% group_by()
105 |   expect_s3_class(out, "dtplyr_step_group")
106 |   expect_equal(group_vars(out), character())
107 | })
108 | 
109 | test_that("works with non-standard column names, #451", {
110 |   dt <- lazy_dt(tibble(`a a` = "a"))
111 |   res <- dt %>%
112 |     group_by(`a a`) %>%
113 |     count() %>%
114 |     as_tibble()
115 |   expect_named(res, c("a a", "n"))
116 |   expect_equal(res$`a a`, "a")
117 | })
118 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-modify.R:
--------------------------------------------------------------------------------
 1 | test_that("group_modify creates modified data frame", {
 2 |   grouped <- group_by(lazy_dt(data.table(g = c(1, 1, 2), x = 1:3)), g)
 3 | 
 4 |   # reports the column and row count of the rows handed in for each group
 5 |   group_shape <- function(rows, g) {
 6 |     list(nc = ncol(rows), nr = nrow(rows))
 7 |   }
 8 |   shapes <- collect(group_modify(grouped, group_shape))
 9 | 
10 |   expect_equal(shapes$nc, c(1, 1))
11 |   expect_equal(shapes$nr, c(2, 1))
12 | })
13 | 
14 | test_that("group_map works", {
15 |   grouped <- group_by(lazy_dt(data.table(g = c(1, 1, 2), x = 1:3)), g)
16 | 
17 |   row_counts <- group_map(grouped, ~ nrow(.))
18 |   expect_equal(row_counts, list(2, 1))
19 | 
20 |   # don't include group data
21 |   col_counts <- group_map(grouped, ~ ncol(.))
22 |   expect_equal(col_counts, list(1, 1))
23 | })
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-nest.R:
--------------------------------------------------------------------------------
 1 | test_that("nest turns grouped values into one list-df", {
 2 |   lazy <- lazy_dt(tibble(x = c(1, 1, 1), y = 1:3), "DT")
 3 |   nested <- nest(lazy, data = y)
 4 |   collected <- collect(nested)
 5 | 
 6 |   expect_equal(show_query(nested), expr(DT[, .(data = .(.SD)), by = .(x)]))
 7 | 
 8 |   expect_equal(group_vars(nested), character())
 9 |   expect_equal(nested$vars, c("x", "data"))
10 | 
11 |   # a single x value, hence a single nested table holding all of y
12 |   expect_equal(collected$x, 1)
13 |   expect_equal(length(collected$data), 1L)
14 |   expect_equal(collected$data[[1L]], data.table(y = 1:3))
15 | })
16 | 
17 | test_that("nest uses grouping vars if present", {
18 |   lazy <- lazy_dt(tibble(x = c(1, 1, 1), y = 1:3), "DT")
19 |   nested <- nest(dplyr::group_by(lazy, x))
20 | 
21 |   expect_equal(group_vars(nested), "x")
22 |   expect_equal(show_query(nested), expr(DT[, .(data = .(.SD)), by = .(x)]))
23 | })
24 | 
25 | test_that("provided grouping vars override grouped defaults", {
26 |   lazy <- lazy_dt(group_by(tibble(x = 1, y = 2, z = 3), x), "DT")
27 |   nested <- nest(lazy, data = y)
28 | 
29 |   # every column not nested ends up in `by`, not just the group variable
30 |   expect_equal(show_query(nested), expr(DT[, .(data = .(.SD)), by = .(x, z)]))
31 |   expect_equal(group_vars(nested), "x")
32 |   expect_equal(nested$vars, c("x", "z", "data"))
33 | })
34 | 
35 | test_that("puts data into the correct row", {
36 |   lazy <- lazy_dt(tibble(x = 1:3, y = c("B", "A", "A")))
37 |   b_row <- dplyr::filter(collect(nest(lazy, data = x)), y == "B")
38 |   expect_equal(b_row$data[[1]]$x, 1)
39 | })
40 | 
41 | test_that("nesting everything yields a simple data frame", {
42 |   raw <- data.table(x = 1:3, y = c("B", "A", "A"))
43 |   nested <- nest(lazy_dt(raw, "DT"), data = c(x, y))
44 | 
45 |   expect_equal(show_query(nested), expr(DT[, .(data = .(.SD))]))
46 |   expect_equal(nested$vars, "data")
47 | 
48 |   expect_equal(collect(nested)$data, list(raw))
49 | })
50 | 
51 | test_that("nest preserves order of data", {
52 |   lazy <- lazy_dt(tibble(x = c(1, 3, 2, 3, 2), y = 1:5), "DT")
53 |   nested <- nest(lazy, data = y)
54 |   # keys come back in order of first appearance, not sorted
55 |   expect_equal(collect(nested)$x, c(1, 3, 2))
56 | })
57 | 
58 | test_that("can strip names", {
59 |   lazy <- lazy_dt(tibble(x = c(1, 1, 1), ya = 1:3, yb = 4:6), "DT")
60 |   nested <- nest(lazy, y = starts_with("y"), .names_sep = "")
61 | 
62 |   expect_equal(
63 |     show_query(nested),
64 |     expr(DT[, .(y = .(data.table(a = ya, b = yb))), by = .(x)])
65 |   )
66 | 
67 |   expect_named(collect(nested)$y[[1]], c("a", "b"))
68 | })
69 | 
70 | test_that("can nest multiple columns", {
71 |   lazy <- lazy_dt(tibble(x = 1, a1 = 1, a2 = 2, b1 = 1, b2 = 2), "DT")
72 |   nested <- nest(lazy, a = c(a1, a2), b = c(b1, b2))
73 | 
74 |   expect_equal(
75 |     show_query(nested),
76 |     expr(DT[, .(a = .(data.table(a1, a2)), b = .(data.table(b1, b2))), by = .(x)])
77 |   )
78 |   expect_equal(nested$vars, c("x", "a", "b"))
79 | })
80 | 
81 | test_that("nesting no columns nests all inputs", {
82 |   # included only for backward compatibility
83 |   lazy <- lazy_dt(tibble(a1 = 1, a2 = 2, b1 = 1, b2 = 2), "DT")
84 |   expect_warning(nested <- nest(lazy), "must not be empty")
85 |   expect_equal(show_query(nested), expr(DT[, .(data = .(.SD))]))
86 | })
87 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-set.R:
--------------------------------------------------------------------------------
 1 | # Each dplyr set verb is expected to map onto data.table's f*() counterpart.
 2 | test_that("basic ops generate expected translation", {
 3 |   dt1 <- lazy_dt(data.frame(x = 1:3), "dt1")
 4 |   dt2 <- lazy_dt(data.frame(x = 2L), "dt2")
 5 | 
 6 |   expect_equal(
 7 |     dt1 %>% intersect(dt2) %>% show_query(),
 8 |     expr(fintersect(dt1, dt2))
 9 |   )
10 |   expect_equal(
11 |     dt1 %>% union(dt2) %>% show_query(),
12 |     expr(funion(dt1, dt2))
13 |   )
14 |   expect_equal(
15 |     dt1 %>% union_all(dt2) %>% show_query(),
16 |     expr(funion(dt1, dt2, all = TRUE))
17 |   )
18 |   expect_equal(
19 |     dt1 %>% setdiff(dt2) %>% show_query(),
20 |     expr(fsetdiff(dt1, dt2))
21 |   )
22 | })
23 | 
24 | # intersect() is a set operation, so the description says so; the combined
25 | # step must carry the locals of both computed inputs.
26 | test_that("set operations capture locals from both parents", {
27 |   dt1 <- lazy_dt(data.frame(x = 1)) %>% mutate(y = 1) %>% compute("D1")
28 |   dt2 <- lazy_dt(data.frame(x = 1)) %>% mutate(z = 1) %>% compute("D2")
29 | 
30 |   expect_named(intersect(dt1, dt2)$locals, c("D1", "D2"))
31 | })
32 | 
33 | test_that("vars set correctly", {
34 |   # data.table functions require the inputs to have same columns
35 |   dt1 <- lazy_dt(data.frame(x = 1, y = 2), "dt1")
36 |   dt2 <- lazy_dt(data.frame(x = 2, y = 2), "dt2")
37 | 
38 |   expect_equal(dt1 %>% union(dt2) %>% .$vars, c("x", "y"))
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-arrange.R:
--------------------------------------------------------------------------------
  1 | test_that("arrange orders variables", {
  2 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
  3 | 
  4 |   expect_equal(
  5 |     dt %>% arrange(x) %>% show_query(),
  6 |     expr(DT[order(x)])
  7 |   )
  8 | })
  9 | 
 10 | test_that("arrange doesn't use, but still preserves, grouping", {
 11 |   dt <- group_by(lazy_dt(data.table(x = 1, y = 2), "DT"), x)
 12 | 
 13 |   step <- arrange(dt, y)
 14 |   expect_equal(step$groups, "x")
 15 |   expect_equal(dt_call(step), expr(DT[order(y)]))
 16 | 
 17 |   step2 <- arrange(dt, y, .by_group = TRUE)
 18 |   expect_equal(dt_call(step2), expr(DT[order(x, y)]))
 19 | })
 20 | 
 21 | test_that("empty arrange returns input unchanged", {
 22 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
 23 |   expect_true(identical(arrange(dt), dt))
 24 | })
 25 | 
 26 | test_that("can use with across", {
 27 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
 28 | 
 29 |   expect_equal(
 30 |     dt %>% arrange(across(x:y)) %>% show_query(),
 31 |     expr(DT[order(x, y)])
 32 |   )
 33 | })
 34 | 
 35 | test_that("vars set correctly", {
 36 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3))
 37 |   expect_equal(dt %>% arrange(x) %>% .$vars, c("x", "y"))
 38 | })
 39 | 
 40 | test_that("desc works with internal quosure", {
 41 |   dt <- lazy_dt(data.table(x = c(4,3,9,7), y = 1:4))
 42 | 
 43 |   desc_df <- dt %>% arrange(desc(!!quo(x))) %>% collect()
 44 | 
 45 |   expect_equal(desc_df$x, c(9,7,4,3))
 46 | })
 47 | 
 48 | test_that("desc works .data pronoun", {
 49 |   dt <- lazy_dt(data.table(x = c(4,3,9,7), y = 1:4))
 50 | 
 51 |   desc_df <- dt %>% arrange(desc(.data$x)) %>% collect()
 52 | 
 53 |   expect_equal(desc_df$x, c(9,7,4,3))
 54 | })
 55 | 
 56 | test_that("only add step if necessary", {
 57 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3))
 58 | 
 59 |   expect_equal(dt %>% arrange(), dt)
 60 |   expect_equal(dt %>% arrange(!!!list()), dt)
 61 | })
 62 | 
 63 | test_that("uses setorder when there is already a copy", {
 64 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
 65 | 
 66 |   # Works with implicit copy
 67 |   step_implicit <- dt %>%
 68 |     filter(x < 4) %>%
 69 |     arrange(x, y)
 70 | 
 71 |   expect_equal(
 72 |     show_query(step_implicit),
 73 |     expr(setorder(DT[x < 4], x, y, na.last = TRUE))
 74 |   )
 75 | 
 76 |   # Works with explicit copy
 77 |   step_explicit <- dt %>%
 78 |     mutate(x = x * 2) %>%
 79 |     arrange(x, -y)
 80 | 
 81 |   expect_equal(
 82 |     show_query(step_explicit),
 83 |     expr(setorder(copy(DT)[, `:=`(x = x * 2)], x, -y, na.last = TRUE))
 84 |   )
 85 | })
 86 | 
 87 | test_that("setorder places NAs last", {
 88 |   dt <- lazy_dt(tibble(x = c("b", NA, "a")), "DT")
 89 |   dt$needs_copy <- TRUE
 90 | 
 91 |   # Works with implicit copy
 92 |   res <- dt %>%
 93 |     arrange(x) %>%
 94 |     as.data.table()
 95 | 
 96 |   expect_equal(res$x, c("a", "b", NA))
 97 | })
 98 | 
 99 | test_that("works with a transmute expression", {
100 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
101 | 
102 |   step <- dt %>%
103 |     arrange(x + 1)
104 |   expect_equal(show_query(step), expr(DT[order(x + 1)]))
105 | 
106 |   # Works with complex expression
107 |   step <- dt %>%
108 |     arrange(-(x + y))
109 |   expect_equal(show_query(step), expr(DT[order(-(x + y))]))
110 | })
111 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-do.R:
--------------------------------------------------------------------------------
 1 | # do() is expected to translate to a list-column built from .SD, with the
 2 | # groups passed through `keyby` when present.
 3 | test_that("basic operation as expected", {
 4 |   lazy <- lazy_dt(data.frame(g = c(1, 1, 2), x = 1:3), "DT")
 5 | 
 6 |   ungrouped <- lazy %>% do(y = ncol(.))
 7 |   expect_equal(show_query(ungrouped), expr(DT[, .(y = .(ncol(.SD)))]))
 8 | 
 9 |   grouped <- lazy %>% group_by(g) %>% do(y = ncol(.))
10 |   expect_equal(
11 |     show_query(grouped),
12 |     expr(DT[, .(y = .(ncol(.SD))), keyby = .(g)])
13 |   )
14 | })
15 | 
16 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-expand.R:
--------------------------------------------------------------------------------
  1 | test_that("expand completes all values", {
  2 |   lazy <- lazy_dt(tibble(x = 1:2, y = 1:2), "DT")
  3 |   expanded <- expand(lazy, x, y)
  4 | 
  5 |   expect_equal(
  6 |     show_query(expanded),
  7 |     expr(DT[, CJ(x = x, y = y, unique = TRUE)])
  8 |   )
  9 |   expect_equal(expanded$vars, c("x", "y"))
 10 |   expect_equal(nrow(collect(expanded)), 4)
 11 | })
 12 | 
 13 | test_that("multiple variables in one arg doesn't expand", {
 14 |   lazy <- lazy_dt(tibble(x = 1:2, y = 1:2), "DT")
 15 |   expanded <- expand(lazy, c(x, y))
 16 | 
 17 |   expect_equal(nrow(collect(expanded)), 2)
 18 | })
 19 | 
 20 | test_that("works with unnamed vectors", {
 21 |   lazy <- lazy_dt(tibble(x = 1:2, y = 1:2), "DT")
 22 |   expanded <- expand(lazy, x, 1:2)
 23 | 
 24 |   # the unnamed vector is expected to receive the positional name V2
 25 |   expect_equal(
 26 |     show_query(expanded),
 27 |     expr(DT[, CJ(x = x, V2 = 1:2, unique = TRUE)])
 28 |   )
 29 |   expect_equal(expanded$vars, c("x", "V2"))
 30 |   expect_equal(nrow(collect(expanded)), 4)
 31 | })
 32 | 
 33 | test_that("works with named vectors", {
 34 |   lazy <- lazy_dt(tibble(x = 1:2, y = 1:2), "DT")
 35 |   expanded <- expand(lazy, x, val = 1:2)
 36 | 
 37 |   expect_equal(
 38 |     show_query(expanded),
 39 |     expr(DT[, CJ(x = x, val = 1:2, unique = TRUE)])
 40 |   )
 41 |   expect_equal(expanded$vars, c("x", "val"))
 42 |   expect_equal(nrow(collect(expanded)), 4)
 43 | })
 44 | 
 45 | test_that("expand respects groups", {
 46 |   input <- tibble(
 47 |     a = c(1L, 1L, 2L),
 48 |     b = c(1L, 2L, 1L),
 49 |     c = c(2L, 1L, 1L)
 50 |   )
 51 |   expanded <- expand(group_by(lazy_dt(input, "DT"), c), a, b)
 52 |   collected <- collect(expanded)
 53 | 
 54 |   expect_equal(
 55 |     show_query(expanded),
 56 |     expr(DT[, CJ(a = a, b = b, unique = TRUE), keyby = .(c)])
 57 |   )
 58 |   expect_equal(expanded$vars, c("c", "a", "b"))
 59 |   expect_equal(collected$a, c(1, 1, 2, 2, 1))
 60 |   expect_equal(collected$b, c(1, 2, 1, 2, 1))
 61 | })
 62 | 
 63 | test_that("expand handles group variables as arguments", {
 64 |   lazy <- lazy_dt(data.frame(x = 1, y = 2, z = 3), "DT")
 65 | 
 66 |   # single group var, not redefined
 67 |   one_group <- expand(group_by(lazy, x), x, y)
 68 |   expect_equal(
 69 |     show_query(one_group),
 70 |     expr(DT[, CJ(x = x, y = y, unique = TRUE), keyby = .(x)][, `:=`("x", NULL)])
 71 |   )
 72 |   expect_equal(one_group$groups, "x")
 73 | 
 74 |   # multiple group vars, not redefined
 75 |   two_groups <- expand(group_by(lazy, x, y), x, y, z)
 76 |   expect_equal(
 77 |     show_query(two_groups),
 78 |     expr(DT[, CJ(x = x, y = y, z = z, unique = TRUE), keyby = .(x, y)
 79 |             ][, !!expr(!!c("x", "y") := NULL)])
 80 |   )
 81 |   expect_equal(two_groups$groups, c("x", "y"))
 82 | 
 83 |   # redefined group var
 84 |   redefined <- expand(group_by(lazy, x), x = 5, y)
 85 |   expect_equal(
 86 |     show_query(redefined),
 87 |     expr(DT[, CJ(x = 5, y = y, unique = TRUE), keyby = .(x)][, `:=`("x", NULL)])
 88 |   )
 89 |   expect_equal(redefined$groups, c("x"))
 90 |   expect_equal(as_tibble(redefined), tibble(x = 5, y = 2))
 91 | })
 92 | 
 93 | test_that("NULL inputs", {
 94 |   input <- tibble(x = 1:5)
 95 |   expanded <- expand(lazy_dt(input, "DT"), x, y = NULL)
 96 |   expect_equal(collect(expanded), input)
 97 | })
 98 | 
 99 | test_that("expand respects .name_repair", {
100 |   lazy <- lazy_dt(tibble(x = 1:2), "DT")
101 | 
102 |   # messages emitted while repairing the duplicated name are not of interest
103 |   suppressMessages(
104 |     expect_named(collect(expand(lazy, x, x, .name_repair = "unique")), c("x...1", "x...2"))
105 |   )
106 | })
107 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-filter.R:
--------------------------------------------------------------------------------
  1 | test_that("can filter by value", {
  2 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
  3 | 
  4 |   expect_equal(
  5 |     dt %>% filter() %>% show_query(),
  6 |     expr(DT)
  7 |   )
  8 |   expect_equal(
  9 |     dt %>% filter(x) %>% show_query(),
 10 |     expr(DT[(x)])
 11 |   )
 12 | 
 13 |   expect_equal(
 14 |     dt %>% filter(x > 1) %>% show_query(),
 15 |     expr(DT[x > 1])
 16 |   )
 17 | 
 18 |   expect_equal(
 19 |     dt %>% filter(x > 1, y > 2) %>% show_query(),
 20 |     expr(DT[x > 1 & y > 2])
 21 |   )
 22 | })
 23 | 
 24 | test_that("can filter with logical columns", {
 25 |   dt <- lazy_dt(data.table(x = c(TRUE, FALSE)), "DT")
 26 | 
 27 |   expect_equal(
 28 |     dt %>% filter(x) %>% show_query(),
 29 |     expr(DT[(x)])
 30 |   )
 31 | 
 32 |   expect_equal(
 33 |     dt %>% filter(!x) %>% show_query(),
 34 |     expr(DT[(!x)])
 35 |   )
 36 | })
 37 | 
 38 | 
 39 | test_that("inlines external variables", {
 40 |   dt <- lazy_dt(data.table(x = 1), "DT")
 41 |   l <- c(1, 10)
 42 |   # `l` is a local vector, so the expected call has its value spliced in with `!!`
 43 |   expect_equal(
 44 |     dt %>% filter(x %in% l) %>% show_query(),
 45 |     expr(DT[x %in% !!l])
 46 |   )
 47 | 
 48 |   # Except in the global environment
 49 |   # But I can't figure out how to test this - it's not too important
 50 |   # as it only affects the quality of the translation not the correctness
 51 | })
 52 | 
 53 | test_that("can use with across", {
 54 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
 55 | 
 56 |   expect_equal(
 57 |     dt %>% filter(across(x:y, ~ . > 0)) %>% show_query(),
 58 |     expr(DT[x > 0 & y > 0])
 59 |   )
 60 | 
 61 |   expect_equal(
 62 |     dt %>% filter(if_all(x:y, ~ . > 0)) %>% show_query(),
 63 |     expr(DT[x > 0 & y > 0])
 64 |   )
 65 |   expect_equal(
 66 |     dt %>% filter(if_any(x:y, ~ . > 0)) %>% show_query(),
 67 |     expr(DT[x > 0 | y > 0])
 68 |   )
 69 | 
 70 |   # .cols defaults to everything()
 71 |   expect_equal(
 72 |     dt %>% filter(if_all(.fns = ~ . > 0)) %>% show_query(),
 73 |     expr(DT[x > 0 & y > 0 & z > 0])
 74 |   )
 75 |   expect_equal(
 76 |     dt %>% filter(if_any(.fns = ~ . > 0)) %>% show_query(),
 77 |     expr(DT[x > 0 | y > 0 | z > 0])
 78 |   )
 79 | })
 80 | 
 81 | test_that("can filter when grouped", {
 82 |   dt1 <- lazy_dt(data.table(x = c(1, 1, 2, 2), y = c(1, 2, 3, 4)), "DT")
 83 |   dt2 <- dt1 %>% group_by(x) %>% filter(sum(y) == 3)
 84 | 
 85 |   expect_equal(
 86 |     dt2 %>% show_query(),
 87 |     expr(DT[DT[, .I[sum(y) == 3], by = .(x)]$V1])
 88 |   )
 89 | 
 90 |   expect_equal(as_tibble(dt2), tibble(x = c(1, 1), y = c(1, 2)))
 91 | })
 92 | 
 93 | test_that("grouped filter doesn't reorder", {
 94 |   dt1 <- lazy_dt(data.frame(x = c(2, 2, 1, 1), y = 1:4), "DT")  # x is not in sorted order
 95 |   dt2 <- dt1 %>% group_by(x) %>% filter(TRUE)
 96 |   # The expected translation groups with `by`, not `keyby`
 97 |   expect_equal(
 98 |     dt2 %>% show_query(),
 99 |     expr(DT[DT[, .I[TRUE], by = .(x)]$V1])
100 |   )
101 |   expect_equal(dt2 %>% as_tibble(), as_tibble(dt1))  # rows must come back in input order
102 | })
103 | 
104 | test_that("only adds step if dots are not empty", {
105 |   dt <- lazy_dt(data.table(x = 1), "DT")
106 | 
107 |   expect_equal(dt %>% filter(), dt)
108 |   expect_equal(dt %>% filter(!!!list()), dt)
109 | })
110 | 
111 | test_that("errors for named input", {
112 |   dt <- lazy_dt(data.table(x = 1, y = 2), "DT")
113 | 
114 |   expect_snapshot(error = TRUE, filter(dt, x = 1))
115 |   expect_snapshot(error = TRUE, filter(dt, y > 1, x = 1))
116 | })
117 | 
118 | test_that("allows named constants that resolve to logical vectors", {
119 |   dt <- lazy_dt(mtcars, "DT")
120 |   filters <- mtcars %>%
121 |     transmute(
122 |       cyl %in% 6:8,
123 |       hp / drat > 50
124 |     )
125 | 
126 |   expect_equal(
127 |     filter(dt, !!!filters),
128 |     filter(dt, !!!unname(filters))
129 |   )
130 | })
131 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-select.R:
--------------------------------------------------------------------------------
  1 | test_that("can select variables", {
  2 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
  3 | 
  4 |   expect_equal(
  5 |     dt %>% select(-z) %>% show_query(),
  6 |     expr(DT[, .(x, y)])
  7 |   )
  8 | 
  9 |   expect_equal(
 10 |     dt %>% select(a = x, y) %>% show_query(),
 11 |     expr(DT[, .(a = x, y)])
 12 |   )
 13 | })
 14 | 
 15 | test_that("can merge iff j-generating call comes after i", {
 16 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
 17 | 
 18 |   expect_equal(
 19 |     dt %>% filter(x > 1) %>% select(y) %>% show_query(),
 20 |     expr(DT[x > 1, .(y)])
 21 |   )
 22 |   expect_equal(
 23 |     dt %>% select(x = y) %>% filter(x > 1) %>% show_query(),
 24 |     expr(DT[, .(x = y)][x > 1])
 25 |   )
 26 | })
 27 | 
 28 | test_that("renames grouping vars", {
 29 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1))
 30 |   gt <- group_by(dt, x)
 31 | 
 32 |   expect_equal(select(gt, y = x)$groups, "y")
 33 | })
 34 | 
 35 | test_that("empty select returns no columns", {
 36 |   dt <- data.table(x = 1, y = 1, z = 1)
 37 |   lz <- lazy_dt(dt, "DT")
 38 |   expect_equal(
 39 |     lz %>% select() %>% collect(),
 40 |     tibble()
 41 |   )
 42 | 
 43 |   # unless it's grouped
 44 |   skip_if(utils::packageVersion("rlang") < "0.5.0")
 45 |   expect_snapshot(out <- lz %>% group_by(x) %>% select())
 46 |   expect_equal(
 47 |     out %>% collect(),
 48 |     group_by(tibble(x = 1), x)
 49 |   )
 50 | })
 51 | 
 52 | test_that("vars set correctly", {
 53 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3))
 54 |   expect_equal(dt %>% select(a = x, y) %>% .$vars, c("a", "y"))
 55 | })
 56 | 
 57 | test_that("only add step if necessary", {
 58 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
 59 |   expect_equal(dt %>% select(everything()), dt)
 60 |   expect_equal(dt %>% select(x, y), dt)
 61 | })
 62 | 
 63 | ### When data is copied (either implicitly or explicitly)
 64 | 
 65 | test_that("copied data: can select variables", {
 66 |   dt <- lazy_dt(data.table(x = 1, y = 2, z = 3), "DT")
 67 |   dt$needs_copy <- TRUE
 68 | 
 69 |   expect_equal(
 70 |     dt %>% select(-z) %>% show_query(),
 71 |     expr(copy(DT)[, `:=`(!!"z", NULL)])
 72 |   )
 73 | 
 74 |   expect_equal(
 75 |     dt %>% select(y, x) %>% show_query(),
 76 |     expr(setcolorder(copy(DT)[, `:=`("z", NULL)], !!c("y", "x")))
 77 |   )
 78 | 
 79 |   expect_equal(
 80 |     dt %>% select(a = x, y) %>% show_query(),
 81 |     expr(copy(DT)[, .(a = x, y)])
 82 |   )
 83 | })
 84 | 
 85 | test_that("copied data: renaming uses regular selection", {
 86 |   dt <- lazy_dt(data.table(x = 1, y = 2, z = 3), "DT")
 87 |   dt$needs_copy <- TRUE
 88 | 
 89 |   step <- dt %>% select(a = x, y)
 90 | 
 91 |   expect_equal(
 92 |     show_query(step),
 93 |     expr(copy(DT)[, .(a = x, y)])
 94 |   )
 95 | 
 96 |   expect_named(collect(step), c("a", "y"))
 97 | })
 98 | 
 99 | test_that("copied data: can merge iff j-generating call comes after i", {
100 |   dt <- lazy_dt(data.table(x = 1, y = 2, z = 3), "DT")
101 |   dt$needs_copy <- TRUE
102 | 
103 |   expect_equal(
104 |     dt %>% filter(x > 1) %>% select(y) %>% show_query(),
105 |     expr(copy(DT)[x > 1, .(y)])
106 |   )
107 |   expect_equal(
108 |     dt %>% select(x = y) %>% filter(x > 1) %>% show_query(),
109 |     expr(copy(DT)[, .(x = y)][x > 1])
110 |   )
111 | 
112 | })
113 | 
114 | test_that("copied data: renames grouping vars", {
115 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1))
116 |   gt <- group_by(dt, x)
117 |   gt$needs_copy <- TRUE
118 | 
119 |   expect_equal(select(gt, y = x)$groups, "y")
120 | })
121 | 
122 | test_that("copied data: empty select returns no columns", {
123 |   dt <- data.table(x = 1, y = 2, z = 3)
124 |   lz <- lazy_dt(dt, "DT")
125 |   lz$needs_copy <- TRUE
126 |   expect_equal(
127 |     lz %>% select() %>% collect(),
128 |     tibble()
129 |   )
130 | 
131 |   # unless it's grouped
132 |   expect_snapshot(out <- lz %>% group_by(x) %>% select())
133 |   expect_equal(
134 |     out %>% collect(),
135 |     group_by(tibble(x = 1), x)
136 |   )
137 | })
138 | 
139 | test_that("copied data: only add step if necessary", {
140 |   dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT")
141 |   dt$needs_copy <- TRUE
142 |   expect_equal(dt %>% select(everything()), dt)
143 |   expect_equal(dt %>% select(x, y), dt)
144 | })
145 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-separate.R:
--------------------------------------------------------------------------------
 1 | test_that("missing values in input are missing in output", {
 2 |   dt <- lazy_dt(tibble(x = c(NA, "a b")), "DT")
 3 |   step <- separate(dt, x, c("x", "y"))
 4 |   out <- collect(step)
 5 |   expect_equal(
 6 |     show_query(step),
 7 |     expr(copy(DT)[, `:=`(!!c("x", "y"), tstrsplit(x, split = "[^[:alnum:]]+"))])
 8 |   )
 9 |   expect_equal(out$x, c(NA, "a"))
10 |   expect_equal(out$y, c(NA, "b"))
11 | })
12 | 
13 | test_that("convert produces integers etc", {
14 |   dt <- lazy_dt(tibble(x = "1-1.5-FALSE"), "DT")
15 |   step <- separate(dt, x, c("x", "y", "z"), "-", convert = TRUE)
16 |   out <- collect(step)
17 |   expect_equal(
18 |     show_query(step),
19 |     expr(copy(DT)[, `:=`(!!c("x", "y", "z"), tstrsplit(x, split = "-", type.convert = TRUE))])
20 |   )
21 |   expect_equal(out$x, 1L)
22 |   expect_equal(out$y, 1.5)
23 |   expect_equal(out$z, FALSE)
24 | })
25 | 
26 | test_that("overwrites existing columns", {
27 |   dt <- lazy_dt(tibble(x = "a:b"), "DT")
28 |   step <- dt %>% separate(x, c("x", "y"))
29 |   out <- collect(step)
30 | 
31 |   expect_equal(
32 |     show_query(step),
33 |     expr(copy(DT)[, `:=`(!!c("x", "y"), tstrsplit(x, split = "[^[:alnum:]]+"))])
34 |   )
35 |   expect_equal(step$vars, c("x", "y"))
36 |   expect_equal(out$x, "a")
37 | })
38 | 
39 | test_that("drops NA columns", {
40 |   dt <- lazy_dt(tibble(x = c(NA, "a-b", "c-d")), "DT")
41 |   step <- separate(dt, x, c(NA, "y"), "-")
42 |   out <- collect(step)
43 |   expect_equal(step$vars, "y")
44 |   expect_equal(out$y, c(NA, "b", "d"))
45 | })
46 | 
47 | test_that("checks type of `into` and `sep`", {
48 |   dt <- lazy_dt(tibble(x = "a:b"), "DT")
49 |   expect_snapshot(
50 |     separate(dt, x, "x", FALSE),
51 |     error = TRUE
52 |   )
53 |   expect_snapshot(
54 |     separate(dt, x, FALSE),
55 |     error = TRUE
56 |   )
57 | })
58 | 
59 | test_that("only copies when necessary", {
60 |   dt <- tibble(x = paste(letters[1:3], letters[1:3], sep = "-"), y = 1:3) %>%
61 |     lazy_dt("DT")
62 |   step <- dt %>%
63 |     filter(y < 4) %>%
64 |     separate(x, into = c("left", "right"), sep = "-")
65 |   expect_equal(
66 |     show_query(step),
67 |     expr(DT[y < 4][, `:=`(!!c("left", "right"), tstrsplit(x, split = "-"))][, `:=`("x", NULL)])
68 |   )
69 | })
70 | 
71 | test_that("can pass quosure to `col` arg, #359", {
72 |   dt <- lazy_dt(tibble(combined = c("a_b", "a_b")), "DT")
73 |   separate2 <- function(df, col, into) {
74 |     collect(separate(df, {{ col }}, into))
75 |   }
76 |   out <- separate2(dt, combined, into = c("a", "b"))
77 |   expect_named(out, c("a", "b"))
78 |   expect_equal(out$a, c("a", "a"))
79 |   expect_equal(out$b, c("b", "b"))
80 | })
81 | 
82 | test_that("can use numeric `col` arg", {
83 |   dt <- lazy_dt(tibble(combined = c("a_b", "a_b")), "DT")
84 | 
85 |   out <- collect(separate(dt, 1, into = c("a", "b")))
86 |   expect_named(out, c("a", "b"))
87 |   expect_equal(out$a, c("a", "a"))
88 |   expect_equal(out$b, c("b", "b"))
89 | })
90 | 
91 | test_that("errors on multiple columns in `col`", {
92 |   dt <- lazy_dt(tibble(x = c("a_b", "a_b"), y = x), "DT")
93 | 
94 |   expect_error(separate(dt, c(x, y), into = c("left", "right")),
95 |                "must select exactly one column")
96 | })
97 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset-transmute.R:
--------------------------------------------------------------------------------
 1 | test_that("works", {
 2 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT")
 3 | 
 4 |   expect_equal(
 5 |     dt %>% transmute(x) %>% collect(),
 6 |     dt %>% mutate(x, .keep = "none") %>% collect()
 7 |   )
 8 | })
 9 | 
10 | test_that("empty dots preserves groups", {
11 |   dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>%
12 |     group_by(y)
13 | 
14 |   res <- dt %>% transmute() %>% collect()
15 | 
16 |   expect_equal(names(res), "y")
17 | })
18 | 
19 | test_that("preserves column order", {
20 |   dt <- lazy_dt(data.table(x = 1, y = 1), "DT")
21 | 
22 |   res <- dt %>% transmute(y, x) %>% collect()
23 | 
24 |   expect_equal(names(res), c("y", "x"))
25 | })
26 | 
27 | test_that("works correctly when column is both added and removed in the same call", {
28 |   dt <- lazy_dt(data.table(x = 1, y = 2), "DT")
29 | 
30 |   res <- dt %>% transmute(y, z = 3, z = NULL) %>% collect()
31 | 
32 |   expect_equal(names(res), "y")
33 | })
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step-subset.R:
--------------------------------------------------------------------------------
 1 | test_that("constructor has sensible defaults", {
 2 |   first <- step_first(data.table(x = 1), "DT")
 3 |   step <- step_subset(first)
 4 |   # Only the parent is supplied: vars match the parent's column, groups/i/j are empty
 5 |   expect_s3_class(step, "dtplyr_step_subset")
 6 |   expect_equal(step$parent, first)
 7 |   expect_equal(step$vars, "x")
 8 |   expect_equal(step$groups, character())
 9 |   expect_equal(step$i, NULL)
10 |   expect_equal(step$j, NULL)
11 | })
12 | 
13 | test_that("generates expected calls", {
14 |   first <- lazy_dt(data.table(x = 1), "DT")
15 | 
16 |   ungrouped <- step_subset(first, i = quote(i), j = quote(j))
17 |   expect_equal(dt_call(ungrouped), expr(DT[i, j]))
18 | 
19 |   with_i <- step_subset(first, i = quote(i), j = quote(j), groups = "x")
20 |   expect_equal(dt_call(with_i), expr(DT[i, j, keyby = .(x)]))
21 | 
22 |   without_i <- step_subset(first, j = quote(j), groups = "x")
23 |   expect_equal(dt_call(without_i), expr(DT[, j, keyby = .(x)]))
24 | })
25 | 


--------------------------------------------------------------------------------
/tests/testthat/test-step.R:
--------------------------------------------------------------------------------
 1 | test_that("tbl metadata as expected", {
 2 |   dt <- lazy_dt(data.table(x = c(1, 1, 1, 2, 2, 3)), "DT")
 3 | 
 4 |   expect_equal(dim(dt), c(6, 1))
 5 |   expect_equal(as.character(tbl_vars(dt)), "x")
 6 |   expect_equal(show_query(dt), expr(DT))
 7 | })
 8 | 
 9 | test_that("group metadata as expected", {
10 |   dt <- lazy_dt(data.table(x = c(1, 1, 1, 2, 2, 3)))
11 |   expect_equal(group_vars(dt), character())
12 |   expect_equal(groups(dt), list())
13 |   expect_equal(group_size(dt), 6)
14 |   expect_equal(n_groups(dt), 1)
15 | 
16 |   gt <- group_by(dt, x)
17 |   expect_equal(group_vars(gt), c("x"))
18 |   expect_equal(groups(gt), syms("x"))
19 |   expect_equal(group_size(gt), c(3, 2, 1))
20 |   expect_equal(n_groups(gt), 3)
21 | })
22 | 
23 | test_that("has useful display methods", {
24 |   expect_snapshot({
25 |     dt <- lazy_dt(mtcars, "DT")
26 |     dt
27 |     dt %>% group_by(vs, am)
28 |     dt %>% mutate(y = 10) %>% compute("DT2")
29 |   })
30 | })
31 | 
32 | test_that("can print using n/max_extra_cols/max_footer_lines, #464, ", {
33 |   expect_snapshot({
34 |     dt <- letters %>%
35 |       lapply(function(.x) tibble(!!.x := 1:10)) %>%
36 |       bind_cols() %>%
37 |       lazy_dt("DT")
38 |     print(dt, n = 3)
39 |     print(dt, max_extra_cols = 3)
40 |     print(dt, max_footer_lines = 1)
41 |   })
42 | })
43 | 
44 | test_that("can evaluate to any data frame type", {
45 |   dt <- lazy_dt(mtcars, "DT")
46 | 
47 |   expect_identical(class(as.data.frame(dt)), "data.frame")
48 |   expect_s3_class(as.data.table(dt), "data.table")
49 |   expect_s3_class(as_tibble(dt), "tbl_df")
50 | 
51 |   expect_s3_class(collect(dt), "tbl_df")
52 | })
53 | 
54 | test_that("compute returns lazy_dt", {
55 |   dt <- lazy_dt(mtcars, "DT")
56 |   dt <- summarise(dt, n = n())
57 | 
58 |   dt2 <- compute(dt)
59 |   expect_s3_class(dt2, "dtplyr_step")
60 |   expect_equal(as.character(tbl_vars(dt2)), "n")
61 | })
62 | 
63 | test_that("collect and compute return grouped data", {
64 |   dt <- group_by(lazy_dt(data.table(x = 1, y = 1), "DT"), x)
65 | 
66 |   expect_equal(dt %>% compute() %>% group_vars(), "x")
67 |   expect_equal(dt %>% collect() %>% group_vars(), "x")
68 | })
69 | 
70 | 
71 | # pull() ------------------------------------------------------------------
72 | 
73 | test_that("pull default extracts last var from data frame", {
74 |   df <- lazy_dt(tibble(x = 1:10, y = 1:10), "DT")
75 |   expect_equal(pull(df), 1:10)
76 | })
77 | 
78 | test_that("can extract by name, or positive/negative position", {
79 |   x <- 1:10
80 |   df <- lazy_dt(tibble(x = x, y = runif(10)), "DT")
81 | 
82 |   expect_equal(pull(df, x), x)
83 |   expect_equal(pull(df, 1), x)
84 |   expect_equal(pull(df, -2L), x)
85 | })
86 | 
87 | test_that("can extract named vectors", {
88 |   x <- 1:10
89 |   y <- letters[x]
90 |   df <- lazy_dt(tibble(x = x, y = y), "DT")
91 |   xn <- set_names(x, y)
92 | 
93 |   expect_equal(pull(df, x, y), xn)
94 |   expect_equal(pull(df, 1, 2), xn)
95 | })
96 | 


--------------------------------------------------------------------------------
/tests/testthat/test-unite.R:
--------------------------------------------------------------------------------
 1 | test_that("unite pastes columns together & removes old col", {
 2 |   df <- lazy_dt(data.table(x = "a", y = "b"), "DT")
 3 |   step <- unite(df, "z", x:y)
 4 |   out <- as.data.table(step)
 5 |   expect_equal(names(out), "z")
 6 |   expect_equal(out$z, "a_b")
 7 |   expect_equal(
 8 |     show_query(step),
 9 |     expr(copy(DT)[, `:=`(z = paste(x, y, sep = "_"))][, `:=`(!!c("x", "y"), NULL)])
10 |   )
11 | })
12 | 
13 | test_that("unite does not remove new col in case of name clash", {
14 |   df <- lazy_dt(data.table(x = "a", y = "b"), "DT")
15 |   step <- unite(df, x, x:y)
16 |   out <- as.data.table(step)
17 |   expect_equal(names(out), "x")
18 |   expect_equal(out$x, "a_b")
19 | })
20 | 
21 | test_that("correct column order when remove = FALSE", {
22 |   df <- lazy_dt(data.table(x = "a", y = "b"), "DT")
23 |   step <- unite(df, "united", y, x, remove = FALSE)
24 |   out <- as.data.table(step)
25 |   expect_equal(names(out), c("united", "x", "y"))
26 |   expect_equal(out$united, "b_a")
27 | })
28 | 
29 | test_that("unite preserves grouping", {
30 |   df <- lazy_dt(data.table(g = 1, x = "a"), "DT") %>% group_by(g)
31 |   step <- df %>% unite(x, x)
32 |   expect_equal(dplyr::group_vars(df), dplyr::group_vars(step))
33 | })
34 | 
35 | test_that("doesn't use `by` for unite step", {
36 |   df <- lazy_dt(data.table(x = "a", y = "b", z = "c"), "DT") %>% group_by(z)
37 |   step <- unite(df, "z", x:y)
38 |   out <- as.data.table(step)
39 |   expect_equal(names(out), "z")
40 |   expect_equal(out$z, "a_b")
41 |   expect_equal(step$groups, "z")
42 |   expect_equal(
43 |     show_query(step),
44 |     expr(copy(DT)[, `:=`(z = paste(x, y, sep = "_"))][, `:=`(!!c("x", "y"), NULL)])
45 |   )
46 | })
47 | 
48 | test_that("drops grouping when needed", {
49 |   df <- lazy_dt(data.table(g = 1, x = "a"), "DT") %>% group_by(g)
50 |   step <- df %>% unite(gx, g, x)
51 |   rs <- as.data.table(step)
52 |   expect_equal(rs$gx, "1_a")
53 |   expect_equal(dplyr::group_vars(rs), character())
54 | })
55 | 
56 | test_that("keeps groups when needed", {
57 |   df <- lazy_dt(data.table(x = "x", y = "y"), "DT") %>% group_by(x, y)
58 |   step <- df %>% unite("z", x)
59 |   rs <- as.data.table(step)
60 |   expect_equal(rs$z, "x")
61 |   expect_equal(dplyr::group_vars(step), "y")
62 | })
63 | 
64 | test_that("empty var spec uses all vars", {
65 |   df <- lazy_dt(data.table(x = "a", y = "b"), "DT")
66 |   expect_equal(collect(unite(df, "z")), tibble(z = "a_b"))
67 | })
68 | 
69 | test_that("errors on na.rm", {
70 |   df <- lazy_dt(data.table(x = c("a", NA), y = c("b", NA)), "DT")
71 |   expect_snapshot_error(unite(df, "z", x:y, na.rm = TRUE))
72 | })
73 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/benchmark.R:
--------------------------------------------------------------------------------
 1 | # Ad-hoc benchmark: each query (Q1-Q10) pairs a dplyr pipeline with the
 2 | # equivalent hand-written data.table call.
 3 | # NOTE(review): assumes dplyr and data.table are already attached and that the
 4 | # CSV below is in the working directory - confirm before running.
 5 | DF <- vroom::vroom("G1_1e7_1e2_0_0.csv")
 6 | DT <- data.table(DF)
 7 | 
 8 | 
 9 | # Q1 ----------------------------------------------------------------------
10 | # NOTE(review): the "dplyr" arm pipes the data.table itself; if the goal is to
11 | # time dtplyr's translation it probably needs lazy_dt(DT) - confirm intent.
12 | bench::mark(
13 |   dplyr = DT %>% group_by(id1) %>% summarise(sum(v1)),
14 |   direct = DT[, .(v1 = sum(v1)), by = id1],
15 |   min_iterations = 3,
16 |   check = FALSE # outputs are not compared; the summary column names differ
17 | )
18 | 
19 | DT %>% group_by(id1) %>% summarise(sum(v1))
20 | 
21 | # Q2 ----------------------------------------------------------------------
22 | DF %>%
23 |   group_by(id1, id2) %>%
24 |   summarise(v1 = sum(v1))
25 | 
26 | DT[, .(v1=sum(v1)), by=.(id1, id2)]
27 | 
28 | # Q3 ----------------------------------------------------------------------
29 | DF %>%
30 |   group_by(id3) %>%
31 |   summarise(v1 = sum(v1), v3 = mean(v3))
32 | 
33 | DT[, .(v1=sum(v1), v3=mean(v3)), by=id3]
34 | 
35 | # Q4 ----------------------------------------------------------------------
36 | DF %>%
37 |   group_by(id4) %>%
38 |   summarise_at(c("v1", "v2", "v3"), "mean") # columns first, then the function
39 | 
40 | DT[, lapply(.SD, mean), by=id4, .SDcols=v1:v3]
41 | 
42 | # Q5 ----------------------------------------------------------------------
43 | DF %>%
44 |   group_by(id6) %>%
45 |   summarise_at(c("v1", "v2", "v3"), "sum") # columns first, then the function
46 | 
47 | DT[, lapply(.SD, sum), by=id6, .SDcols=v1:v3]
48 | 
49 | # Q6 ----------------------------------------------------------------------
50 | DF %>%
51 |   group_by(id2, id4) %>%
52 |   summarise(median_v3 = median(v3), sd_v3 = sd(v3))
53 | 
54 | DT[, .(median_v3=median(v3), sd_v3=sd(v3)), by=.(id2, id4)]
55 | 
56 | # Q7 ----------------------------------------------------------------------
57 | DF %>%
58 |   group_by(id2, id4) %>%
59 |   summarise(range_v1_v2 = max(v1) - min(v2))
60 | 
61 | DT[, .(range_v1_v2=max(v1)-min(v2)), by=.(id2, id4)]
62 | 
63 | # Q8 ----------------------------------------------------------------------
64 | DF %>%
65 |   select(id2, id4, largest2_v3 = v3) %>%
66 |   arrange(desc(largest2_v3)) %>%
67 |   group_by(id2, id4) %>%
68 |   filter(row_number() <= 2L)
69 | 
70 | DT[order(-v3), .(largest2_v3 = head(v3, 2L)), by=.(id2, id4)]
71 | 
72 | # Q9 ----------------------------------------------------------------------
73 | DF %>%
74 |   group_by(id2, id4) %>%
75 |   summarise(r2 = cor(v1, v2)^2)
76 | 
77 | DT[, .(r2=cor(v1, v2)^2), by=.(id2, id4)]
78 | 
79 | # Q10 ----------------------------------------------------------------------
80 | DF %>%
81 |   group_by(id1, id2, id3, id4, id5, id6) %>%
82 |   summarise(v3 = sum(v3), count = n())
83 | 
84 | DT[, .(v3=sum(v3), count=.N), by=id1:id6]
85 | 


--------------------------------------------------------------------------------