├── .Rbuildignore ├── .covrignore ├── .gitattributes ├── .github ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ └── issue_template.md ├── SUPPORT.md └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── by.R ├── compat-purrr.R ├── complete.R ├── count.R ├── dtplyr-package.R ├── fill.R ├── reframe.R ├── replace_na.R ├── step-assign.R ├── step-call-pivot_longer.R ├── step-call-pivot_wider.R ├── step-call.R ├── step-colorder-relocate.R ├── step-colorder.R ├── step-first.R ├── step-group.R ├── step-join.R ├── step-modify.R ├── step-mutate.R ├── step-nest.R ├── step-set.R ├── step-setnames.R ├── step-subset-arrange.R ├── step-subset-do.R ├── step-subset-expand.R ├── step-subset-filter.R ├── step-subset-select.R ├── step-subset-separate.R ├── step-subset-slice.R ├── step-subset-summarise.R ├── step-subset-transmute.R ├── step-subset.R ├── step.R ├── tidyeval-across.R ├── tidyeval.R ├── unite.R ├── utils.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── dtplyr.Rproj ├── man ├── arrange.dtplyr_step.Rd ├── collect.dtplyr_step.Rd ├── complete.dtplyr_step.Rd ├── count.dtplyr_step.Rd ├── distinct.dtplyr_step.Rd ├── dot-datatable.aware.Rd ├── drop_na.dtplyr_step.Rd ├── dtplyr-package.Rd ├── expand.dtplyr_step.Rd ├── figures │ ├── dt-seal.png │ └── logo.png ├── fill.dtplyr_step.Rd ├── filter.dtplyr_step.Rd ├── group_by.dtplyr_step.Rd ├── group_modify.dtplyr_step.Rd ├── head.dtplyr_step.Rd ├── intersect.dtplyr_step.Rd ├── lazy_dt.Rd ├── left_join.dtplyr_step.Rd ├── mutate.dtplyr_step.Rd ├── nest.dtplyr_step.Rd ├── pivot_longer.dtplyr_step.Rd ├── pivot_wider.dtplyr_step.Rd ├── reframe.dtplyr_step.Rd ├── relocate.dtplyr_step.Rd ├── rename.dtplyr_step.Rd ├── replace_na.dtplyr_step.Rd ├── select.dtplyr_step.Rd ├── separate.dtplyr_step.Rd ├── slice.dtplyr_step.Rd 
├── summarise.dtplyr_step.Rd ├── transmute.dtplyr_step.Rd └── unite.dtplyr_step.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── revdep ├── .gitignore ├── README.md ├── cran.md ├── email.yml ├── failures.md └── problems.md ├── tests ├── testthat.R └── testthat │ ├── _snaps │ ├── count.md │ ├── step-call-pivot_longer.md │ ├── step-call-pivot_wider.md │ ├── step-call.md │ ├── step-colorder-relocate.md │ ├── step-colorder.md │ ├── step-group.md │ ├── step-join.md │ ├── step-mutate.md │ ├── step-subset-filter.md │ ├── step-subset-select.md │ ├── step-subset-separate.md │ ├── step-subset-slice.md │ ├── step-subset-summarise.md │ ├── step.md │ ├── tidyeval-across.md │ ├── tidyeval.md │ └── unite.md │ ├── helpers-library.R │ ├── test-complete.R │ ├── test-count.R │ ├── test-fill.R │ ├── test-reframe.R │ ├── test-replace_na.R │ ├── test-step-call-pivot_longer.R │ ├── test-step-call-pivot_wider.R │ ├── test-step-call.R │ ├── test-step-colorder-relocate.R │ ├── test-step-colorder.R │ ├── test-step-first.R │ ├── test-step-group.R │ ├── test-step-join.R │ ├── test-step-modify.R │ ├── test-step-mutate.R │ ├── test-step-nest.R │ ├── test-step-set.R │ ├── test-step-subset-arrange.R │ ├── test-step-subset-do.R │ ├── test-step-subset-expand.R │ ├── test-step-subset-filter.R │ ├── test-step-subset-select.R │ ├── test-step-subset-separate.R │ ├── test-step-subset-slice.R │ ├── test-step-subset-summarise.R │ ├── test-step-subset-transmute.R │ ├── test-step-subset.R │ ├── test-step.R │ ├── test-tidyeval-across.R │ ├── test-tidyeval.R │ └── test-unite.R └── vignettes ├── .gitignore ├── benchmark.R └── translation.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^CRAN-RELEASE$ 2 | 
^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^\.travis\.yml$ 5 | ^cran-comments\.md$ 6 | ^revdep$ 7 | ^codecov\.yml$ 8 | ^vignettes/benchmark\.R$ 9 | ^vignettes/translation\.R$ 10 | ^_pkgdown\.yml$ 11 | ^docs$ 12 | ^pkgdown$ 13 | ^\.covrignore$ 14 | ^README\.Rmd$ 15 | ^\.github$ 16 | ^LICENSE\.md$ 17 | ^\.github/workflows/R-CMD-check\.yaml$ 18 | ^\.github/workflows/pr-commands\.yaml$ 19 | ^\.github/workflows/pkgdown\.yaml$ 20 | ^CRAN-SUBMISSION$ 21 | -------------------------------------------------------------------------------- /.covrignore: -------------------------------------------------------------------------------- 1 | R/deprec-*.R 2 | R/compat-*.R 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /NEWS.md merge=union 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 
10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (https://www.contributor-covenant.org), version 1.0.0, available at 25 | https://contributor-covenant.org/version/1/0/0/. 26 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to dtplyr 2 | 3 | This outlines how to propose a change to dtplyr. 4 | For more detailed info about contributing to this, and other tidyverse packages, please see the 5 | [**development contributing guide**](https://rstd.io/tidy-contrib). 6 | 7 | ## Fixing typos 8 | 9 | You can fix typos, spelling mistakes, or grammatical errors in the documentation directly using the GitHub web interface, as long as the changes are made in the _source_ file. 10 | This generally means you'll need to edit [roxygen2 comments](https://roxygen2.r-lib.org/articles/roxygen2.html) in an `.R`, not a `.Rd` file. 11 | You can find the `.R` file that generates the `.Rd` by reading the comment in the first line. 
12 | 13 | ## Bigger changes 14 | 15 | If you want to make a bigger change, it's a good idea to first file an issue and make sure someone from the team agrees that it’s needed. 16 | If you’ve found a bug, please file an issue that illustrates the bug with a minimal 17 | [reprex](https://www.tidyverse.org/help/#reprex) (this will also help you write a unit test, if needed). 18 | 19 | ### Pull request process 20 | 21 | * Fork the package and clone onto your computer. If you haven't done this before, we recommend using `usethis::create_from_github("tidyverse/dtplyr", fork = TRUE)`. 22 | 23 | * Install all development dependencies with `devtools::install_dev_deps()`, and then make sure the package passes R CMD check by running `devtools::check()`. 24 | If R CMD check doesn't pass cleanly, it's a good idea to ask for help before continuing. 25 | * Create a Git branch for your pull request (PR). We recommend using `usethis::pr_init("brief-description-of-change")`. 26 | 27 | * Make your changes, commit to git, and then create a PR by running `usethis::pr_push()`, and following the prompts in your browser. 28 | The title of your PR should briefly describe the change. 29 | The body of your PR should contain `Fixes #issue-number`. 30 | 31 | * For user-facing changes, add a bullet to the top of `NEWS.md` (i.e. just below the first header). Follow the style described in <https://style.tidyverse.org/news.html>. 32 | 33 | ### Code style 34 | 35 | * New code should follow the tidyverse [style guide](https://style.tidyverse.org). 36 | You can use the [styler](https://CRAN.R-project.org/package=styler) package to apply these styles, but please don't restyle code that has nothing to do with your PR. 37 | 38 | * We use [roxygen2](https://cran.r-project.org/package=roxygen2), with [Markdown syntax](https://roxygen2.r-lib.org/articles/rd-formatting.html), for documentation. 39 | 40 | * We use [testthat](https://cran.r-project.org/package=testthat) for unit tests.
41 | Contributions with test cases included are easier to accept. 42 | 43 | ## Code of Conduct 44 | 45 | Please note that the dtplyr project is released with a 46 | [Contributor Code of Conduct](CODE_OF_CONDUCT.md). By contributing to this 47 | project you agree to abide by its terms. 48 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/issue_template.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report or feature request 3 | about: Describe a bug you've seen or make a case for a new feature 4 | --- 5 | 6 | Please briefly describe your problem and what output you expect. If you have a question, please don't use this form. Instead, ask on <https://stackoverflow.com/> or <https://community.rstudio.com/>. 7 | 8 | Please include a minimal reproducible example (AKA a reprex). If you've never heard of a [reprex](http://reprex.tidyverse.org/) before, start by reading <https://www.tidyverse.org/help/#reprex>. 9 | 10 | Brief description of the problem 11 | 12 | ```r 13 | # insert reprex here 14 | ``` 15 | -------------------------------------------------------------------------------- /.github/SUPPORT.md: -------------------------------------------------------------------------------- 1 | # Getting help with dtplyr 2 | 3 | Thanks for using dtplyr! 4 | Before filing an issue, there are a few places to explore and pieces to put together to make the process as smooth as possible. 5 | 6 | ## Make a reprex 7 | 8 | Start by making a minimal **repr**oducible **ex**ample using the [reprex](https://reprex.tidyverse.org/) package. 9 | If you haven't heard of or used reprex before, you're in for a treat! 10 | Seriously, reprex will make all of your R-question-asking endeavors easier (which is a pretty insane ROI for the five to ten minutes it'll take you to learn what it's all about). 11 | For additional reprex pointers, check out the [Get help!](https://www.tidyverse.org/help/) section of the tidyverse site. 12 | 13 | ## Where to ask?
14 | 15 | Armed with your reprex, the next step is to figure out [where to ask](https://www.tidyverse.org/help/#where-to-ask). 16 | 17 | * If it's a question: start with [community.rstudio.com](https://community.rstudio.com/), and/or StackOverflow. There are more people there to answer questions. 18 | 19 | * If it's a bug: you're in the right place, [file an issue](https://github.com/tidyverse/dtplyr/issues/new). 20 | 21 | * If you're not sure: let the community help you figure it out! 22 | If your problem _is_ a bug or a feature request, you can easily return here and report it. 23 | 24 | Before opening a new issue, be sure to [search issues and pull requests](https://github.com/tidyverse/dtplyr/issues) to make sure the bug hasn't been reported and/or already fixed in the development version. 25 | By default, the search will be pre-populated with `is:issue is:open`. 26 | You can [edit the qualifiers](https://help.github.com/articles/searching-issues-and-pull-requests/) (e.g. `is:pr`, `is:closed`) as needed. 27 | For example, you'd simply remove `is:open` to search _all_ issues in the repo, open or closed. 28 | 29 | ## What happens next? 30 | 31 | To be as efficient as possible, development of tidyverse packages tends to be very bursty, so you shouldn't worry if you don't get an immediate response. 32 | Typically we don't look at a repo until a sufficient quantity of issues accumulates, then there’s a burst of intense activity as we focus our efforts. 33 | That makes development more efficient because it avoids expensive context switching between problems, at the cost of taking longer to get back to you. 34 | This process makes a good reprex particularly important because it might be multiple months between your initial report and when we start working on it. 35 | If we can’t reproduce the bug, we can’t fix it!
36 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | branches: [main, master] 12 | 13 | name: R-CMD-check.yaml 14 | 15 | permissions: read-all 16 | 17 | jobs: 18 | R-CMD-check: 19 | runs-on: ${{ matrix.config.os }} 20 | 21 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 22 | 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | config: 27 | - {os: macos-latest, r: 'release'} 28 | 29 | - {os: windows-latest, r: 'release'} 30 | # use 4.0 or 4.1 to check with rtools40's older compiler 31 | - {os: windows-latest, r: 'oldrel-4'} 32 | 33 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 34 | - {os: ubuntu-latest, r: 'release'} 35 | - {os: ubuntu-latest, r: 'oldrel-1'} 36 | - {os: ubuntu-latest, r: 'oldrel-2'} 37 | - {os: ubuntu-latest, r: 'oldrel-3'} 38 | - {os: ubuntu-latest, r: 'oldrel-4'} 39 | 40 | env: 41 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 42 | R_KEEP_PKG_SOURCE: yes 43 | 44 | steps: 45 | - uses: actions/checkout@v4 46 | 47 | - uses: r-lib/actions/setup-pandoc@v2 48 | 49 | - uses: r-lib/actions/setup-r@v2 50 | with: 51 | r-version: ${{ matrix.config.r }} 52 | http-user-agent: ${{ matrix.config.http-user-agent }} 53 | use-public-rspm: true 54 | 55 | - uses: r-lib/actions/setup-r-dependencies@v2 56 | with: 57 | extra-packages: any::rcmdcheck 58 | needs: check 59 | 60 | - uses: r-lib/actions/check-r-package@v2 61 | with: 62 | upload-snapshots: true 63 | 
build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 64 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown.yaml 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | pkgdown: 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: pr-commands.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | document: 13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 14 | name: document 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | permissions: 19 | contents: write 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/pr-fetch@v2 24 | with: 25 | repo-token: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::roxygen2 34 | needs: pr-document 35 | 36 | - name: Document 37 | run: roxygen2::roxygenise() 38 | shell: Rscript {0} 39 | 40 | - name: commit 41 | run: | 42 | git config --local user.name "$GITHUB_ACTOR" 43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 44 | git add man/\* NAMESPACE 45 | git commit -m 'Document' 46 | 47 | - uses: r-lib/actions/pr-push@v2 48 | with: 49 | repo-token: ${{ secrets.GITHUB_TOKEN }} 50 | 51 | style: 52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 53 | name: style 54 | runs-on: ubuntu-latest 55 | env: 56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 57 | permissions: 58 | contents: write 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - uses: r-lib/actions/pr-fetch@v2 63 | with: 64 | repo-token: ${{ secrets.GITHUB_TOKEN }} 65 | 66 | - uses: r-lib/actions/setup-r@v2 67 | 68 | - name: Install dependencies 69 | run: install.packages("styler") 70 | shell: Rscript {0} 71 | 72 | - name: Style 73 | 
run: styler::style_pkg() 74 | shell: Rscript {0} 75 | 76 | - name: commit 77 | run: | 78 | git config --local user.name "$GITHUB_ACTOR" 79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 80 | git add \*.R 81 | git commit -m 'Style' 82 | 83 | - uses: r-lib/actions/pr-push@v2 84 | with: 85 | repo-token: ${{ secrets.GITHUB_TOKEN }} 86 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: any::covr, any::xml2 29 | needs: coverage 30 | 31 | - name: Test coverage 32 | run: | 33 | cov <- covr::package_coverage( 34 | quiet = FALSE, 35 | clean = FALSE, 36 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 37 | ) 38 | covr::to_cobertura(cov) 39 | shell: Rscript {0} 40 | 41 | - uses: codecov/codecov-action@v4 42 | with: 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' 
-name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | docs/ 5 | inst/doc 6 | *.swp 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: dtplyr 2 | Title: Data Table Back-End for 'dplyr' 3 | Version: 1.3.1.9000 4 | Authors@R: c( 5 | person("Hadley", "Wickham", , "hadley@posit.co", role = c("cre", "aut")), 6 | person("Maximilian", "Girlich", role = "aut"), 7 | person("Mark", "Fairbanks", role = "aut"), 8 | person("Ryan", "Dickerson", role = "aut"), 9 | person("Posit Software, PBC", role = c("cph", "fnd")) 10 | ) 11 | Description: Provides a data.table backend for 'dplyr'. The goal of 12 | 'dtplyr' is to allow you to write 'dplyr' code that is automatically 13 | translated to the equivalent, but usually much faster, data.table 14 | code. 
15 | License: MIT + file LICENSE 16 | URL: https://dtplyr.tidyverse.org, https://github.com/tidyverse/dtplyr 17 | BugReports: https://github.com/tidyverse/dtplyr/issues 18 | Depends: 19 | R (>= 4.0) 20 | Imports: 21 | cli (>= 3.4.0), 22 | data.table (>= 1.13.0), 23 | dplyr (>= 1.1.0), 24 | glue, 25 | lifecycle, 26 | rlang (>= 1.0.4), 27 | tibble, 28 | tidyselect (>= 1.2.0), 29 | vctrs (>= 0.4.1) 30 | Suggests: 31 | bench, 32 | covr, 33 | knitr, 34 | rmarkdown, 35 | testthat (>= 3.1.2), 36 | tidyr (>= 1.1.0), 37 | waldo (>= 0.3.1) 38 | VignetteBuilder: 39 | knitr 40 | Config/Needs/website: tidyverse/tidytemplate 41 | Config/testthat/edition: 3 42 | Encoding: UTF-8 43 | Roxygen: {library(tidyr); list(markdown = TRUE)} 44 | RoxygenNote: 7.3.2 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: dtplyr authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 dtplyr authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/by.R: --------------------------------------------------------------------------------
# Turn a per-operation grouping selection into a `dtplyr_by` object.
#
# `by` is captured as a quosure, validated by `check_by()`, and then evaluated
# as a tidyselect expression against `data`. The result records the selected
# column names and whether any column was selected at all.
compute_by <- function(by,
                       data,
                       ...,
                       by_arg = "by",
                       data_arg = "data",
                       error_call = caller_env()) {
  check_dots_empty0(...)

  by_quo <- enquo(by)
  check_by(
    by_quo,
    data,
    by_arg = by_arg,
    data_arg = data_arg,
    error_call = error_call
  )

  selected <- eval_select_by(by_quo, data, error_call = error_call)

  new_by(uses_by = length(selected) > 0, names = selected)
}

# TRUE when `data` reports at least one grouping variable.
is_grouped_dt <- function(data) {
  length(group_vars(data)) > 0
}

# Abort when a non-NULL `by` quosure is combined with already-grouped `data`.
# Always returns `NULL` invisibly when it does not abort.
check_by <- function(by,
                     data,
                     ...,
                     by_arg = "by",
                     data_arg = "data",
                     error_call = caller_env()) {
  check_dots_empty0(...)

  if (!quo_is_null(by) && is_grouped_dt(data)) {
    # `{by_arg}` / `{data_arg}` are interpolated by cli from this frame.
    msg <- paste0(
      "Can't supply {.arg {by_arg}} when ",
      "{.arg {data_arg}} is a grouped data frame."
    )
    cli::cli_abort(msg, call = error_call)
  }

  invisible(NULL)
}

# Evaluate the `by` selection against `data` (renaming not allowed) and return
# the names of the selected columns.
eval_select_by <- function(by,
                           data,
                           error_call = caller_env()) {
  names(
    tidyselect::eval_select(
      expr = by,
      data = data,
      allow_rename = FALSE,
      error_call = error_call
    )
  )
}

# Low-level constructor for the `dtplyr_by` record.
new_by <- function(uses_by = FALSE, names = character()) {
  fields <- list(uses_by = uses_by, names = names)
  structure(fields, class = "dtplyr_by")
}



-------------------------------------------------------------------------------- /R/complete.R: --------------------------------------------------------------------------------
#' Complete a data frame with missing combinations of data
#'
#' @description
#' This is a method for the tidyr `complete()` generic. This is a wrapper
#' around `dtplyr` translations for `expand()`, `full_join()`, and `replace_na()`
#' that's useful for completing missing combinations of data.
#'
#' @param data A [lazy_dt()].
#' @inheritParams tidyr::complete
#' @examples
#' library(tidyr)
#' tbl <- tibble(x = 1:2, y = 1:2, z = 3:4)
#' dt <- lazy_dt(tbl)
#'
#' dt %>%
#'   complete(x, y)
#'
#' dt %>%
#'   complete(x, y, fill = list(z = 10L))
# exported onLoad
complete.dtplyr_step <- function(data, ..., fill = list()) {
  dots <- enquos(...)
  is_null_dot <- map_lgl(dots, quo_is_null)
  dots <- dots[!is_null_dot]

  # Nothing to expand over: hand the input back untouched.
  if (length(dots) == 0) {
    return(data)
  }

  expanded <- tidyr::expand(data, !!!dots)
  joined <- dplyr::full_join(expanded, data, by = expanded$vars)
  tidyr::replace_na(joined, replace = fill)
}
-------------------------------------------------------------------------------- /R/count.R: --------------------------------------------------------------------------------
#' Count observations by group
#'
#' This is a method for the dplyr [count()] generic.
#' It is translated using
#' `.N` in the `j` argument, and supplying groups to `keyby` as appropriate.
#'
#' @param x A [lazy_dt()]
#' @inheritParams dplyr::count
#' @importFrom dplyr count
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(dplyr::starwars)
#' dt %>% count(species)
#' dt %>% count(species, sort = TRUE)
#' dt %>% count(species, wt = mass, sort = TRUE)
count.dtplyr_step <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
  # With grouping expressions: add them to the existing groups and summarise
  # with `.groups = "drop"`. Without: use `x` as is and `.groups = "keep"`.
  has_dots <- !missing(...)
  out <- if (has_dots) group_by(x, ..., .add = TRUE) else x
  .groups <- if (has_dots) "drop" else "keep"

  tally_count(out, {{ wt }}, sort, name, .groups)
}

#' @importFrom dplyr add_count
#' @export
add_count.dtplyr_step <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
  # Grouping of the input, re-applied to the result at the end.
  original_groups <- syms(group_vars(x))

  grouped <- if (!missing(...)) group_by(x, ..., .add = TRUE) else x
  tallied <- dplyr::add_tally(grouped, wt = !!enquo(wt), sort = sort, name = name)

  group_by(tallied, !!!original_groups)
}

#' @importFrom dplyr tally
#' @export
tally.dtplyr_step <- function(x, wt = NULL, sort = FALSE, name = NULL) {
  tally_count(x, wt = {{ wt }}, sort = sort, name = name, .groups = "drop_last")
}

# Helpers -----------------------------------------------------------------

# Shared implementation behind `count()` and `tally()`: summarise `.data` into
# a single count column (row count, or `sum()` of `wt` when supplied), and
# optionally arrange the result by that column in descending order.
tally_count <- function(.data, wt = NULL, sort = FALSE, name = NULL, .groups = "drop_last") {
  weight <- enquo(wt)
  count_expr <- if (quo_is_null(weight)) {
    expr(n())
  } else {
    expr(sum(!!weight, na.rm = TRUE))
  }
  name <- check_name(name, .data$groups)

  out <- summarise(.data, !!name := !!count_expr, .groups = .groups)
  if (!sort) {
    return(out)
  }

  arrange(out, desc(!!sym(name)))
}

# Validate a user-supplied count column name, or pick one that does not clash
# with `vars` (informing the user when the default "n" could not be used).
check_name <- function(name, vars) {
  if (!is.null(name)) {
    if (!is_string(name)) {
      abort("`name` must be a string")
    }
    return(name)
  }

  # `name` must keep this identifier: the glue template below interpolates it.
  name <- n_name(vars)
  if (name != "n") {
    inform(c(
      glue::glue("Storing counts in `{name}`, as `n` already present in input"),
      i = "Use `name = \"new_name\"` to pick a new name."
    ))
  }

  name
}

# Smallest name of the form "n", "nn", "nnn", ... that is not in `x`.
n_name <- function(x) {
  candidate <- "n"
  while (candidate %in% x) {
    candidate <- paste0("n", candidate)
  }

  candidate
}

-------------------------------------------------------------------------------- /R/dtplyr-package.R: --------------------------------------------------------------------------------
#' @import rlang
#' @importFrom data.table data.table as.data.table is.data.table
#' @importFrom lifecycle deprecated
#' @importFrom glue glue
#' @keywords internal
"_PACKAGE"

#' dtplyr is data.table aware
#'
#' @keywords internal
#' @export
.datatable.aware <- TRUE

globalVariables(c(".SD", ".N", ".BY", ".I", "desc"))
-------------------------------------------------------------------------------- /R/fill.R: --------------------------------------------------------------------------------
#' Fill in missing values with previous or next value
#'
#' @description
#' This is a method for the tidyr `fill()` generic. It is translated to
#' [data.table::nafill()]. Note that `data.table::nafill()` currently only
#' works for integer and double columns.
#'
#' @inheritParams tidyr::fill
#' @examples
#' library(tidyr)
#'
#' # Value (year) is recorded only when it changes
#' sales <- lazy_dt(tibble::tribble(
#'   ~quarter, ~year, ~sales,
#'   "Q1",    2000,    66013,
#'   "Q2",      NA,    69182,
#'   "Q3",      NA,    53175,
#'   "Q4",      NA,    21001,
#'   "Q1",    2001,    46036,
#'   "Q2",      NA,    58842,
#'   "Q3",      NA,    44568,
#'   "Q4",      NA,    50197,
#'   "Q1",    2002,    39113,
#'   "Q2",      NA,    41668,
#'   "Q3",      NA,    30144,
#'   "Q4",      NA,    52897,
#'   "Q1",    2004,    32129,
#'   "Q2",      NA,    67686,
#'   "Q3",      NA,    31768,
#'   "Q4",      NA,    49094
#' ))
#'
#' # `fill()` defaults to replacing missing data from top to bottom
#' sales %>% fill(year)
#'
#' # Value (n_squirrels) is missing above and below within a group
#' squirrels <- lazy_dt(tibble::tribble(
#'   ~group,  ~name,     ~role,     ~n_squirrels,
#'   1,      "Sam",    "Observer",   NA,
#'   1,     "Mara", "Scorekeeper",    8,
#'   1,    "Jesse",    "Observer",   NA,
#'   1,      "Tom",    "Observer",   NA,
#'   2,     "Mike",    "Observer",   NA,
#'   2,  "Rachael",    "Observer",   NA,
#'   2,  "Sydekea", "Scorekeeper",   14,
#'   2, "Gabriela",    "Observer",   NA,
#'   3,  "Derrick",    "Observer",   NA,
#'   3,     "Kara", "Scorekeeper",    9,
#'   3,    "Emily",    "Observer",   NA,
#'   3, "Danielle",    "Observer",   NA
#' ))
#'
#' # The values are inconsistently missing by position within the group
#' # Use .direction = "downup" to fill missing values in both directions
#' squirrels %>%
#'   dplyr::group_by(group) %>%
#'   fill(n_squirrels, .direction = "downup") %>%
#'   dplyr::ungroup()
#'
#' # Using `.direction = "updown"` accomplishes the same goal in this example
# exported onLoad
fill.dtplyr_step <- function(data, ..., .direction = c("down", "up", "downup", "updown")) {

  dots <- enquos(...)

  # Restrict `.direction` to one of the four values listed in the signature.
  .direction <- arg_match(.direction)

  if (.direction %in% c("down", "up")) {
    # One-directional fill: a single nafill() pass over the selected columns,
    # with "down" mapped to type "locf" and "up" mapped to type "nocb".
    type <- switch(.direction, "down" = "locf", "up" = "nocb")
    mutate(data, dplyr::across(c(!!!dots), nafill, type))
  } else {
    # Two-directional fill: the inner nafill() pass uses `type1`, the outer
    # pass uses `type2` on its result, so the first-named direction runs first.
    if (.direction == "downup") {
      type1 <- "locf"
      type2 <- "nocb"
    } else {
      type1 <- "nocb"
      type2 <- "locf"
    }

    # NOTE(review): `type`, `type1` and `type2` are referenced by name inside
    # the expression handed to mutate(); presumably the translation resolves
    # them from this frame, so keep these local names stable — confirm.
    mutate(data, dplyr::across(c(!!!dots), ~ nafill(nafill(.x, type1), type2)))
  }
}
-------------------------------------------------------------------------------- /R/reframe.R: --------------------------------------------------------------------------------
#' Transform each group to an arbitrary number of rows
#'
#' This is a method for the dplyr [reframe()] generic. It is translated to
#' the `j` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @inheritParams dplyr::reframe
#' @importFrom dplyr reframe
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(mtcars)
#'
#' dt %>%
#'   reframe(qs = quantile(disp, c(0.25, 0.75)),
#'           prob = c(0.25, 0.75),
#'           .by = cyl)
#'
#' dt %>%
#'   group_by(cyl) %>%
#'   reframe(qs = quantile(disp, c(0.25, 0.75)),
#'           prob = c(0.25, 0.75))
reframe.dtplyr_step <- function(.data, ..., .by = NULL) {
  # Implemented as summarise() followed by ungroup(), so the result is
  # always returned ungrouped.
  out <- summarise(.data, ..., .by = {{ .by }})
  ungroup(out)
}
-------------------------------------------------------------------------------- /R/replace_na.R: --------------------------------------------------------------------------------
#' Replace NAs with specified values
#'
#' @description
#' This is a method for the tidyr `replace_na()` generic. It is translated to
#' [data.table::fcoalesce()].
#'
#' Note that unlike `tidyr::replace_na()`, `data.table::fcoalesce()` cannot
#' replace `NULL` values in lists.
#'
#' @inheritParams tidyr::replace_na
#' @param data A [lazy_dt()].
#' @examples
#' library(tidyr)
#'
#' # Replace NAs in a data frame
#' dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b")))
#' dt %>% replace_na(list(x = 0, y = "unknown"))
#'
#' # Replace NAs using `dplyr::mutate()`
#' dt %>% dplyr::mutate(x = replace_na(x, 0))
# exported onLoad
replace_na.dtplyr_step <- function(data, replace = list()) {

  stopifnot(is.list(replace))
  if (length(replace) == 0) {
    return(data)
  }

  # Only entries of `replace` that name an existing column are used.
  sim_data <- simulate_vars(data)
  replace_vars <- intersect(names(replace), names(sim_data))

  replace_calls <- vector("list", length(replace_vars))
  names(replace_calls) <- replace_vars

  for (i in seq_along(replace_vars)) {
    var <- replace_vars[[i]]
    # Look the replacement up by name, not by position: `replace_vars` is a
    # filtered subset of `names(replace)`, so position `i` in one does not
    # correspond to position `i` in the other once an entry has been dropped.
    value <- replace[[var]]
    check_replacement(value, var)
    replace_calls[[i]] <- call2("fcoalesce", sym(var), value)
  }

  mutate(data, !!!replace_calls)
}

# Abort unless the replacement `x` for column `var` has length 1.
# The error is reported against the calling frame.
check_replacement <- function(x, var) {
  n <- length(x)
  if (n == 1) {
    return()
  }

  abort(glue::glue("Replacement for `{var}` is length {n}, not length 1"), call = caller_env())
}

# --------------------------------------------------------------------------------
# /R/step-assign.R:
# --------------------------------------------------------------------------------
# Create a "dtplyr_step_assign" step named `name` on top of `parent`.
# `locals` is merged over the parent's locals with `utils::modifyList()`.
step_locals <- function(parent, locals, name) {
  stopifnot(is_step(parent))
  stopifnot(is.list(locals))
  stopifnot(is_string(name))

  new_step(
    parent = parent,
    locals = utils::modifyList(parent$locals, locals),
    implicit_copy = TRUE,
    needs_copy = FALSE,
    name = name,
    class = "dtplyr_step_assign",
  )
}

# The call for an assign step is simply the symbol for its stored name;
# `needs_copy` is accepted for method compatibility but not used.
#' @export
dt_call.dtplyr_step_assign <- function(x, needs_copy = FALSE) {
  sym(x$name)
}

# --------------------------------------------------------------------------------
# /R/step-colorder-relocate.R:
# --------------------------------------------------------------------------------
#' Relocate variables using their names
#'
#' This is a method for the dplyr [relocate()] generic. It is translated to
#' the `j` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @inheritParams dplyr::relocate
#' @importFrom dplyr relocate
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3))
#'
#' dt %>% relocate(z)
#' dt %>% relocate(y, .before = x)
#' dt %>% relocate(y, .after = z)
relocate.dtplyr_step <- function(.data, ..., .before = NULL, .after = NULL) {
  # tidyselect resolves the complete new column order; only the names are
  # needed here.
  new_vars <- names(tidyselect::eval_relocate(
    expr(c(...)),
    .data,
    before = enquo(.before),
    after = enquo(.after),
    before_arg = ".before",
    after_arg = ".after"
  ))
  out <- step_colorder(.data, new_vars)
  # Re-apply the input's grouping to the reordered step.
  step_group(out, .data$groups)
}

# --------------------------------------------------------------------------------
# /R/step-colorder.R:
# --------------------------------------------------------------------------------
# Add a `setcolorder()` step that moves `col_order` (column names or integer
# positions) to the front of `x`. Returns `x` unchanged when the requested
# columns already lead the table in that order.
step_colorder <- function(x, col_order) {
  stopifnot(is_step(x))
  stopifnot(is.character(col_order) || is.integer(col_order))

  if (any(duplicated(col_order))) {
    abort("Every element of `col_order` must be unique.")
  }

  col_order <- unname(col_order)
  if (is.integer(col_order)) {
    if (identical(col_order, seq_along(col_order))) {
      return(x)
    }
    locs <- col_order
    front <- x$vars[col_order]
  } else {
    # Each requested name must identify exactly one existing column.
    vars_selected <- x$vars[x$vars %in% col_order]
    vars_count <- vctrs::vec_count(vars_selected)
    vars_problematic <- vars_count$key[vars_count$count != 1]
    if (!is_empty(vars_problematic)) {
      vars_error <- paste0(vars_problematic, collapse = ", ")
      msg <- paste0("The column(s) ", vars_error, " do not uniquely match a column in `x`.")
      abort(msg)
    }

    if (identical(col_order, x$vars[seq_along(col_order)])) {
      return(x)
    }
    locs <- match(col_order, x$vars)
    front <- col_order
  }

  # `setcolorder()` moves the listed columns to the front and keeps every
  # other column after them, so the step's `vars` must keep the untouched
  # columns too. With a complete `col_order` the remainder is empty.
  rest <- x$vars[setdiff(seq_along(x$vars), locs)]
  vars <- c(front, rest)

  step_call(x,
    "setcolorder",
    args = list(col_order),
    vars = vars,
    in_place = !x$implicit_copy
  )
}

# --------------------------------------------------------------------------------
# /R/step-first.R:
# --------------------------------------------------------------------------------
#' Create a "lazy" data.table for use with dplyr verbs
#'
#' @description
#' A lazy data.table captures the intent of dplyr verbs, only actually
#' performing computation when requested (with [collect()], [pull()],
#' [as.data.frame()], [data.table::as.data.table()], or [tibble::as_tibble()]).
#' This allows dtplyr to convert dplyr verbs into as few data.table expressions
#' as possible, which leads to a high performance translation.
#'
#' See `vignette("translation")` for the details of the translation.
#'
#' @param x A data table (or something that can be coerced to a data table).
#' @param immutable If `TRUE`, `x` is treated as immutable and will never
#'   be modified by any code generated by dtplyr. Alternatively, you can set
#'   `immutable = FALSE` to allow dtplyr to modify the input object.
#' @param name Optionally, supply a name to be used in generated expressions.
#'   For expert use only.
#' @param key_by Set keys for data frame, using [select()] semantics (e.g.
#'   `key_by = c(key1, key2)`).
#'
#'   This uses [data.table::setkey()] to sort the table and build an index.
#'   This will considerably improve performance for subsets, summaries, and
#'   joins that use the keys.
#'
#'   See `vignette("datatable-keys-fast-subset")` for more details.
#' @export
#' @aliases tbl_dt grouped_dt
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' mtcars2 <- lazy_dt(mtcars)
#' mtcars2
#' mtcars2 %>% select(mpg:cyl)
#' mtcars2 %>% select(x = mpg, y = cyl)
#' mtcars2 %>% filter(cyl == 4) %>% select(mpg)
#' mtcars2 %>% select(mpg, cyl) %>% filter(cyl == 4)
#' mtcars2 %>% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2)
#' mtcars2 %>% transmute(cyl2 = cyl * 2, vs2 = vs * 2)
#' mtcars2 %>% filter(cyl == 8) %>% mutate(cyl2 = cyl * 2)
#'
#' # Learn more about translation in vignette("translation")
#' by_cyl <- mtcars2 %>% group_by(cyl)
#' by_cyl %>% summarise(mpg = mean(mpg))
#' by_cyl %>% mutate(mpg = mean(mpg))
#' by_cyl %>%
#'   filter(mpg < mean(mpg)) %>%
#'   summarise(hp = mean(hp))
lazy_dt <- function(x, name = NULL, immutable = TRUE, key_by = NULL) {
  # in case `x` has an `as.data.table()` method but not a `group_vars()` method
  groups <- tryCatch(group_vars(x), error = function(e) character())

  if (!is.data.table(x)) {
    if (!immutable) {
      abort("`immutable` must be `TRUE` when `x` is not already a data table.")
    }
    x <- as.data.table(x)
    # Remember that `x` was just converted, so no extra copy is made below.
    copied <- TRUE
  } else {
    copied <- FALSE
  }

  key_by <- enquo(key_by)
  key_vars <- unname(tidyselect::vars_select(names(x), !!key_by))
  if (length(key_vars)) {
    # `setkeyv()` is applied to `x` itself; copy first when the input must
    # stay untouched and has not already been converted above.
    if (immutable && !copied) {
      x <- data.table::copy(x)
    }
    data.table::setkeyv(x, key_vars)
  }

  step_first(x, name = name, groups = groups, immutable = immutable, env = caller_env())
}

# Dimensions of the first step are those of the wrapped data.table.
#' @export
dim.dtplyr_step_first <- function(x) {
  dim(x$parent)
}

# Construct the root step ("dtplyr_step_first") wrapping the data.table
# `parent`. When `name` is NULL a generated name from `unique_name()` is used.
step_first <- function(parent, name = NULL, groups = character(),
                       immutable = TRUE, env = caller_env()) {
  stopifnot(is.data.table(parent))

  if (is.null(name)) {
    name <- unique_name()
  }

  new_step(parent,
    vars = names(parent),
    groups = groups,
    locals = list(),
    implicit_copy = !immutable,
    needs_copy = FALSE,
    name = sym(name),
    env = env,
    class = "dtplyr_step_first"
  )
}

# The call for the first step is its name symbol, wrapped in `copy()` when a
# copy is requested.
#' @export
dt_call.dtplyr_step_first <- function(x, needs_copy = FALSE) {
  if (needs_copy) {
    expr(copy(!!x$name))
  } else {
    x$name
  }
}

# A one-element list holding the wrapped data.table, named after the step.
#' @export
dt_sources.dtplyr_step_first <- function(x) {
  stats::setNames(list(x$parent), as.character(x$name))
}

# The first step never represents a computation.
#' @export
dt_has_computation.dtplyr_step_first <- function(x) {
  FALSE
}

# Generator for names of the form "_DT1", "_DT2", ...; the counter lives in
# the enclosing `local()` environment and is incremented on every call.
unique_name <- local({
  i <- 0
  function() {
    i <<- i + 1
    paste0("_DT", i)
  }
})

# --------------------------------------------------------------------------------
# /R/step-group.R:
# --------------------------------------------------------------------------------
# Create a "dtplyr_step_group" step recording `groups` and `arrange` on top of
# `parent`; returns `parent` itself when `can_step_group_return_early()` says
# nothing would change.
step_group <- function(parent, groups = parent$groups, arrange = parent$arrange) {
  if (can_step_group_return_early(parent, groups, arrange)) {
    return(parent)
  }

  new_step(
    parent,
    vars = parent$vars,
    groups = groups,
    class = "dtplyr_step_group",
    arrange = arrange,
    name = parent$name
  )
}

# A group step has computation only if its parent does.
#' @export
dt_has_computation.dtplyr_step_group <- function(x) {
  dt_has_computation(x$parent)
}


# Add the grouping argument to `call`: `keyby = .(groups)` when `arrange` is
# TRUE (the default when it is NULL), otherwise `by = .(groups)`. The call is
# returned unchanged when the step has no groups.
add_grouping_param <- function(call, step, arrange = step$arrange) {
  if (length(step$groups) == 0) {
    return(call)
  }

  arrange <- arrange %||% TRUE
  using <- if (isTRUE(arrange)) "keyby" else "by"

  call[[using]] <- call2(".", !!!syms(step$groups))
  call
}

# dplyr methods -----------------------------------------------------------

#' Group and ungroup
#'
#' These are methods for dplyr's [group_by()] and [ungroup()] generics.
#' Grouping is translated to either the `keyby` or the `by` argument of
#' `[.data.table` depending on the value of the `arrange` argument.
#'
#' @inheritParams dplyr::group_by
#' @param .data A [lazy_dt()]
#' @param arrange If `TRUE`, will automatically arrange the output of
#'   subsequent grouped operations by group. If `FALSE`, output order will be
#'   left unchanged. In the generated data.table code this switches between
#'   using the `keyby` (`TRUE`) and `by` (`FALSE`) arguments.
#' @param .add,add When `FALSE`, the default, `group_by()` will
#'   override existing groups. To add to the existing groups, use
#'   `.add = TRUE`.
#'
#'   This argument was previously called `add`, but that prevented
#'   creating a new grouping variable called `add`, and conflicts with
#'   our naming conventions.
#' @importFrom dplyr group_by
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#' dt <- lazy_dt(mtcars)
#'
#' # group_by() is usually translated to `keyby` so that the groups
#' # are ordered in the output
#' dt %>%
#'   group_by(cyl) %>%
#'   summarise(mpg = mean(mpg))
#'
#' # use `arrange = FALSE` to instead use `by` so the original order
#' # of groups is preserved
#' dt %>%
#'   group_by(cyl, arrange = FALSE) %>%
#'   summarise(mpg = mean(mpg))
group_by.dtplyr_step <- function(.data, ..., .add = FALSE, arrange = TRUE) {
  dots <- capture_dots(.data, ..., .j = TRUE)
  # Drop grouping expressions that evaluated to NULL.
  dots <- dots[!map_lgl(dots, is.null)]

  # need `eval(expr(...))` to trigger warning for `add`
  groups <- eval(expr(dplyr::group_by_prepare(.data, !!!dots, .add = .add)))
  # If the input already carries an `arrange` setting, both it and the new
  # `arrange` must be TRUE for the result to be arranged.
  arranged <- if (!is.null(.data$arrange)) .data$arrange && arrange else arrange

  step_group(groups$data, as.character(groups$group_names), arranged)
}

# Decide whether `step_group()` can return `parent` unchanged.
# * With no requested groups: only if the parent has no groups either.
# * Otherwise: the groups must be identical, and `arrange` must be FALSE or
#   identical to the parent's setting.
can_step_group_return_early <- function(parent, groups, arrange) {
  if (is_empty(groups)) {
    return(is_empty(parent$groups))
  }

  same_arrange <- (is_false(arrange) || identical(arrange, parent$arrange))
  same_groups <- identical(groups, parent$groups)
  same_arrange && same_groups
}

#' @importFrom dplyr ungroup
#' @export
#' @rdname group_by.dtplyr_step
ungroup.dtplyr_step <- function(x, ...) {
  if (missing(...)) {
    # No columns given: remove all grouping.
    step_group(x, groups = character())
  } else {
    # Remove only the selected columns from the current grouping.
    old_groups <- group_vars(x)
    to_remove <- tidyselect::vars_select(x$vars, ...)
    new_groups <- setdiff(old_groups, to_remove)
    step_group(x, groups = new_groups)
  }
}


# --------------------------------------------------------------------------------
# /R/step-modify.R:
# --------------------------------------------------------------------------------
# Create a "dtplyr_step_modify" step that stores the function `fun` and its
# extra `args`, keeping the parent's grouping and `arrange` setting.
step_modify <- function(parent, fun, args) {
  new_step(
    parent,
    groups = parent$groups,
    arrange = parent$arrange,
    implicit_copy = TRUE,
    fun = fun,
    args = args,
    class = "dtplyr_step_modify"
  )
}

# Builds `parent[, fun(.SD, .BY, <args>)]` (empty `i`), then adds the grouping
# argument; `arrange = FALSE` is passed so `by` is used rather than `keyby`.
#' @export
dt_call.dtplyr_step_modify <- function(x, needs_copy = x$needs_copy) {
  j <- call2(x$fun, quote(.SD), quote(.BY), !!!x$args)
  out <- call2("[", dt_call(x$parent, needs_copy), , j)

  add_grouping_param(out, x, arrange = FALSE)
}

# dplyr methods -----------------------------------------------------------

#' Apply a function to each group
#'
#' These are methods for the dplyr [group_map()] and [group_modify()] generics.
#' They are both translated to `[.data.table`.
#'
#' @param .data A [lazy_dt()]
#' @param .f The name of a two argument function. The first argument is passed
#'   `.SD`, the data.table representing the current group; the second argument
#'   is passed `.BY`, a list giving the current values of the grouping
#'   variables. The function should return a list or data.table.
#' @param ... Additional arguments passed to `.f`
#' @param keep Not supported for [lazy_dt].
#' @returns `group_map()` applies `.f` to each group, returning a list.
#'   `group_modify()` replaces each group with the results of `.f`, returning a
#'   modified [lazy_dt()].
#' @importFrom dplyr group_modify
#' @export
#' @examples
#' library(dplyr)
#'
#' dt <- lazy_dt(mtcars)
#'
#' dt %>%
#'   group_by(cyl) %>%
#'   group_modify(head, n = 2L)
#'
#' dt %>%
#'   group_by(cyl) %>%
#'   group_map(head, n = 2L)
group_modify.dtplyr_step <- function(.data, .f, ..., keep = FALSE) {
  if (!missing(keep)) {
    abort("`keep` is not supported for lazy data tables")
  }

  # `.f` must be given as a bare function name; it is stored as a symbol and
  # only called inside the generated data.table expression.
  .f <- ensym(.f)
  args <- enquos(...)

  step_modify(.data, fun = .f, args = args)
}

#' @importFrom dplyr group_map
#' @rdname group_modify.dtplyr_step
#' @export
group_map.dtplyr_step <- function(.data, .f, ..., keep = FALSE) {
  # `keep` is documented as unsupported; reject it explicitly (as
  # `group_modify()` does) instead of silently ignoring it.
  if (!missing(keep)) {
    abort("`keep` is not supported for lazy data tables")
  }

  .f <- as_function(.f, caller_env())

  # Unlike `group_modify()`, this evaluates immediately: the lazy table is
  # materialised and `.f` is applied once per group, with each result wrapped
  # in a list so the `V1` column holds one result per group.
  dt <- as.data.table(.data)
  dt[, list(list(.f(.SD, .BY, ...))), by = eval(.data$groups)]$V1
}

# --------------------------------------------------------------------------------
# /R/step-nest.R:
# --------------------------------------------------------------------------------
#' Nest
#'
#' @description
#' This is a method for the tidyr [tidyr::nest()] generic. It is translated
#' using the non-nested variables in the `by` argument and `.SD` in the `j`
#' argument.
#'
#' @inheritParams tidyr::nest
#' @param ... <[`tidy-select`][tidyr::tidyr_tidy_select]> Columns to nest, specified
#'   using name-variable pairs of the form `new_col = c(col1, col2, col3)`.
#'   The right hand side can be any valid tidy select expression.
#' @param .key Not supported.
#' @param .data A [lazy_dt()].
#' @examples
#' if (require("tidyr", quietly = TRUE)) {
#'   dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b")))
#'   dt %>% nest(data = y)
#'
#'   dt %>% dplyr::group_by(x) %>% nest()
#' }
# exported onLoad
nest.dtplyr_step <- function(.data, ..., .names_sep = NULL, .key = deprecated()) {
  if (lifecycle::is_present(.key)) {
    abort(c(
      "`nest()` for lazy data.tables doesn't support the `.key` argument.",
      i = "Use a name in the `...` argument instead."
    ))
  }

  # Named list: one character vector of column names per new list-column.
  cols <- eval_nest_dots(.data, ...)

  # Name every column vector by itself; with `.names_sep` the names are then
  # rewritten by `strip_names()`.
  cols <- lapply(cols, set_names)
  if (!is.null(.names_sep)) {
    cols <- imap(cols, strip_names, .names_sep)
  }

  if (length(cols) == 1 && is.null(.names_sep)) {
    # use `.SD` as it is shorter and faster
    nm <- names(cols)
    j_exprs <- exprs(!!nm := .(.SD))
  } else {
    # Several nested columns (or renamed inner columns): build each one
    # explicitly as a data.table of the selected columns.
    j_exprs <- imap(
      cols,
      function(x, name) {
        x <- simplify_names(x)
        expr(.(data.table(!!!syms(x))))
      }
    )
  }

  # Columns that are not nested stay as-is and are used as the `by` groups of
  # the generated call.
  asis <- setdiff(.data$vars, unlist(cols))
  out <- step_subset_j(
    .data,
    vars = c(asis, names(cols)),
    j = expr(.(!!!j_exprs)),
    groups = asis,
    arrange = FALSE
  )

  # Restore the input's grouping, limited to columns that still exist.
  groups <- intersect(out$vars, group_vars(.data))
  group_by(out, !!!syms(groups))
}

# Resolve the `...` of `nest()` into a named list of column-name vectors.
# With no `...`, every non-grouping column is nested into `data` (with a
# warning when the input is ungrouped).
eval_nest_dots <- function(.data, ...) {
  if (missing(...)) {
    groups <- group_vars(.data)
    if (is_empty(groups)) {
      warn(paste0(
        "`...` must not be empty for ungrouped data frames.\n",
        "Did you want `data = everything()`?"
      ))
    }

    nest_vars <- setdiff(.data$vars, groups)
    list(data = nest_vars)
  } else {
    cols <- enquos(...)
    lapply(cols, function(.x) names(tidyselect::eval_select(.x, .data)))
  }
}

# --------------------------------------------------------------------------------
# /R/step-set.R:
# --------------------------------------------------------------------------------
# Create a "dtplyr_step_set" step combining the steps `x` and `y` with the set
# operation named by `style`; the locals of both inputs are merged.
step_set <- function(x, y, style) {
  stopifnot(is_step(x))
  stopifnot(is_step(y))
  stopifnot(is.character(style))

  new_step(
    parent = x,
    parent2 = y,
    locals = utils::modifyList(x$locals, y$locals),
    style = style,
    class = "dtplyr_step_set",
  )
}

# Sources are collected by the join step's method.
#' @export
dt_sources.dtplyr_step_set <- function(x) {
  dt_sources.dtplyr_step_join(x)
}

# Translate the stored `style` into the matching data.table set function
# applied to the calls of both parents.
#' @export
dt_call.dtplyr_step_set <- function(x, needs_copy = x$needs_copy) {
  lhs <- dt_call(x$parent, needs_copy)
  rhs <- dt_call(x$parent2)

  call <- switch(x$style,
    intersect = call2("fintersect", lhs, rhs),
    union = call2("funion", lhs, rhs),
    union_all = call2("funion", lhs, rhs, all = TRUE),
    setdiff = call2("fsetdiff", lhs, rhs),
  )

  call
}

# dplyr verbs -------------------------------------------------------------

#' Set operations
#'
#' These are methods for the dplyr generics [intersect()], [union()],
#' [union_all()], and [setdiff()]. They are translated to
#' [data.table::fintersect()], [data.table::funion()], and
#' [data.table::fsetdiff()].
#'
#' @importFrom dplyr intersect
#' @param x,y A pair of [lazy_dt()]s.
#' @param ... Ignored
#' @examples
#' dt1 <- lazy_dt(data.frame(x = 1:4))
#' dt2 <- lazy_dt(data.frame(x = c(2, 4, 6)))
#'
#' intersect(dt1, dt2)
#' union(dt1, dt2)
#' setdiff(dt1, dt2)
#'
# Exported onload
intersect.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  if (!is_step(y)) {
    y <- lazy_dt(y)
  }
  step_set(x, y, style = "intersect")
}

#' @importFrom dplyr union
#' @rdname intersect.dtplyr_step
# Exported onload
union.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  if (!is_step(y)) {
    y <- lazy_dt(y)
  }
  step_set(x, y, style = "union")
}

#' @importFrom dplyr union_all
#' @rdname intersect.dtplyr_step
#' @export
union_all.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  if (!is_step(y)) {
    y <- lazy_dt(y)
  }
  step_set(x, y, style = "union_all")
}

#' @importFrom dplyr setdiff
#' @rdname intersect.dtplyr_step
# Exported onload
setdiff.dtplyr_step <- function(x, y, ...) {
  # A `y` that is not already a step is wrapped with `lazy_dt()` first.
  if (!is_step(y)) {
    y <- lazy_dt(y)
  }
  step_set(x, y, style = "setdiff")
}

# --------------------------------------------------------------------------------
# /R/step-setnames.R:
# --------------------------------------------------------------------------------
# Add a `setnames(old, new)` step to `x`. `old` may be column names or integer
# positions. Pairs whose name does not actually change are dropped, and `x` is
# returned as-is when nothing is left. With `rename_groups = TRUE` the
# grouping variables are renamed to match.
step_setnames <- function(x, old, new, in_place, rename_groups = FALSE) {
  stopifnot(is_step(x))
  stopifnot(is.character(old) || is.integer(old))
  stopifnot(is.character(new))
  stopifnot(length(old) == length(new))
  stopifnot(is_bool(in_place))
  stopifnot(is_bool(rename_groups))

  # Positions in `x$vars` of the columns being renamed.
  if (is.integer(old)) {
    locs <- old
  } else {
    locs <- vctrs::vec_match(old, x$vars)
  }

  name_changed <- x$vars[locs] != new
  old <- old[name_changed]
  new <- new[name_changed]
  locs <- locs[name_changed]

  if (length(old) == 0) {
    return(x)
  }

  new_vars <- x$vars
  new_vars[locs] <- new
  out <- step_call(x,
    "setnames",
    args = list(old, new),
    vars = new_vars,
    in_place = in_place
  )

  if (rename_groups) {
    groups <- rename_groups(x$groups, set_names(old, new))
    out <- step_group(out, groups)
  }

  out
}

# --------------------------------------------------------------------------------
# /R/step-subset-arrange.R:
# --------------------------------------------------------------------------------
#' Arrange rows by column values
#'
#' This is a method for the dplyr [arrange()] generic. It is translated to
#' an [order()] call in the `i` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @inheritParams dplyr::arrange
#' @importFrom dplyr arrange
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(mtcars)
#' dt %>% arrange(vs, cyl)
#' dt %>% arrange(desc(vs), cyl)
#' dt %>% arrange(across(mpg:disp))
arrange.dtplyr_step <- function(.data, ..., .by_group = FALSE) {
  dots <- capture_dots(.data, ..., .j = FALSE)
  if (.by_group) {
    # Sort by the grouping variables first.
    dots <- c(syms(.data$groups), dots)
  }

  if (length(dots) == 0) {
    return(.data)
  }

  # TRUE when every sort key is a bare column or a negated bare column.
  no_transmute <- all(map_lgl(dots, is_simple_arrange))
  # Order without grouping then restore
  dots <- set_names(dots, NULL)
  if (is_copied(.data) && no_transmute) {
    # The data is already a copy and the keys are simple: emit a `setorder()`
    # call instead of an `order()` subset.
    dots <- c(dots, na.last = TRUE)
    step <- step_call(.data, "setorder", dots)
  } else {
    step <- step_subset(.data, i = call2("order", !!!dots), groups = character())
  }
  step_group(step, groups = .data$groups)
}

# TRUE when the step is flagged as implicitly copied or as needing a copy.
is_copied <- function(x) {
  x$implicit_copy || x$needs_copy
}

# TRUE for a bare symbol (`x`) or a unary minus applied to a bare symbol
# (`-x`); FALSE for anything else.
is_simple_arrange <- function(x) {
  out <- FALSE
  if (is_symbol(x)) {
    out <- TRUE
  } else if (is_call(x, "-", 1)) {
    if (is_symbol(x[[2]])) {
      out <- TRUE
    }
  }
  out
}

# --------------------------------------------------------------------------------
# /R/step-subset-do.R:
# --------------------------------------------------------------------------------
#' @importFrom dplyr do
#' @export
do.dtplyr_step <- function(.data, ...) {
  # This is a partial implementation, because I don't think that many
  # people are likely to use it, given that do() is marked as questioning
  # Problems:
  # * doesn't handle unnamed case
  # * doesn't set .SDcols so `.SD` will only refer to non-groups
  # * can duplicate group vars (#5)

  dots <- capture_dots(.data, ...)

  if (any(names2(dots) == "")) {
    # I can't see any way to figure out what the variables are
    abort("Unnamed do() not supported by dtplyr")
  }

  # Wrap each result in `.()` so it is stored as a list-column, then combine
  # all of them in an outer `.()` for `j`.
  new_vars <- lapply(dots, function(x) call2(".", x))
  j <- call2(".", !!!new_vars)

  vars <- union(.data$vars, names(dots))

  step_subset_j(.data, vars = vars, j = j)
}

# --------------------------------------------------------------------------------
# /R/step-subset-expand.R:
# --------------------------------------------------------------------------------
#' Expand data frame to include all possible combinations of values.
#'
#' @description
#' This is a method for the tidyr `expand()` generic. It is translated to
#' [data.table::CJ()].
#'
#' @param ... Specification of columns to expand. Columns can be atomic vectors
#'   or lists.
#'
#'   * To find all unique combinations of `x`, `y` and `z`, including those not
#'     present in the data, supply each variable as a separate argument:
#'     `expand(df, x, y, z)`.
#'   * To find only the combinations that occur in the
#'     data, use `nesting`: `expand(df, nesting(x, y, z))`.
#'   * You can combine the two forms. For example,
#'     `expand(df, nesting(school_id, student_id), date)` would produce
#'     a row for each present school-student combination for all possible
#'     dates.
#'
#'   Unlike the data.frame method, this method does not use the full set of
#'   levels, just those that appear in the data.
#'
#'   When used with continuous variables, you may need to fill in values
#'   that do not appear in the data: to do so use expressions like
#'   `year = 2010:2020` or `year = full_seq(year,1)`.
#' @param data A [lazy_dt()].
#' @inheritParams tidyr::expand
#' @examples
#' library(tidyr)
#'
#' fruits <- lazy_dt(tibble(
#'   type = c("apple", "orange", "apple", "orange", "orange", "orange"),
#'   year = c(2010, 2010, 2012, 2010, 2010, 2012),
#'   size = factor(
#'     c("XS", "S", "M", "S", "S", "M"),
#'     levels = c("XS", "S", "M", "L")
#'   ),
#'   weights = rnorm(6, as.numeric(size) + 2)
#' ))
#'
#' # All possible combinations ---------------------------------------
#' # Note that only present levels of the factor variable `size` are retained.
#' fruits %>% expand(type)
#' fruits %>% expand(type, size)
#'
#' # This is different from the data frame behaviour:
#' fruits %>% dplyr::collect() %>% expand(type, size)
#'
#' # Other uses -------------------------------------------------------
#' fruits %>% expand(type, size, 2010:2012)
#'
#' # Use `anti_join()` to determine which observations are missing
#' all <- fruits %>% expand(type, size, year)
#' all
#' all %>% dplyr::anti_join(fruits)
#'
#' # Use with `right_join()` to fill in missing rows
#' fruits %>% dplyr::right_join(all)
# exported onLoad
expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") {
  dots <- capture_dots(data, ..., .j = FALSE)
  dots <- dots[!map_lgl(dots, is_null)]
  if (length(dots) == 0) {
    return(data)
  }

  named_dots <- have_name(dots)
  if (any(!named_dots)) {
    # Auto-names generated by enquos() don't always work with the CJ() step
    ## Ex: `1:3`
    # Replicates the "V" naming convention data.table uses
    symbol_dots <- map_lgl(dots, is_symbol)
    needs_v_name <- !symbol_dots & !named_dots
    # Only unnamed symbols take the symbol's own name; a name supplied by the
    # user (e.g. `a = x`) must be kept.
    needs_sym_name <- symbol_dots & !named_dots
    v_names <- paste0("V", seq_along(dots))
    names(dots)[needs_v_name] <- v_names[needs_v_name]
    names(dots)[needs_sym_name] <- vapply(
      dots[needs_sym_name], as_name, character(1),
      USE.NAMES = FALSE
    )
  }
  names(dots) <- vctrs::vec_as_names(names(dots), repair = .name_repair)
  dots_names <- names(dots)

  out <- step_subset_j(
    data,
    vars = union(data$groups, dots_names),
    j = expr(CJ(!!!dots, unique = TRUE))
  )

  # Delete duplicate columns if group vars are expanded
  if (any(dots_names %in% out$groups)) {
    group_vars <- out$groups
    expanded_group_vars <- dots_names[dots_names %in% group_vars]

    out <- step_subset(
      out, groups = character(), j = expr(!!expanded_group_vars := NULL)
    )
    out <- group_by(out, !!!syms(group_vars))
  }

  out
}

# --------------------------------------------------------------------------------
# /R/step-subset-filter.R:
# --------------------------------------------------------------------------------

#' Subset rows using column values
#'
#' This is a method for the dplyr [filter()] generic. It is translated to
#' the `i` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @param .preserve Ignored
#' @inheritParams dplyr::filter
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(mtcars)
#' dt %>% filter(cyl == 4)
#' dt %>% filter(vs, am)
#'
#' dt %>%
#'   group_by(cyl) %>%
#'   filter(mpg > mean(mpg))
#' @importFrom dplyr filter
# exported onLoad
filter.dtplyr_step <- function(.data, ..., .by = NULL, .preserve = FALSE) {
  check_filter(...)
  by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data")
  dots <- capture_dots(.data, ..., .j = FALSE, .by = by)

  # With no conditions there is nothing to subset on; return the input
  # unchanged, as `arrange()` does for empty dots. `.by` has already been
  # validated by `compute_by()` above.
  if (length(dots) == 0) {
    return(.data)
  }

  if (filter_by_lgl_col(dots)) {
    # Suppress data.table warning when filtering with a logical variable
    i <- call2("(", dots[[1]])
  } else {
    # Combine all conditions with `&`.
    i <- Reduce(function(x, y) call2("&", x, y), dots)
  }

  step_subset_i(.data, i, by)
}

# TRUE when `dots` holds exactly one condition that is a bare column (`x`) or
# a negated bare column (`!x`).
filter_by_lgl_col <- function(dots) {
  # Also guards against an empty `dots`, where `dots[[1]]` would fail.
  if (length(dots) != 1) {
    return(FALSE)
  }

  dot <- dots[[1]]
  if (is_symbol(dot)) {
    return(TRUE)
  }

  # catch expressions of form `!x`
  is_call(dot, name = "!", n = 1) && is_symbol(dot[[2]])
}

# Abort when any named argument to `filter()` is not a literal logical, which
# usually indicates `=` was typed instead of `==`.
check_filter <- function(...) {
  dots <- enquos(...)
  named <- have_name(dots)

  for (i in which(named)) {
    quo <- dots[[i]]

    # only allow named logical vectors, anything else
    # is suspicious
    expr <- quo_get_expr(quo)
    if (!is.logical(expr)) {
      abort(c(
        glue::glue("Problem with `filter()` input `..{i}`."),
        x = glue::glue("Input `..{i}` is named."),
        i = glue::glue("This usually means that you've used `=` instead of `==`."),
        i = glue::glue("Did you mean `{name} == {as_label(expr)}`?", name = names(dots)[i])
      ), call = caller_env())
    }

  }
}

# --------------------------------------------------------------------------------
# /R/step-subset-select.R:
# --------------------------------------------------------------------------------

#' Subset columns using their names
#'
#' This is a method for the dplyr [select()] generic. It is translated to
#' the `j` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
# ---- R/step-subset-select.R ----

#' Subset columns using their names
#'
#' This is a method for the dplyr [select()] generic. It is translated to
#' the `j` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @inheritParams dplyr::select
#' @importFrom dplyr select
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(data.frame(x1 = 1, x2 = 2, y1 = 3, y2 = 4))
#'
#' dt %>% select(starts_with("x"))
#' dt %>% select(ends_with("2"))
#' dt %>% select(z1 = x1, z2 = x2)
select.dtplyr_step <- function(.data, ...) {
  locs <- tidyselect::eval_select(expr(c(...)), .data)
  # Grouping columns are always kept, even when not selected explicitly.
  locs <- ensure_group_vars(locs, .data$vars, .data$groups)

  # Values are the current column names, names are the requested output names.
  vars <- set_names(.data$vars[locs], names(locs))

  if (length(vars) == 0) {
    j <- 0L
    groups <- .data$groups
    is_unnamed <- TRUE
  } else {
    groups <- rename_groups(.data$groups, vars)
    vars <- simplify_names(vars)

    is_unnamed <- all(!have_name(vars))
    if (is_unnamed && identical(unname(vars), .data$vars)) {
      # Selecting every column in its existing order is a no-op.
      return(.data)
    }
    j <- call2(".", !!!syms(vars))
  }

  if (is_copied(.data) && is_unnamed && !can_merge_subset(.data)) {
    # Drop columns by reference if:
    #  * Data has been copied (implicitly or explicitly)
    #  * There is no renaming in the select statement
    #  * The selection can't be combined with a prior `i` step.
    #    Ex: dt[x < 7, .(x, y)]
    vars_drop <- setdiff(.data$vars, vars)
    out <- remove_vars(.data, vars_drop)
    out <- step_colorder(out, vars)
  } else {
    out <- step_subset_j(.data, vars = names(locs), groups = character(), j = j)
  }

  step_group(out, groups)
}

#' @importFrom tidyselect tidyselect_data_proxy
#' @exportS3Method
tidyselect_data_proxy.dtplyr_step <- function(x) {
  simulate_vars(x)
}

#' @importFrom tidyselect tidyselect_data_has_predicates
#' @exportS3Method
tidyselect_data_has_predicates.dtplyr_step <- function(x) {
  FALSE
}

# Build a zero-row tibble with one logical column per variable of `x`,
# optionally leaving out the grouping variables.
simulate_vars <- function(x, drop_groups = FALSE) {
  if (drop_groups) {
    vars <- setdiff(x$vars, x$groups)
  } else {
    vars <- x$vars
  }

  as_tibble(rep_named(vars, list(logical())), .name_repair = "minimal")
}

# Prepend the positions of grouping variables that are absent from `loc`,
# informing the user about each one that is added.
#
# `loc` is a named vector of column positions, `names` the full vector of
# column names and `groups` the grouping variable names.
ensure_group_vars <- function(loc, names, groups) {
  group_loc <- match(groups, names)
  missing_loc <- setdiff(group_loc, loc)

  if (length(missing_loc) > 0) {
    vars <- names[missing_loc]
    inform(paste0(
      "Adding missing grouping variables: ",
      paste0("`", vars, "`", collapse = ", ")
    ))
    loc <- c(set_names(missing_loc, vars), loc)
  }

  loc
}

# Translate grouping variable names through a rename specification.
#
# `vars` is a character vector whose values are the old column names and
# whose names are the new ones. Groups that do not appear in `vars` are
# returned unchanged; the replacement is restricted to the matched groups so
# a partial rename can no longer recycle values or introduce `NA`.
rename_groups <- function(groups, vars) {
  matched <- match(groups, vars)
  renamed <- !is.na(matched)
  groups[renamed] <- names(vars)[matched[renamed]]
  groups
}

# Blank out the name of every element whose name equals its value, so only
# genuine renames stay named.
simplify_names <- function(vars) {
  names(vars)[vars == names(vars)] <- ""
  vars
}

# Delete `vars` from `.data` with a `:= NULL` step, then restore the original
# grouping. Returns `.data` untouched when there is nothing to remove.
remove_vars <- function(.data, vars) {
  if (is_empty(vars)) {
    return(.data)
  }
  out <- step_subset(
    .data, groups = character(), j = expr(!!unique(vars) := NULL),
    vars = setdiff(.data$vars, vars)
  )
  group_by(out, !!!syms(.data$groups))
}
# ---- R/step-subset-separate.R ----

#' Separate a character column into multiple columns with a regular
#' expression
#'
#' @description
#' This is a method for the [tidyr::separate()] generic. It is translated to
#' [data.table::tstrsplit()] in the `j` argument of `[.data.table`.
#'
#' @param data A [lazy_dt()].
#' @param col Column name or position.
#'
#'   This argument is passed by expression and supports quasiquotation
#'   (you can unquote column names or column positions).
#' @param into Names of new variables to create as character vector.
#'   Use `NA` to omit the variable in the output.
#' @param sep Separator between columns, as a character vector.
#'   The default value is a regular expression that matches any sequence of
#'   non-alphanumeric values.
#' @param remove If TRUE, remove the input column from the output data frame.
#' @param convert If TRUE, will run type.convert() with as.is = TRUE on new
#'   columns. This is useful if the component columns are integer, numeric or
#'   logical.
#'
#'   NB: this will cause string "NA"s to be converted to NAs.
#' @param ... Arguments passed on to methods
#' @examples
#' library(tidyr)
#' # If you want to split by any non-alphanumeric value (the default):
#' df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT")
#' df %>% separate(x, c("A", "B"))
#'
#' # If you just want the second variable:
#' df %>% separate(x, c(NA, "B"))
#'
#' # Use regular expressions to separate on multiple characters:
#' df <- lazy_dt(data.frame(x = c(NA, "x?y", "x.z", "y:z")), "DT")
#' df %>% separate(x, c("A","B"), sep = "([.?:])")
#'
#' # convert = TRUE detects column classes:
#' df <- lazy_dt(data.frame(x = c("x:1", "x:2", "y:4", "z", NA)), "DT")
#' df %>% separate(x, c("key","value"), ":") %>% str
#' df %>% separate(x, c("key","value"), ":", convert = TRUE) %>% str
# exported onLoad
separate.dtplyr_step <- function(data, col, into,
                                 sep = "[^[:alnum:]]+",
                                 remove = TRUE,
                                 convert = FALSE,
                                 ...) {
  # Argument checks run in a fixed order: `into` first, then `sep`.
  into_ok <- vctrs::vec_is(into, character())
  if (!into_ok) {
    abort("`into` must be a character vector.")
  }
  sep_ok <- vctrs::vec_is(sep, character())
  if (!sep_ok) {
    abort("`sep` must be a character vector.")
  }

  col <- sym(tidyselect::vars_pull(data$vars, !!enquo(col)))

  # `NA` entries in `into` mark pieces that should be discarded.
  n_pieces <- length(into)
  is_kept <- !is.na(into)
  keep <- seq_along(into)[is_kept]
  into <- into[is_kept]

  # Only pass `keep` / `type.convert` when they differ from the defaults.
  split_call <- call2("tstrsplit", col, split = sep)
  if (length(keep) < n_pieces) {
    split_call$keep <- keep
  }
  if (isTRUE(convert)) {
    split_call$type.convert <- TRUE
  }

  out <- step_subset(
    data,
    vars = union(data$vars, into),
    j = call2(":=", into, split_call),
    needs_copy = data$needs_copy || !data$implicit_copy
  )

  # Keep the input column when one of the new columns reuses its name.
  drop_input <- remove && !(as.character(col) %in% into)
  if (drop_input) {
    out <- select(out, -!!col)
  }

  out
}
# ---- R/step-subset-summarise.R ----

#' Summarise each group to one row
#'
#' This is a method for the dplyr [summarise()] generic. It is translated to
#' the `j` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @inheritParams dplyr::summarise
#' @importFrom dplyr summarise
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(mtcars)
#'
#' dt %>%
#'   group_by(cyl) %>%
#'   summarise(vs = mean(vs))
#'
#' dt %>%
#'   group_by(cyl) %>%
#'   summarise(across(disp:wt, mean))
summarise.dtplyr_step <- function(.data, ..., .by = NULL, .groups = NULL) {
  by <- compute_by({{ .by }}, .data, by_arg = ".by", data_arg = ".data")

  # Per-call `.by` grouping replaces the stored groups and always drops them.
  group_vars <- if (by$uses_by) by$names else .data$groups
  if (by$uses_by) {
    .groups <- "drop"
  }

  dots <- capture_dots(.data, ..., .by = by)
  check_summarise_vars(dots)

  if (length(dots) > 0) {
    out <- step_subset_j(
      .data,
      vars = union(group_vars, names(dots)),
      j = call2(".", !!!dots),
      by = by
    )
  } else if (length(group_vars) > 0) {
    # Acts like distinct on grouping vars
    out <- distinct(.data, !!!syms(group_vars))
  } else {
    out <- step_subset_j(.data, vars = character(), j = 0L)
  }

  # A summary that reuses a grouping variable's name would leave a duplicate
  # column behind, so it is removed in a follow-up step.
  overwritten <- intersect(group_vars, names(dots))
  if (length(overwritten) > 0) {
    out <- step_subset(
      out,
      groups = character(),
      j = expr(!!overwritten := NULL)
    )
  }

  out_groups <- summarise_groups(.data, .groups, caller_env())
  step_group(out, groups = out_groups)
}


# For each expression, check if it uses any newly created variables
check_summarise_vars <- function(dots) {
  dot_names <- names(dots)

  for (i in seq_along(dots)) {
    earlier <- dot_names[seq_len(i - 1)]
    referenced <- all_names(get_expr(dots[[i]]))

    if (!any(referenced %in% earlier)) {
      next
    }

    abort(paste0(
      "`", dot_names[[i]], "` ",
      "refers to a variable created earlier in this summarise().\n",
      "Do you need an extra mutate() step?"
    ), call = caller_env())
  }
}

# Work out which grouping variables remain after summarising, given the
# `.groups` argument, and emit the regrouping message when it applies.
summarise_groups <- function(.data, .groups, env_caller) {
  allowed <- c("drop_last", "drop", "keep")
  if (!is.null(.groups) && !.groups %in% allowed) {
    abort(c(
      paste0(
        "`.groups` can't be ", as_label(.groups),
        if (.groups == "rowwise") " in dtplyr"
      ),
      i = 'Possible values are NULL (default), "drop_last", "drop", and "keep"'
    ), call = caller_env())
  }

  group_vars <- .data$groups
  n <- length(group_vars)

  if (summarise_verbose(.groups, env_caller) && n > 1) {
    # `new_groups` is interpolated by name inside summarise_inform().
    new_groups <- glue::glue_collapse(paste0("'", group_vars[-n], "'"), sep = ", ")
    summarise_inform("has grouped output by {new_groups}")
  }

  if (is.null(.groups)) {
    .groups <- "drop_last"
  }
  switch(.groups,
    drop_last = group_vars[-n],
    keep = group_vars,
    drop = character()
  )
}

# Should summarise() announce the regrouping? Only when `.groups` was not
# supplied, the call comes from the global environment, and the
# `dplyr.summarise.inform` option is not `FALSE`.
summarise_verbose <- function(.groups, .env) {
  if (!is.null(.groups)) {
    return(FALSE)
  }
  if (!is_reference(topenv(.env), global_env())) {
    return(FALSE)
  }
  !identical(getOption("dplyr.summarise.inform"), FALSE)
}

# Emit a "`summarise()` ..." message; `...` is a glue template evaluated in
# `.env` (the caller's frame by default).
summarise_inform <- function(..., .env = parent.frame()) {
  detail <- glue::glue(..., .envir = .env)
  inform(paste0(
    "`summarise()` ", detail, '. You can override using the `.groups` argument.'
  ))
}
# ---- R/step-subset-transmute.R ----

#' Create new columns, dropping old
#'
#' This is a method for the dplyr [transmute()] generic. It is translated to
#' the `j` argument of `[.data.table`.
#'
#' @param .data A [lazy_dt()].
#' @inheritParams mutate.dtplyr_step
#' @importFrom dplyr transmute
#' @export
#' @examples
#' library(dplyr, warn.conflicts = FALSE)
#'
#' dt <- lazy_dt(dplyr::starwars)
#' dt %>% transmute(name, sh = paste0(species, "/", homeworld))
transmute.dtplyr_step <- function(.data, ...) {
  # Compute the new columns, then select so that grouping variables come
  # first, followed by the columns in the order they were written.
  out <- mutate(.data, ..., .keep = "none")
  cols_expr <- names(capture_new_vars(.data, ...))
  cols_group <- group_vars(.data)
  # A grouping variable that is also recomputed is listed once, at the
  # position of its expression.
  cols_group <- setdiff(cols_group, cols_expr)
  cols_retain <- c(cols_group, cols_expr)
  select(out, any_of(cols_retain))
}

# ---- R/step-subset.R ----

# Construct a "dtplyr_step_subset" step on top of `parent`.
#
# `i` and `j` hold the expressions destined for `[.data.table`; `i` may also
# be another step. `vars`, `groups`, `locals` and `arrange` default to the
# parent's values. `on` and `allow_cartesian` are stored on the step and
# emitted by dt_call() when `on` is non-empty.
step_subset <- function(parent,
                        vars = parent$vars,
                        groups = parent$groups,
                        locals = parent$locals,
                        arrange = parent$arrange,
                        i = NULL,
                        j = NULL,
                        on = character(),
                        allow_cartesian = NULL,
                        needs_copy = FALSE
                        ) {

  stopifnot(is_step(parent))
  stopifnot(is_expression(i) || is_call(i) || is_step(i))
  stopifnot(is_expression(j) || is_call(j))
  stopifnot(is.character(on))

  new_step(
    parent = parent,
    vars = vars,
    groups = groups,
    locals = locals,
    arrange = arrange,
    i = i,
    j = j,
    on = on,
    allow_cartesian = allow_cartesian,
    # The step is flagged as an implicit copy whenever it has an `i` or `j`.
    implicit_copy = !is.null(i) || !is.null(j),
    # A copy requested anywhere upstream stays requested.
    needs_copy = needs_copy || parent$needs_copy,
    class = "dtplyr_step_subset"
  )
}

# Grouped i needs an intermediate assignment for maximum efficiency
#
# Returns `parent` unchanged when `i` is empty. When `by` is in use, the
# parent is grouped by `by$names` for the translation and ungrouped again
# before the subset step is created.
step_subset_i <- function(parent, i, by = new_by()) {
  if (is_empty(i)) {
    return(parent)
  }

  if (by$uses_by) {
    parent <- step_group(parent, by$names)
  }

  if (length(parent$groups) > 0) {
    parent <- compute(parent)

    nm <- sym(parent$name)
    i <- expr((!!nm)[, .I[!!i]])              # dt[, .I[]]
    i <- add_grouping_param(i, parent, FALSE) # dt[, .I[], by = ()]
    i <- call("$", i, quote(V1))              # dt[, .I[], by = ()]$V1
  }

  if (by$uses_by) {
    parent <- ungroup(parent)
  }

  step_subset(parent, i = i)
}

# When adding a subset that contains only j, it may be possible to merge
# the previous step.
#
# If `parent` is a subset step without a `j` (see can_merge_subset()), its
# `i` and `on` are reused and the new step is built on the grandparent.
step_subset_j <- function(parent,
                          vars = parent$vars,
                          groups = parent$groups,
                          arrange = parent$arrange,
                          j = NULL,
                          by = new_by()) {
  if (can_merge_subset(parent)) {
    i <- parent$i
    on <- parent$on
    parent <- parent$parent
  } else {
    i <- NULL
    on <- character()
  }

  if (by$uses_by) {
    parent <- step_group(parent, by$names)
  }

  out <- step_subset(
    parent,
    vars = vars,
    groups = groups,
    arrange = arrange,
    i = i,
    j = j,
    on = on
  )

  if (by$uses_by) {
    out <- ungroup(out)
  }

  out
}

# TRUE when `x` is a subset step that has no `j` yet, i.e. a following
# j-only step can be folded into it.
can_merge_subset <- function(x) {
  # Can only merge subsets
  if (!inherits(x, "dtplyr_step_subset")) {
    return(FALSE)
  }

  # Don't need to check that groups are identical because the only
  # dplyr functions that generate expression in i are
  # filter/slice/sample/arrange/join and don't affect groups

  is.null(x$j)
}
# Collect the source tables of a subset step. When `i` is itself a step (as
# opposed to an expression), its sources are merged over the parent's.
#' @export
dt_sources.dtplyr_step_subset <- function(x) {
  # TODO: need to throw error if same name refers to different tables.
  if (is_step(x$i)) {
    utils::modifyList(dt_sources(x$parent), dt_sources(x$i))
  } else {
    dt_sources(x$parent)
  }
}

# Translate a subset step into a `parent[i, j, ...]` call.
#
# A step with neither `i` nor `j` translates to its parent's call. Grouping
# parameters are only added when there is a `j`; `on` and `allow.cartesian`
# are only added when `on` is non-empty.
#' @export
dt_call.dtplyr_step_subset <- function(x, needs_copy = x$needs_copy) {
  if (is.null(x$i) && is.null(x$j)) {
    # NOTE(review): `needs_copy` is not forwarded on this path, unlike the
    # call below - confirm that is intentional.
    return(dt_call(x$parent))
  }

  i <- if (is_step(x$i)) dt_call(x$i) else x$i

  parent <- dt_call(x$parent, needs_copy)

  # At least one of `i` / `j` is present here (see the early return above),
  # so only three shapes of `[` call are possible.
  if (is.null(i)) {
    out <- call2("[", parent, , x$j)
  } else if (is.null(x$j)) {
    out <- call2("[", parent, i)
  } else {
    out <- call2("[", parent, i, x$j)
  }

  if (!is.null(x$j)) {
    out <- add_grouping_param(out, x)
  }

  if (length(x$on) > 0) {
    out$on <- call2(".", !!!syms(x$on))
    out$allow.cartesian <- x$allow_cartesian
  }
  out
}

# ---- R/unite.R ----

#' Unite multiple columns into one by pasting strings together.
#'
#' @description
#' This is a method for the tidyr `unite()` generic.
#'
#' @inheritParams tidyr::unite
#' @examples
#' library(tidyr)
#'
#' df <- lazy_dt(expand_grid(x = c("a", NA), y = c("b", NA)))
#' df
#'
#' df %>% unite("z", x:y, remove = FALSE)
#'
#' # Separate is almost the complement of unite
#' df %>%
#'   unite("xy", x:y) %>%
#'   separate(xy, c("x", "y"))
#' # (but note `x` and `y` contain now "NA" not NA)
# exported onLoad
unite.dtplyr_step <- function(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) {
  if (is_true(na.rm)) {
    abort("`na.rm` is not implemented in dtplyr")
  }

  .col <- as_name(enquo(col))

  # With no column selection, every column is united.
  dots <- enquos(...)
  if (length(dots) == 0) {
    .cols <- data$vars
    locs <- seq_along(.cols)
  } else {
    locs <- tidyselect::eval_select(expr(c(!!!dots)), data, allow_rename = FALSE)
    .cols <- data$vars[locs]
  }

  out <- mutate(ungroup(data), !!.col := paste(!!!syms(.cols), sep = sep))

  remove <- is_true(remove)
  if (remove) {
    # Never drop the freshly created column, even if it reuses an input name.
    .drop_cols <- setdiff(.cols, .col)
    out <- select(out, -tidyselect::all_of(.drop_cols))
  }

  # Grouping variables that were united away can no longer be grouped on.
  group_vars <- data$groups
  if (remove && any(.cols %in% group_vars)) {
    group_vars <- setdiff(group_vars, .cols)
  }
  out <- relocate(out, !!.col, .before = min(locs))

  if (length(group_vars) > 0) {
    out <- group_by(out, !!!syms(group_vars))
  }

  out
}

# ---- R/utils.R ----

# Print the pieces in `...` pasted together element-wise, each result
# followed by a newline.
cat_line <- function(...) cat(paste0(..., "\n", collapse = ""))

# nocov start - compat-purrr.R

# Apply `.f` to each element of `.x` together with its name, or with its
# position when `.x` has no names.
imap <- function(.x, .f, ...) {
  map2(.x, names(.x) %||% seq_along(.x), .f, ...)
}
# Apply `.f` to the elements of `.x` and `.y` in parallel, returning a list.
# The result carries the names of `.x` when it has the same length as `.x`,
# and no names otherwise.
map2 <- function(.x, .y, .f, ...) {
  .f <- as_function(.f, env = global_env())
  out <- mapply(.f, .x, .y, MoreArgs = list(...), SIMPLIFY = FALSE)
  out_names <- if (length(out) == length(.x)) names(.x) else NULL
  set_names(out, out_names)
}

# nocov end

# nocov start - compat-tidyr.R

# Remove the leading `base` + `names_sep` prefix from every name of `df`
# that starts with it; other names are left as they are.
strip_names <- function(df, base, names_sep) {
  prefix <- paste0(base, names_sep)
  nms <- names(df)

  prefixed <- startsWith(nms, prefix)
  nms[prefixed] <- substring(nms[prefixed], nchar(prefix) + 1)

  set_names(df, nms)
}

# nocov end

# ---- R/zzz.R ----

# nocov start
# Register the dtplyr_step methods for generics owned by dplyr and tidyr.
.onLoad <- function(...) {
  dplyr_generics <- c("filter", "intersect", "setdiff", "union")
  tidyr_generics <- c(
    "complete", "drop_na", "expand", "fill", "pivot_longer", "pivot_wider",
    "replace_na", "nest", "separate", "unite"
  )

  for (generic in dplyr_generics) {
    register_s3_method("dplyr", generic, "dtplyr_step")
  }
  for (generic in tidyr_generics) {
    register_s3_method("tidyr", generic, "dtplyr_step")
  }
}

# Register `fun` as the S3 method `generic.class` in the namespace of `pkg`.
#
# When `fun` is NULL the method is looked up by name starting from the
# caller's frame. Registration happens immediately if `pkg` is already
# loaded, and again every time `pkg` is loaded later.
register_s3_method <- function(pkg, generic, class, fun = NULL) {
  stopifnot(is.character(pkg), length(pkg) == 1)
  stopifnot(is.character(generic), length(generic) == 1)
  stopifnot(is.character(class), length(class) == 1)

  if (is.null(fun)) {
    fun <- get(paste0(generic, ".", class), envir = parent.frame())
  } else {
    stopifnot(is.function(fun))
  }

  do_register <- function(...) {
    registerS3method(generic, class, fun, envir = asNamespace(pkg))
  }

  if (pkg %in% loadedNamespaces()) {
    do_register()
  }

  # Always register hook in case package is later unloaded & reloaded
  setHook(packageEvent(pkg, "onLoad"), do_register)
}
# nocov end
29 | 30 | ## Installation 31 | 32 | You can install from CRAN with: 33 | 34 | ```R 35 | install.packages("dtplyr") 36 | ``` 37 | 38 | Or try the development version from GitHub with: 39 | 40 | ```R 41 | # install.packages("pak") 42 | pak::pak("tidyverse/dtplyr") 43 | ``` 44 | 45 | ## Usage 46 | 47 | To use dtplyr, you must at least load dtplyr and dplyr. You may also want to load [data.table](http://r-datatable.com/) so you can access the other goodies that it provides: 48 | 49 | ```{r setup} 50 | library(data.table) 51 | library(dtplyr) 52 | library(dplyr, warn.conflicts = FALSE) 53 | ``` 54 | 55 | Then use `lazy_dt()` to create a "lazy" data table that tracks the operations performed on it. 56 | 57 | ```{r} 58 | mtcars2 <- lazy_dt(mtcars) 59 | ``` 60 | 61 | You can preview the transformation (including the generated data.table code) by printing the result: 62 | 63 | ```{r} 64 | mtcars2 %>% 65 | filter(wt < 5) %>% 66 | mutate(l100k = 235.21 / mpg) %>% # liters / 100 km 67 | group_by(cyl) %>% 68 | summarise(l100k = mean(l100k)) 69 | ``` 70 | 71 | But generally you should reserve this only for debugging, and use `as.data.table()`, `as.data.frame()`, or `as_tibble()` to indicate that you're done with the transformation and want to access the results: 72 | 73 | ```{r} 74 | mtcars2 %>% 75 | filter(wt < 5) %>% 76 | mutate(l100k = 235.21 / mpg) %>% # liters / 100 km 77 | group_by(cyl) %>% 78 | summarise(l100k = mean(l100k)) %>% 79 | as_tibble() 80 | ``` 81 | 82 | ## Why is dtplyr slower than data.table? 83 | 84 | There are two primary reasons that dtplyr will always be somewhat slower than data.table: 85 | 86 | * Each dplyr verb must do some work to convert dplyr syntax to data.table 87 | syntax. This takes time proportional to the complexity of the input code, 88 | not the input _data_, so should be a negligible overhead for large datasets. 89 | [Initial benchmarks][benchmark] suggest that the overhead should be under 90 | 1ms per dplyr call. 
91 | 92 | * To match dplyr semantics, `mutate()` does not modify in place by default. 93 | This means that most expressions involving `mutate()` must make a copy 94 | that would not be necessary if you were using data.table directly. 95 | (You can opt out of this behaviour in `lazy_dt()` with `immutable = FALSE`). 96 | 97 | [benchmark]: https://dtplyr.tidyverse.org/articles/translation.html#performance 98 | 99 | ## Code of Conduct 100 | 101 | Please note that the dtplyr project is released with a [Contributor Code of Conduct](https://dtplyr.tidyverse.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. 102 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://dtplyr.tidyverse.org 2 | 3 | template: 4 | package: tidytemplate 5 | bootstrap: 5 6 | includes: 7 | in_header: | 8 | 9 | 10 | development: 11 | mode: auto 12 | 13 | home: 14 | links: 15 | - text: Learn more about data.table 16 | href: https://rdatatable.gitlab.io/data.table/ 17 | 18 | reference: 19 | - title: Getting data in and out 20 | contents: 21 | - lazy_dt 22 | - collect.dtplyr_step 23 | 24 | - title: Single table verbs 25 | contents: 26 | - arrange.dtplyr_step 27 | - count.dtplyr_step 28 | - distinct.dtplyr_step 29 | - filter.dtplyr_step 30 | - group_by.dtplyr_step 31 | - group_modify.dtplyr_step 32 | - head.dtplyr_step 33 | - mutate.dtplyr_step 34 | - transmute.dtplyr_step 35 | - relocate.dtplyr_step 36 | - rename.dtplyr_step 37 | - reframe.dtplyr_step 38 | - select.dtplyr_step 39 | - slice.dtplyr_step 40 | - summarise.dtplyr_step 41 | 42 | - title: Two table verbs 43 | contents: 44 | - left_join.dtplyr_step 45 | - intersect.dtplyr_step 46 | 47 | - title: tidyr verbs 48 | contents: 49 | - complete.dtplyr_step 50 | - drop_na.dtplyr_step 51 | - expand.dtplyr_step 52 | - fill.dtplyr_step 53 | - nest.dtplyr_step 54 | - 
pivot_wider.dtplyr_step 55 | - pivot_longer.dtplyr_step 56 | - replace_na.dtplyr_step 57 | - separate.dtplyr_step 58 | - unite.dtplyr_step 59 | 60 | news: 61 | releases: 62 | - text: "Version 1.3.0" 63 | href: https://www.tidyverse.org/blog/2023/02/dtplyr-1-3-0/ 64 | - text: "Version 1.2.0" 65 | href: https://www.tidyverse.org/blog/2021/12/dtplyr-1-2-0/ 66 | - text: "Version 1.1.0" 67 | href: https://www.tidyverse.org/blog/2021/02/dplyr-backends/ 68 | - text: "Version 1.0.0" 69 | href: https://www.tidyverse.org/blog/2019/11/dtplyr-1-0-0/ 70 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | ## revdepcheck results 6 | 7 | I did not check any revdeps as this is a patch release to fix an R CMD check failure. 
8 | -------------------------------------------------------------------------------- /dtplyr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/arrange.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-arrange.R 3 | \name{arrange.dtplyr_step} 4 | \alias{arrange.dtplyr_step} 5 | \title{Arrange rows by column values} 6 | \usage{ 7 | \method{arrange}{dtplyr_step}(.data, ..., .by_group = FALSE) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables, or 13 | functions of variables. Use \code{\link[dplyr:desc]{desc()}} to sort a variable in descending 14 | order.} 15 | 16 | \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to 17 | grouped data frames only.} 18 | } 19 | \description{ 20 | This is a method for dplyr generic \code{\link[=arrange]{arrange()}}. It is translated to 21 | an \code{\link[=order]{order()}} call in the \code{i} argument of \verb{[.data.table}. 
22 | } 23 | \examples{ 24 | library(dplyr, warn.conflicts = FALSE) 25 | 26 | dt <- lazy_dt(mtcars) 27 | dt \%>\% arrange(vs, cyl) 28 | dt \%>\% arrange(desc(vs), cyl) 29 | dt \%>\% arrange(across(mpg:disp)) 30 | } 31 | -------------------------------------------------------------------------------- /man/collect.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step.R 3 | \name{collect.dtplyr_step} 4 | \alias{collect.dtplyr_step} 5 | \alias{compute.dtplyr_step} 6 | \alias{as.data.table.dtplyr_step} 7 | \alias{as.data.frame.dtplyr_step} 8 | \alias{as_tibble.dtplyr_step} 9 | \title{Force computation of a lazy data.table} 10 | \usage{ 11 | \method{collect}{dtplyr_step}(x, ...) 12 | 13 | \method{compute}{dtplyr_step}(x, name = unique_name(), ...) 14 | 15 | \method{as.data.table}{dtplyr_step}(x, keep.rownames = FALSE, ...) 16 | 17 | \method{as.data.frame}{dtplyr_step}(x, ...) 18 | 19 | \method{as_tibble}{dtplyr_step}(x, ..., .name_repair = "check_unique") 20 | } 21 | \arguments{ 22 | \item{x}{A \link{lazy_dt}} 23 | 24 | \item{...}{Arguments used by other methods.} 25 | 26 | \item{name}{Name of intermediate data.table.} 27 | 28 | \item{keep.rownames}{Ignored as dplyr never preserves rownames.} 29 | 30 | \item{.name_repair}{Treatment of problematic column names} 31 | } 32 | \description{ 33 | \itemize{ 34 | \item \code{collect()} returns a tibble, grouped if needed. 35 | \item \code{compute()} generates an intermediate assignment in the translation. 36 | \item \code{as.data.table()} returns a data.table. 37 | \item \code{as.data.frame()} returns a data frame. 38 | \item \code{as_tibble()} returns a tibble. 
39 | } 40 | } 41 | \examples{ 42 | library(dplyr, warn.conflicts = FALSE) 43 | 44 | dt <- lazy_dt(mtcars) 45 | 46 | # Generate translation 47 | avg_mpg <- dt \%>\% 48 | filter(am == 1) \%>\% 49 | group_by(cyl) \%>\% 50 | summarise(mpg = mean(mpg)) 51 | 52 | # Show translation and temporarily compute result 53 | avg_mpg 54 | 55 | # compute and return tibble 56 | avg_mpg_tb <- as_tibble(avg_mpg) 57 | avg_mpg_tb 58 | 59 | # compute and return data.table 60 | avg_mpg_dt <- data.table::as.data.table(avg_mpg) 61 | avg_mpg_dt 62 | 63 | # modify translation to use intermediate assignment 64 | compute(avg_mpg) 65 | 66 | } 67 | -------------------------------------------------------------------------------- /man/complete.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/complete.R 3 | \name{complete.dtplyr_step} 4 | \alias{complete.dtplyr_step} 5 | \title{Complete a data frame with missing combinations of data} 6 | \usage{ 7 | \method{complete}{dtplyr_step}(data, ..., fill = list()) 8 | } 9 | \arguments{ 10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[tidyr:tidyr_data_masking]{data-masking}}> Specification of columns 13 | to expand or complete. Columns can be atomic vectors or lists. 14 | \itemize{ 15 | \item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not 16 | present in the data, supply each variable as a separate argument: 17 | \code{expand(df, x, y, z)} or \code{complete(df, x, y, z)}. 18 | \item To find only the combinations that occur in the 19 | data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}. 20 | \item You can combine the two forms. For example, 21 | \code{expand(df, nesting(school_id, student_id), date)} would produce 22 | a row for each present school-student combination for all possible 23 | dates. 
24 | } 25 | 26 | When used with factors, \code{\link[tidyr:expand]{expand()}} and \code{\link[tidyr:complete]{complete()}} use the full set of 27 | levels, not just those that appear in the data. If you want to use only the 28 | values seen in the data, use \code{forcats::fct_drop()}. 29 | 30 | When used with continuous variables, you may need to fill in values 31 | that do not appear in the data: to do so use expressions like 32 | \code{year = 2010:2020} or \code{year = full_seq(year,1)}.} 33 | 34 | \item{fill}{A named list that for each variable supplies a single value to 35 | use instead of \code{NA} for missing combinations.} 36 | } 37 | \description{ 38 | This is a method for the tidyr \code{complete()} generic. This is a wrapper 39 | around \code{dtplyr} translations for \code{expand()}, \code{full_join()}, and \code{replace_na()} 40 | that's useful for completing missing combinations of data. 41 | } 42 | \examples{ 43 | library(tidyr) 44 | tbl <- tibble(x = 1:2, y = 1:2, z = 3:4) 45 | dt <- lazy_dt(tbl) 46 | 47 | dt \%>\% 48 | complete(x, y) 49 | 50 | dt \%>\% 51 | complete(x, y, fill = list(z = 10L)) 52 | } 53 | -------------------------------------------------------------------------------- /man/count.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/count.R 3 | \name{count.dtplyr_step} 4 | \alias{count.dtplyr_step} 5 | \title{Count observations by group} 6 | \usage{ 7 | \method{count}{dtplyr_step}(x, ..., wt = NULL, sort = FALSE, name = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\link[=lazy_dt]{lazy_dt()}}} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group 13 | by.} 14 | 15 | \item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights. 
16 | Can be \code{NULL} or a variable: 17 | \itemize{ 18 | \item If \code{NULL} (the default), counts the number of rows in each group. 19 | \item If a variable, computes \code{sum(wt)} for each group. 20 | }} 21 | 22 | \item{sort}{If \code{TRUE}, will show the largest groups at the top.} 23 | 24 | \item{name}{The name of the new column in the output. 25 | 26 | If omitted, it will default to \code{n}. If there's already a column called \code{n}, 27 | it will use \code{nn}. If there's a column called \code{n} and \code{nn}, it'll use 28 | \code{nnn}, and so on, adding \code{n}s until it gets a new name.} 29 | } 30 | \description{ 31 | This is a method for the dplyr \code{\link[=count]{count()}} generic. It is translated using 32 | \code{.N} in the \code{j} argument, and supplying groups to \code{keyby} as appropriate. 33 | } 34 | \examples{ 35 | library(dplyr, warn.conflicts = FALSE) 36 | 37 | dt <- lazy_dt(dplyr::starwars) 38 | dt \%>\% count(species) 39 | dt \%>\% count(species, sort = TRUE) 40 | dt \%>\% count(species, wt = mass, sort = TRUE) 41 | } 42 | -------------------------------------------------------------------------------- /man/distinct.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-call.R 3 | \name{distinct.dtplyr_step} 4 | \alias{distinct.dtplyr_step} 5 | \title{Subset distinct/unique rows} 6 | \usage{ 7 | \method{distinct}{dtplyr_step}(.data, ..., .keep_all = FALSE) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to 13 | use when determining uniqueness. If there are multiple rows for a given 14 | combination of inputs, only the first row will be preserved. 
If omitted, 15 | will use all variables in the data frame.} 16 | 17 | \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. 18 | If a combination of \code{...} is not distinct, this keeps the 19 | first row of values.} 20 | } 21 | \description{ 22 | This is a method for the dplyr \code{\link[=distinct]{distinct()}} generic. It is translated to 23 | \code{\link[data.table:duplicated]{data.table::unique.data.table()}}. 24 | } 25 | \examples{ 26 | library(dplyr, warn.conflicts = FALSE) 27 | df <- lazy_dt(data.frame( 28 | x = sample(10, 100, replace = TRUE), 29 | y = sample(10, 100, replace = TRUE) 30 | )) 31 | 32 | df \%>\% distinct(x) 33 | df \%>\% distinct(x, y) 34 | df \%>\% distinct(x, .keep_all = TRUE) 35 | } 36 | -------------------------------------------------------------------------------- /man/dot-datatable.aware.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtplyr-package.R 3 | \docType{data} 4 | \name{.datatable.aware} 5 | \alias{.datatable.aware} 6 | \title{dtplyr is data.table aware} 7 | \format{ 8 | An object of class \code{logical} of length 1. 9 | } 10 | \usage{ 11 | .datatable.aware 12 | } 13 | \description{ 14 | dtplyr is data.table aware 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/drop_na.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-call.R 3 | \name{drop_na.dtplyr_step} 4 | \alias{drop_na.dtplyr_step} 5 | \title{Drop rows containing missing values} 6 | \usage{ 7 | \method{drop_na}{dtplyr_step}(data, ...) 
8 | } 9 | \arguments{ 10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to inspect for 13 | missing values. If empty, all columns are used.} 14 | } 15 | \description{ 16 | This is a method for the tidyr \code{drop_na()} generic. It is translated to 17 | \code{data.table::na.omit()} 18 | } 19 | \examples{ 20 | library(dplyr) 21 | library(tidyr) 22 | 23 | dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b"))) 24 | dt \%>\% drop_na() 25 | dt \%>\% drop_na(x) 26 | 27 | vars <- "y" 28 | dt \%>\% drop_na(x, any_of(vars)) 29 | } 30 | -------------------------------------------------------------------------------- /man/dtplyr-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dtplyr-package.R 3 | \docType{package} 4 | \name{dtplyr-package} 5 | \alias{dtplyr} 6 | \alias{dtplyr-package} 7 | \title{dtplyr: Data Table Back-End for 'dplyr'} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Provides a data.table backend for 'dplyr'. The goal of 'dtplyr' is to allow you to write 'dplyr' code that is automatically translated to the equivalent, but usually much faster, data.table code. 
12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://dtplyr.tidyverse.org} 17 | \item \url{https://github.com/tidyverse/dtplyr} 18 | \item Report bugs at \url{https://github.com/tidyverse/dtplyr/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Hadley Wickham \email{hadley@posit.co} 24 | 25 | Authors: 26 | \itemize{ 27 | \item Maximilian Girlich 28 | \item Mark Fairbanks 29 | \item Ryan Dickerson 30 | } 31 | 32 | Other contributors: 33 | \itemize{ 34 | \item Posit Software, PBC [copyright holder, funder] 35 | } 36 | 37 | } 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /man/expand.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-expand.R 3 | \name{expand.dtplyr_step} 4 | \alias{expand.dtplyr_step} 5 | \title{Expand data frame to include all possible combinations of values.} 6 | \usage{ 7 | \method{expand}{dtplyr_step}(data, ..., .name_repair = "check_unique") 8 | } 9 | \arguments{ 10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{Specification of columns to expand. Columns can be atomic vectors 13 | or lists. 14 | \itemize{ 15 | \item To find all unique combinations of \code{x}, \code{y} and \code{z}, including those not 16 | present in the data, supply each variable as a separate argument: 17 | \code{expand(df, x, y, z)}. 18 | \item To find only the combinations that occur in the 19 | data, use \code{nesting}: \code{expand(df, nesting(x, y, z))}. 20 | \item You can combine the two forms. For example, 21 | \code{expand(df, nesting(school_id, student_id), date)} would produce 22 | a row for each present school-student combination for all possible 23 | dates. 24 | } 25 | 26 | Unlike the data.frame method, this method does not use the full set of 27 | levels, just those that appear in the data. 
28 | 29 | When used with continuous variables, you may need to fill in values 30 | that do not appear in the data: to do so use expressions like 31 | \code{year = 2010:2020} or \code{year = full_seq(year,1)}.} 32 | 33 | \item{.name_repair}{Treatment of problematic column names: 34 | \itemize{ 35 | \item \code{"minimal"}: No name repair or checks, beyond basic existence, 36 | \item \code{"unique"}: Make sure names are unique and not empty, 37 | \item \code{"check_unique"}: (default value), no name repair, but check they are 38 | \code{unique}, 39 | \item \code{"universal"}: Make the names \code{unique} and syntactic 40 | \item a function: apply custom name repair (e.g., \code{.name_repair = make.names} 41 | for names in the style of base R). 42 | \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}} 43 | } 44 | 45 | This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. 46 | See there for more details on these terms and the strategies used 47 | to enforce them.} 48 | } 49 | \description{ 50 | This is a method for the tidyr \code{expand()} generic. It is translated to 51 | \code{\link[data.table:J]{data.table::CJ()}}. 52 | } 53 | \examples{ 54 | library(tidyr) 55 | 56 | fruits <- lazy_dt(tibble( 57 | type = c("apple", "orange", "apple", "orange", "orange", "orange"), 58 | year = c(2010, 2010, 2012, 2010, 2010, 2012), 59 | size = factor( 60 | c("XS", "S", "M", "S", "S", "M"), 61 | levels = c("XS", "S", "M", "L") 62 | ), 63 | weights = rnorm(6, as.numeric(size) + 2) 64 | )) 65 | 66 | # All possible combinations --------------------------------------- 67 | # Note that only present levels of the factor variable `size` are retained. 
68 | fruits \%>\% expand(type) 69 | fruits \%>\% expand(type, size) 70 | 71 | # This is different from the data frame behaviour: 72 | fruits \%>\% dplyr::collect() \%>\% expand(type, size) 73 | 74 | # Other uses ------------------------------------------------------- 75 | fruits \%>\% expand(type, size, 2010:2012) 76 | 77 | # Use `anti_join()` to determine which observations are missing 78 | all <- fruits \%>\% expand(type, size, year) 79 | all 80 | all \%>\% dplyr::anti_join(fruits) 81 | 82 | # Use with `right_join()` to fill in missing rows 83 | fruits \%>\% dplyr::right_join(all) 84 | } 85 | -------------------------------------------------------------------------------- /man/figures/dt-seal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/man/figures/dt-seal.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/man/figures/logo.png -------------------------------------------------------------------------------- /man/fill.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fill.R 3 | \name{fill.dtplyr_step} 4 | \alias{fill.dtplyr_step} 5 | \title{Fill in missing values with previous or next value} 6 | \usage{ 7 | \method{fill}{dtplyr_step}(data, ..., .direction = c("down", "up", "downup", "updown")) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame.} 11 | 12 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to fill.} 13 | 14 | \item{.direction}{Direction in which to fill missing values. Currently 15 | either "down" (the default), "up", "downup" (i.e. 
first down and then up) 16 | or "updown" (first up and then down).} 17 | } 18 | \description{ 19 | This is a method for the tidyr \code{fill()} generic. It is translated to 20 | \code{\link[data.table:nafill]{data.table::nafill()}}. Note that \code{data.table::nafill()} currently only 21 | works for integer and double columns. 22 | } 23 | \examples{ 24 | library(tidyr) 25 | 26 | # Value (year) is recorded only when it changes 27 | sales <- lazy_dt(tibble::tribble( 28 | ~quarter, ~year, ~sales, 29 | "Q1", 2000, 66013, 30 | "Q2", NA, 69182, 31 | "Q3", NA, 53175, 32 | "Q4", NA, 21001, 33 | "Q1", 2001, 46036, 34 | "Q2", NA, 58842, 35 | "Q3", NA, 44568, 36 | "Q4", NA, 50197, 37 | "Q1", 2002, 39113, 38 | "Q2", NA, 41668, 39 | "Q3", NA, 30144, 40 | "Q4", NA, 52897, 41 | "Q1", 2004, 32129, 42 | "Q2", NA, 67686, 43 | "Q3", NA, 31768, 44 | "Q4", NA, 49094 45 | )) 46 | 47 | # `fill()` defaults to replacing missing data from top to bottom 48 | sales \%>\% fill(year) 49 | 50 | # Value (n_squirrels) is missing above and below within a group 51 | squirrels <- lazy_dt(tibble::tribble( 52 | ~group, ~name, ~role, ~n_squirrels, 53 | 1, "Sam", "Observer", NA, 54 | 1, "Mara", "Scorekeeper", 8, 55 | 1, "Jesse", "Observer", NA, 56 | 1, "Tom", "Observer", NA, 57 | 2, "Mike", "Observer", NA, 58 | 2, "Rachael", "Observer", NA, 59 | 2, "Sydekea", "Scorekeeper", 14, 60 | 2, "Gabriela", "Observer", NA, 61 | 3, "Derrick", "Observer", NA, 62 | 3, "Kara", "Scorekeeper", 9, 63 | 3, "Emily", "Observer", NA, 64 | 3, "Danielle", "Observer", NA 65 | )) 66 | 67 | # The values are inconsistently missing by position within the group 68 | # Use .direction = "downup" to fill missing values in both directions 69 | squirrels \%>\% 70 | dplyr::group_by(group) \%>\% 71 | fill(n_squirrels, .direction = "downup") \%>\% 72 | dplyr::ungroup() 73 | 74 | # Using `.direction = "updown"` accomplishes the same goal in this example 75 | } 76 | 
-------------------------------------------------------------------------------- /man/filter.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-filter.R 3 | \name{filter.dtplyr_step} 4 | \alias{filter.dtplyr_step} 5 | \title{Subset rows using column values} 6 | \usage{ 7 | \method{filter}{dtplyr_step}(.data, ..., .by = NULL, .preserve = FALSE) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that 13 | return a logical value, and are defined in terms of the variables in 14 | \code{.data}. If multiple expressions are included, they are combined with the 15 | \code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are 16 | kept.} 17 | 18 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 19 | 20 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 21 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 22 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 23 | 24 | \item{.preserve}{Ignored} 25 | } 26 | \description{ 27 | This is a method for the dplyr \code{\link[=filter]{filter()}} generic. 
It is translated to 28 | the \code{i} argument of \verb{[.data.table} 29 | } 30 | \examples{ 31 | library(dplyr, warn.conflicts = FALSE) 32 | 33 | dt <- lazy_dt(mtcars) 34 | dt \%>\% filter(cyl == 4) 35 | dt \%>\% filter(vs, am) 36 | 37 | dt \%>\% 38 | group_by(cyl) \%>\% 39 | filter(mpg > mean(mpg)) 40 | } 41 | -------------------------------------------------------------------------------- /man/group_by.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-group.R 3 | \name{group_by.dtplyr_step} 4 | \alias{group_by.dtplyr_step} 5 | \alias{ungroup.dtplyr_step} 6 | \title{Group and ungroup} 7 | \usage{ 8 | \method{group_by}{dtplyr_step}(.data, ..., .add = FALSE, arrange = TRUE) 9 | 10 | \method{ungroup}{dtplyr_step}(x, ...) 11 | } 12 | \arguments{ 13 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} 14 | 15 | \item{...}{In \code{group_by()}, variables or computations to group by. 16 | Computations are always done on the ungrouped data frame. 17 | To perform computations on the grouped data, you need to use 18 | a separate \code{mutate()} step before the \code{group_by()}. 19 | Computations are not allowed in \code{nest_by()}. 20 | In \code{ungroup()}, variables to remove from the grouping.} 21 | 22 | \item{.add, add}{When \code{FALSE}, the default, \code{group_by()} will 23 | override existing groups. To add to the existing groups, use 24 | \code{.add = TRUE}. 25 | 26 | This argument was previously called \code{add}, but that prevented 27 | creating a new grouping variable called \code{add}, and conflicts with 28 | our naming conventions.} 29 | 30 | \item{arrange}{If \code{TRUE}, will automatically arrange the output of 31 | subsequent grouped operations by group. If \code{FALSE}, output order will be 32 | left unchanged. 
In the generated data.table code this switches between 33 | using the \code{keyby} (\code{TRUE}) and \code{by} (\code{FALSE}) arguments.} 34 | 35 | \item{x}{A \code{\link[dplyr:tbl]{tbl()}}} 36 | } 37 | \description{ 38 | These are methods for dplyr's \code{\link[=group_by]{group_by()}} and \code{\link[=ungroup]{ungroup()}} generics. 39 | Grouping is translated to either the \code{keyby} or the \code{by} argument of 40 | \verb{[.data.table} depending on the value of the \code{arrange} argument. 41 | } 42 | \examples{ 43 | library(dplyr, warn.conflicts = FALSE) 44 | dt <- lazy_dt(mtcars) 45 | 46 | # group_by() is usually translated to `keyby` so that the groups 47 | # are ordered in the output 48 | dt \%>\% 49 | group_by(cyl) \%>\% 50 | summarise(mpg = mean(mpg)) 51 | 52 | # use `arrange = FALSE` to instead use `by` so the original order 53 | # of groups is preserved 54 | dt \%>\% 55 | group_by(cyl, arrange = FALSE) \%>\% 56 | summarise(mpg = mean(mpg)) 57 | } 58 | -------------------------------------------------------------------------------- /man/group_modify.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-modify.R 3 | \name{group_modify.dtplyr_step} 4 | \alias{group_modify.dtplyr_step} 5 | \alias{group_map.dtplyr_step} 6 | \title{Apply a function to each group} 7 | \usage{ 8 | \method{group_modify}{dtplyr_step}(.data, .f, ..., keep = FALSE) 9 | 10 | \method{group_map}{dtplyr_step}(.data, .f, ..., keep = FALSE) 11 | } 12 | \arguments{ 13 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} 14 | 15 | \item{.f}{The name of a two argument function. The first argument is passed 16 | \code{.SD}, the data.table representing the current group; the second argument 17 | is passed \code{.BY}, a list giving the current values of the grouping 18 | variables. 
The function should return a list or data.table.} 19 | 20 | \item{...}{Additional arguments passed to \code{.f}} 21 | 22 | \item{keep}{Not supported for \link{lazy_dt}.} 23 | } 24 | \value{ 25 | \code{group_map()} applies \code{.f} to each group, returning a list. 26 | \code{group_modify()} replaces each group with the results of \code{.f}, returning a 27 | modified \code{\link[=lazy_dt]{lazy_dt()}}. 28 | } 29 | \description{ 30 | These are methods for the dplyr \code{\link[=group_map]{group_map()}} and \code{\link[=group_modify]{group_modify()}} generics. 31 | They are both translated to \verb{[.data.table}. 32 | } 33 | \examples{ 34 | library(dplyr) 35 | 36 | dt <- lazy_dt(mtcars) 37 | 38 | dt \%>\% 39 | group_by(cyl) \%>\% 40 | group_modify(head, n = 2L) 41 | 42 | dt \%>\% 43 | group_by(cyl) \%>\% 44 | group_map(head, n = 2L) 45 | } 46 | -------------------------------------------------------------------------------- /man/head.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-call.R 3 | \name{head.dtplyr_step} 4 | \alias{head.dtplyr_step} 5 | \alias{tail.dtplyr_step} 6 | \title{Subset first or last rows} 7 | \usage{ 8 | \method{head}{dtplyr_step}(x, n = 6L, ...) 9 | 10 | \method{tail}{dtplyr_step}(x, n = 6L, ...) 11 | } 12 | \arguments{ 13 | \item{x}{A \code{\link[=lazy_dt]{lazy_dt()}}} 14 | 15 | \item{n}{Number of rows to select. Can use a negative number to instead 16 | drop rows from the other end.} 17 | 18 | \item{...}{Passed on to \code{\link[=head]{head()}}/\code{\link[=tail]{tail()}}.} 19 | } 20 | \description{ 21 | These are methods for the base generics \code{\link[=head]{head()}} and \code{\link[=tail]{tail()}}. They 22 | are not translated. 
23 | } 24 | \examples{ 25 | library(dplyr, warn.conflicts = FALSE) 26 | dt <- lazy_dt(data.frame(x = 1:10)) 27 | 28 | # first three rows 29 | head(dt, 3) 30 | # last three rows 31 | tail(dt, 3) 32 | 33 | # drop first three rows 34 | tail(dt, -3) 35 | } 36 | -------------------------------------------------------------------------------- /man/intersect.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-set.R 3 | \name{intersect.dtplyr_step} 4 | \alias{intersect.dtplyr_step} 5 | \alias{union.dtplyr_step} 6 | \alias{union_all.dtplyr_step} 7 | \alias{setdiff.dtplyr_step} 8 | \title{Set operations} 9 | \usage{ 10 | \method{intersect}{dtplyr_step}(x, y, ...) 11 | 12 | \method{union}{dtplyr_step}(x, y, ...) 13 | 14 | \method{union_all}{dtplyr_step}(x, y, ...) 15 | 16 | \method{setdiff}{dtplyr_step}(x, y, ...) 17 | } 18 | \arguments{ 19 | \item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.} 20 | 21 | \item{...}{Ignored} 22 | } 23 | \description{ 24 | These are methods for the dplyr generics \code{\link[=intersect]{intersect()}}, \code{\link[=union]{union()}}, 25 | \code{\link[=union_all]{union_all()}}, and \code{\link[=setdiff]{setdiff()}}. They are translated to 26 | \code{\link[data.table:setops]{data.table::fintersect()}}, \code{\link[data.table:setops]{data.table::funion()}}, and 27 | \code{\link[data.table:setops]{data.table::fsetdiff()}}. 
28 | } 29 | \examples{ 30 | dt1 <- lazy_dt(data.frame(x = 1:4)) 31 | dt2 <- lazy_dt(data.frame(x = c(2, 4, 6))) 32 | 33 | intersect(dt1, dt2) 34 | union(dt1, dt2) 35 | setdiff(dt1, dt2) 36 | 37 | } 38 | -------------------------------------------------------------------------------- /man/lazy_dt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-first.R 3 | \name{lazy_dt} 4 | \alias{lazy_dt} 5 | \alias{tbl_dt} 6 | \alias{grouped_dt} 7 | \title{Create a "lazy" data.table for use with dplyr verbs} 8 | \usage{ 9 | lazy_dt(x, name = NULL, immutable = TRUE, key_by = NULL) 10 | } 11 | \arguments{ 12 | \item{x}{A data table (or something that can be coerced to a data table).} 13 | 14 | \item{name}{Optionally, supply a name to be used in generated expressions. 15 | For expert use only.} 16 | 17 | \item{immutable}{If \code{TRUE}, \code{x} is treated as immutable and will never 18 | be modified by any code generated by dtplyr. Alternatively, you can set 19 | \code{immutable = FALSE} to allow dtplyr to modify the input object.} 20 | 21 | \item{key_by}{Set keys for data frame, using \code{\link[=select]{select()}} semantics (e.g. 22 | \code{key_by = c(key1, key2)}). 23 | 24 | This uses \code{\link[data.table:setkey]{data.table::setkey()}} to sort the table and build an index. 25 | This will considerably improve performance for subsets, summaries, and 26 | joins that use the keys. 27 | 28 | See \code{vignette("datatable-keys-fast-subset")} for more details.} 29 | } 30 | \description{ 31 | A lazy data.table captures the intent of dplyr verbs, only actually 32 | performing computation when requested (with \code{\link[=collect]{collect()}}, \code{\link[=pull]{pull()}}, 33 | \code{\link[=as.data.frame]{as.data.frame()}}, \code{\link[data.table:as.data.table]{data.table::as.data.table()}}, or \code{\link[tibble:as_tibble]{tibble::as_tibble()}}). 
34 | This allows dtplyr to convert dplyr verbs into as few data.table expressions 35 | as possible, which leads to a high performance translation. 36 | 37 | See \code{vignette("translation")} for the details of the translation. 38 | } 39 | \examples{ 40 | library(dplyr, warn.conflicts = FALSE) 41 | 42 | mtcars2 <- lazy_dt(mtcars) 43 | mtcars2 44 | mtcars2 \%>\% select(mpg:cyl) 45 | mtcars2 \%>\% select(x = mpg, y = cyl) 46 | mtcars2 \%>\% filter(cyl == 4) \%>\% select(mpg) 47 | mtcars2 \%>\% select(mpg, cyl) \%>\% filter(cyl == 4) 48 | mtcars2 \%>\% mutate(cyl2 = cyl * 2, cyl4 = cyl2 * 2) 49 | mtcars2 \%>\% transmute(cyl2 = cyl * 2, vs2 = vs * 2) 50 | mtcars2 \%>\% filter(cyl == 8) \%>\% mutate(cyl2 = cyl * 2) 51 | 52 | # Learn more about translation in vignette("translation") 53 | by_cyl <- mtcars2 \%>\% group_by(cyl) 54 | by_cyl \%>\% summarise(mpg = mean(mpg)) 55 | by_cyl \%>\% mutate(mpg = mean(mpg)) 56 | by_cyl \%>\% 57 | filter(mpg < mean(mpg)) \%>\% 58 | summarise(hp = mean(hp)) 59 | } 60 | -------------------------------------------------------------------------------- /man/left_join.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-join.R 3 | \name{left_join.dtplyr_step} 4 | \alias{left_join.dtplyr_step} 5 | \title{Join data tables} 6 | \usage{ 7 | \method{left_join}{dtplyr_step}(x, y, ..., by = NULL, copy = FALSE, suffix = c(".x", ".y")) 8 | } 9 | \arguments{ 10 | \item{x, y}{A pair of \code{\link[=lazy_dt]{lazy_dt()}}s.} 11 | 12 | \item{...}{Other parameters passed onto methods.} 13 | 14 | \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character 15 | vector of variables to join by. 16 | 17 | If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all 18 | variables in common across \code{x} and \code{y}. 
A message lists the variables so 19 | that you can check they're correct; suppress the message by supplying \code{by} 20 | explicitly. 21 | 22 | To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} 23 | specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. 24 | 25 | To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with 26 | multiple expressions. For example, \code{join_by(a == b, c == d)} will match 27 | \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between 28 | \code{x} and \code{y}, you can shorten this by listing only the variable names, like 29 | \code{join_by(a, c)}. 30 | 31 | \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap 32 | joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on 33 | these types of joins. 34 | 35 | For simple equality joins, you can alternatively specify a character vector 36 | of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} 37 | to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, 38 | use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. 39 | 40 | To perform a cross-join, generating all combinations of \code{x} and \code{y}, see 41 | \code{\link[dplyr:cross_join]{cross_join()}}.} 42 | 43 | \item{copy}{If \code{x} and \code{y} are not from the same data source, 44 | and \code{copy} is \code{TRUE}, then \code{y} will be copied into the 45 | same src as \code{x}. This allows you to join tables across srcs, but 46 | it is a potentially expensive operation so you must opt into it.} 47 | 48 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and 49 | \code{y}, these suffixes will be added to the output to disambiguate them. 
50 | Should be a character vector of length 2.} 51 | } 52 | \description{ 53 | These are methods for the dplyr generics \code{\link[=left_join]{left_join()}}, \code{\link[=right_join]{right_join()}}, 54 | \code{\link[=inner_join]{inner_join()}}, \code{\link[=full_join]{full_join()}}, \code{\link[=anti_join]{anti_join()}}, and \code{\link[=semi_join]{semi_join()}}. Left, right, 55 | inner, and anti join are translated to the \verb{[.data.table} equivalent, 56 | full joins to \code{\link[data.table:merge]{data.table::merge.data.table()}}. 57 | Left, right, and full joins are in some cases followed by calls to 58 | \code{\link[data.table:setcolorder]{data.table::setcolorder()}} and \code{\link[data.table:setattr]{data.table::setnames()}} to ensure that column 59 | order and names match dplyr conventions. 60 | Semi-joins don't have a direct data.table equivalent. 61 | } 62 | \examples{ 63 | library(dplyr, warn.conflicts = FALSE) 64 | 65 | band_dt <- lazy_dt(dplyr::band_members) 66 | instrument_dt <- lazy_dt(dplyr::band_instruments) 67 | 68 | band_dt \%>\% left_join(instrument_dt) 69 | band_dt \%>\% right_join(instrument_dt) 70 | band_dt \%>\% inner_join(instrument_dt) 71 | band_dt \%>\% full_join(instrument_dt) 72 | 73 | band_dt \%>\% semi_join(instrument_dt) 74 | band_dt \%>\% anti_join(instrument_dt) 75 | } 76 | -------------------------------------------------------------------------------- /man/mutate.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-mutate.R 3 | \name{mutate.dtplyr_step} 4 | \alias{mutate.dtplyr_step} 5 | \title{Create and modify columns} 6 | \usage{ 7 | \method{mutate}{dtplyr_step}( 8 | .data, 9 | ..., 10 | .by = NULL, 11 | .keep = c("all", "used", "unused", "none"), 12 | .before = NULL, 13 | .after = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 18 | 19 | 
\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. 20 | The name gives the name of the column in the output. 21 | 22 | The value can be: 23 | \itemize{ 24 | \item A vector of length 1, which will be recycled to the correct length. 25 | \item A vector the same length as the current group (or the whole data frame 26 | if ungrouped). 27 | \item \code{NULL}, to remove the column. 28 | \item A data frame or tibble, to create multiple columns in the output. 29 | }} 30 | 31 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 32 | 33 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 34 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 35 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 36 | 37 | \item{.keep}{Control which columns from \code{.data} are retained in the output. Grouping 38 | columns and columns created by \code{...} are always kept. 39 | \itemize{ 40 | \item \code{"all"} retains all columns from \code{.data}. This is the default. 41 | \item \code{"used"} retains only the columns used in \code{...} to create new 42 | columns. This is useful for checking your work, as it displays inputs 43 | and outputs side-by-side. 44 | \item \code{"unused"} retains only the columns \emph{not} used in \code{...} to create new 45 | columns. This is useful if you generate new columns, but no longer need 46 | the columns used to generate them. 47 | \item \code{"none"} doesn't retain any extra columns from \code{.data}. Only the grouping 48 | variables and columns created by \code{...} are kept. 49 | } 50 | 51 | Note: With dtplyr \code{.keep} will only work with column names passed as symbols, and won't 52 | work with other workflows (e.g. 
\code{eval(parse(text = "x + 1"))})} 53 | 54 | \item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, control where new columns 55 | should appear (the default is to add to the right hand side). See 56 | \code{\link[dplyr:relocate]{relocate()}} for more details.} 57 | } 58 | \description{ 59 | This is a method for the dplyr \code{\link[=mutate]{mutate()}} generic. It is translated to 60 | the \code{j} argument of \verb{[.data.table}, using \verb{:=} to modify "in place". If 61 | \code{.before} or \code{.after} is provided, the new columns are relocated with a call 62 | to \code{\link[data.table:setcolorder]{data.table::setcolorder()}}. 63 | } 64 | \examples{ 65 | library(dplyr, warn.conflicts = FALSE) 66 | 67 | dt <- lazy_dt(data.frame(x = 1:5, y = 5:1)) 68 | dt \%>\% 69 | mutate(a = (x + y) / 2, b = sqrt(x^2 + y^2)) 70 | 71 | # It uses a more sophisticated translation when newly created variables 72 | # are used in the same expression 73 | dt \%>\% 74 | mutate(x1 = x + 1, x2 = x1 + 1) 75 | } 76 | -------------------------------------------------------------------------------- /man/nest.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-nest.R 3 | \name{nest.dtplyr_step} 4 | \alias{nest.dtplyr_step} 5 | \title{Nest} 6 | \usage{ 7 | \method{nest}{dtplyr_step}(.data, ..., .names_sep = NULL, .key = deprecated()) 8 | } 9 | \arguments{ 10 | \item{.data}{A data frame.} 11 | 12 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest, specified 13 | using name-variable pairs of the form \code{new_col = c(col1, col2, col3)}. 14 | The right hand side can be any valid tidy select expression.} 15 | 16 | \item{.names_sep}{If \code{NULL}, the default, the inner names will come from 17 | the former outer names. 
If a string, the new inner names will use the 18 | outer names with \code{names_sep} automatically stripped. This makes 19 | \code{names_sep} roughly symmetric between nesting and unnesting.} 20 | 21 | \item{.key}{Not supported.} 22 | 23 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 24 | } 25 | \description{ 26 | This is a method for the tidyr \code{\link[tidyr:nest]{tidyr::nest()}} generic. It is translated 27 | using the non-nested variables in the \code{by} argument and \code{.SD} in the \code{j} 28 | argument. 29 | } 30 | \examples{ 31 | if (require("tidyr", quietly = TRUE)) { 32 | dt <- lazy_dt(tibble(x = c(1, 2, 1), y = c("a", "a", "b"))) 33 | dt \%>\% nest(data = y) 34 | 35 | dt \%>\% dplyr::group_by(x) \%>\% nest() 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /man/reframe.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reframe.R 3 | \name{reframe.dtplyr_step} 4 | \alias{reframe.dtplyr_step} 5 | \title{Transform each group to an arbitrary number of rows} 6 | \usage{ 7 | \method{reframe}{dtplyr_step}(.data, ..., .by = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> 13 | 14 | Name-value pairs of functions. The name will be the name of the variable in 15 | the result. The value can be a vector of any length. 
16 | 17 | Unnamed data frame values add multiple columns from a single expression.} 18 | 19 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 20 | 21 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 22 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 23 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 24 | } 25 | \description{ 26 | This is a method for the dplyr \code{\link[=reframe]{reframe()}} generic. It is translated to 27 | the \code{j} argument of \verb{[.data.table}. 28 | } 29 | \examples{ 30 | library(dplyr, warn.conflicts = FALSE) 31 | 32 | dt <- lazy_dt(mtcars) 33 | 34 | dt \%>\% 35 | reframe(qs = quantile(disp, c(0.25, 0.75)), 36 | prob = c(0.25, 0.75), 37 | .by = cyl) 38 | 39 | dt \%>\% 40 | group_by(cyl) \%>\% 41 | reframe(qs = quantile(disp, c(0.25, 0.75)), 42 | prob = c(0.25, 0.75)) 43 | } 44 | -------------------------------------------------------------------------------- /man/relocate.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-colorder-relocate.R 3 | \name{relocate.dtplyr_step} 4 | \alias{relocate.dtplyr_step} 5 | \title{Relocate variables using their names} 6 | \usage{ 7 | \method{relocate}{dtplyr_step}(.data, ..., .before = NULL, .after = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to move.} 13 | 14 | \item{.before, .after}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Destination of 15 | columns selected by \code{...}. 
Supplying neither will move columns to the 16 | left-hand side; specifying both is an error.} 17 | } 18 | \description{ 19 | This is a method for the dplyr \code{\link[=relocate]{relocate()}} generic. It is translated to 20 | the \code{j} argument of \verb{[.data.table}. 21 | } 22 | \examples{ 23 | library(dplyr, warn.conflicts = FALSE) 24 | 25 | dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3)) 26 | 27 | dt \%>\% relocate(z) 28 | dt \%>\% relocate(y, .before = x) 29 | dt \%>\% relocate(y, .after = y) 30 | } 31 | -------------------------------------------------------------------------------- /man/rename.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-call.R 3 | \name{rename.dtplyr_step} 4 | \alias{rename.dtplyr_step} 5 | \alias{rename_with.dtplyr_step} 6 | \title{Rename columns using their names} 7 | \usage{ 8 | \method{rename}{dtplyr_step}(.data, ...) 9 | 10 | \method{rename_with}{dtplyr_step}(.data, .fn, .cols = everything(), ...) 11 | } 12 | \arguments{ 13 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}} 14 | 15 | \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use 16 | \code{new_name = old_name} to rename selected variables. 17 | 18 | For \code{rename_with()}: additional arguments passed onto \code{.fn}.} 19 | 20 | \item{.fn}{A function used to transform the selected \code{.cols}. Should 21 | return a character vector the same length as the input.} 22 | 23 | \item{.cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Columns to rename; 24 | defaults to all columns.} 25 | } 26 | \description{ 27 | These are methods for the dplyr generics \code{\link[=rename]{rename()}} and \code{\link[=rename_with]{rename_with()}}. 28 | They are both translated to \code{\link[data.table:setattr]{data.table::setnames()}}. 
29 | } 30 | \examples{ 31 | library(dplyr, warn.conflicts = FALSE) 32 | dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3)) 33 | dt \%>\% rename(new_x = x, new_y = y) 34 | dt \%>\% rename_with(toupper) 35 | } 36 | -------------------------------------------------------------------------------- /man/replace_na.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/replace_na.R 3 | \name{replace_na.dtplyr_step} 4 | \alias{replace_na.dtplyr_step} 5 | \title{Replace NAs with specified values} 6 | \usage{ 7 | \method{replace_na}{dtplyr_step}(data, replace = list()) 8 | } 9 | \arguments{ 10 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{replace}{If \code{data} is a data frame, \code{replace} takes a named list of 13 | values, with one value for each column that has missing values to be 14 | replaced. Each value in \code{replace} will be cast to the type of the column 15 | in \code{data} that it is being used as a replacement in. 16 | 17 | If \code{data} is a vector, \code{replace} takes a single value. This single value 18 | replaces all of the missing values in the vector. \code{replace} will be cast 19 | to the type of \code{data}.} 20 | } 21 | \description{ 22 | This is a method for the tidyr \code{replace_na()} generic. It is translated to 23 | \code{\link[data.table:coalesce]{data.table::fcoalesce()}}. 24 | 25 | Note that unlike \code{tidyr::replace_na()}, \code{data.table::fcoalesce()} cannot 26 | replace \code{NULL} values in lists. 
27 | } 28 | \examples{ 29 | library(tidyr) 30 | 31 | # Replace NAs in a data frame 32 | dt <- lazy_dt(tibble(x = c(1, 2, NA), y = c("a", NA, "b"))) 33 | dt \%>\% replace_na(list(x = 0, y = "unknown")) 34 | 35 | # Replace NAs using `dplyr::mutate()` 36 | dt \%>\% dplyr::mutate(x = replace_na(x, 0)) 37 | } 38 | -------------------------------------------------------------------------------- /man/select.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-select.R 3 | \name{select.dtplyr_step} 4 | \alias{select.dtplyr_step} 5 | \title{Subset columns using their names} 6 | \usage{ 7 | \method{select}{dtplyr_step}(.data, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted 13 | expressions separated by commas. Variable names can be used as if they 14 | were positions in the data frame, so expressions like \code{x:y} can 15 | be used to select a range of variables.} 16 | } 17 | \description{ 18 | This is a method for the dplyr \code{\link[=select]{select()}} generic. It is translated to 19 | the \code{j} argument of \verb{[.data.table}. 
20 | } 21 | \examples{ 22 | library(dplyr, warn.conflicts = FALSE) 23 | 24 | dt <- lazy_dt(data.frame(x1 = 1, x2 = 2, y1 = 3, y2 = 4)) 25 | 26 | dt \%>\% select(starts_with("x")) 27 | dt \%>\% select(ends_with("2")) 28 | dt \%>\% select(z1 = x1, z2 = x2) 29 | } 30 | -------------------------------------------------------------------------------- /man/separate.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-separate.R 3 | \name{separate.dtplyr_step} 4 | \alias{separate.dtplyr_step} 5 | \title{Separate a character column into multiple columns with a regular 6 | expression or numeric locations} 7 | \usage{ 8 | \method{separate}{dtplyr_step}( 9 | data, 10 | col, 11 | into, 12 | sep = "[^[:alnum:]]+", 13 | remove = TRUE, 14 | convert = FALSE, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 20 | 21 | \item{col}{Column name or position. 22 | 23 | This argument is passed by expression and supports quasiquotation 24 | (you can unquote column names or column positions).} 25 | 26 | \item{into}{Names of new variables to create as character vector. 27 | Use \code{NA} to omit the variable in the output.} 28 | 29 | \item{sep}{Separator between columns. 30 | The default value is a regular expression that matches any sequence of non-alphanumeric values.} 31 | 32 | \item{remove}{If TRUE, remove the input column from the output data frame.} 33 | 34 | \item{convert}{If TRUE, will run type.convert() with as.is = TRUE on new columns. 35 | This is useful if the component columns are integer, numeric or logical. 36 | 37 | NB: this will cause string "NA"s to be converted to NAs.} 38 | 39 | \item{...}{Arguments passed on to methods} 40 | } 41 | \description{ 42 | This is a method for the \code{\link[tidyr:separate]{tidyr::separate()}} generic. 
It is translated to 43 | \code{\link[data.table:tstrsplit]{data.table::tstrsplit()}} in the \code{j} argument of \verb{[.data.table}. 44 | } 45 | \examples{ 46 | library(tidyr) 47 | # If you want to split by any non-alphanumeric value (the default): 48 | df <- lazy_dt(data.frame(x = c(NA, "x.y", "x.z", "y.z")), "DT") 49 | df \%>\% separate(x, c("A", "B")) 50 | 51 | # If you just want the second variable: 52 | df \%>\% separate(x, c(NA, "B")) 53 | 54 | # Use regular expressions to separate on multiple characters: 55 | df <- lazy_dt(data.frame(x = c(NA, "x?y", "x.z", "y:z")), "DT") 56 | df \%>\% separate(x, c("A","B"), sep = "([.?:])") 57 | 58 | # convert = TRUE detects column classes: 59 | df <- lazy_dt(data.frame(x = c("x:1", "x:2", "y:4", "z", NA)), "DT") 60 | df \%>\% separate(x, c("key","value"), ":") \%>\% str 61 | df \%>\% separate(x, c("key","value"), ":", convert = TRUE) \%>\% str 62 | } 63 | -------------------------------------------------------------------------------- /man/slice.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-slice.R 3 | \name{slice.dtplyr_step} 4 | \alias{slice.dtplyr_step} 5 | \alias{slice_head.dtplyr_step} 6 | \alias{slice_tail.dtplyr_step} 7 | \alias{slice_min.dtplyr_step} 8 | \alias{slice_max.dtplyr_step} 9 | \title{Subset rows using their positions} 10 | \usage{ 11 | \method{slice}{dtplyr_step}(.data, ..., .by = NULL) 12 | 13 | \method{slice_head}{dtplyr_step}(.data, ..., n, prop, by = NULL) 14 | 15 | \method{slice_tail}{dtplyr_step}(.data, ..., n, prop, by = NULL) 16 | 17 | \method{slice_min}{dtplyr_step}(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE) 18 | 19 | \method{slice_max}{dtplyr_step}(.data, order_by, ..., n, prop, by = NULL, with_ties = TRUE) 20 | } 21 | \arguments{ 22 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 23 | 24 | \item{...}{For \code{slice()}: 
<\code{\link[rlang:args_data_masking]{data-masking}}> 25 | Integer row values. 26 | 27 | Provide either positive values to keep, or negative values to drop. 28 | The values provided must be either all positive or all negative. 29 | Indices beyond the number of rows in the input are silently ignored. 30 | 31 | For \verb{slice_*()}, these arguments are passed on to methods.} 32 | 33 | \item{.by, by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 34 | 35 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 36 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 37 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 38 | 39 | \item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the 40 | proportion of rows to select. If neither are supplied, \code{n = 1} will be 41 | used. If \code{n} is greater than the number of rows in the group 42 | (or \code{prop > 1}), the result will be silently truncated to the group size. 43 | \code{prop} will be rounded towards zero to generate an integer number of 44 | rows. 45 | 46 | A negative value of \code{n} or \code{prop} will be subtracted from the group 47 | size. For example, \code{n = -2} with a group of 5 rows will select 5 - 2 = 3 48 | rows; \code{prop = -0.25} with 8 rows will select 8 * (1 - 0.25) = 6 rows.} 49 | 50 | \item{order_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variable or 51 | function of variables to order by. To order by multiple variables, wrap 52 | them in a data frame or tibble.} 53 | 54 | \item{with_ties}{Should ties be kept together? The default, \code{TRUE}, 55 | may return more rows than you request. 
Use \code{FALSE} to ignore ties, 56 | and return the first \code{n} rows.} 57 | } 58 | \description{ 59 | These are methods for the dplyr \code{\link[=slice]{slice()}}, \code{slice_head()}, \code{slice_tail()}, 60 | \code{slice_min()}, \code{slice_max()} and \code{slice_sample()} generics. They are 61 | translated to the \code{i} argument of \verb{[.data.table}. 62 | 63 | Unlike dplyr, \code{slice()} (and \code{slice()} alone) returns the same number of 64 | rows per group, regardless of whether or not the indices appear in each 65 | group. 66 | } 67 | \examples{ 68 | library(dplyr, warn.conflicts = FALSE) 69 | 70 | dt <- lazy_dt(mtcars) 71 | dt \%>\% slice(1, 5, 10) 72 | dt \%>\% slice(-(1:4)) 73 | 74 | # First and last rows based on existing order 75 | dt \%>\% slice_head(n = 5) 76 | dt \%>\% slice_tail(n = 5) 77 | 78 | # Rows with minimum and maximum values of a variable 79 | dt \%>\% slice_min(mpg, n = 5) 80 | dt \%>\% slice_max(mpg, n = 5) 81 | 82 | # slice_min() and slice_max() may return more rows than requested 83 | # in the presence of ties. 
Use with_ties = FALSE to suppress 84 | dt \%>\% slice_min(cyl, n = 1) 85 | dt \%>\% slice_min(cyl, n = 1, with_ties = FALSE) 86 | 87 | # slice_sample() allows you to randomly select with or without replacement 88 | dt \%>\% slice_sample(n = 5) 89 | dt \%>\% slice_sample(n = 5, replace = TRUE) 90 | 91 | # you can optionally weight by a variable - this code weights by the 92 | # physical weight of the cars, so heavy cars are more likely to get 93 | # selected 94 | dt \%>\% slice_sample(weight_by = wt, n = 5) 95 | } 96 | -------------------------------------------------------------------------------- /man/summarise.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-summarise.R 3 | \name{summarise.dtplyr_step} 4 | \alias{summarise.dtplyr_step} 5 | \title{Summarise each group to one row} 6 | \usage{ 7 | \method{summarise}{dtplyr_step}(.data, ..., .by = NULL, .groups = NULL) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of 13 | summary functions. The name will be the name of the variable in the result. 14 | 15 | The value can be: 16 | \itemize{ 17 | \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. 18 | \item A data frame, to add multiple columns from a single expression. 19 | } 20 | 21 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was 22 | deprecated as of 1.1.0. 
Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.} 23 | 24 | \item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 25 | 26 | <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to 27 | group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For 28 | details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} 29 | 30 | \item{.groups}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} Grouping structure of the 31 | result. 32 | \itemize{ 33 | \item "drop_last": dropping the last level of grouping. This was the 34 | only supported option before version 1.0.0. 35 | \item "drop": All levels of grouping are dropped. 36 | \item "keep": Same grouping structure as \code{.data}. 37 | \item "rowwise": Each row is its own group. 38 | } 39 | 40 | When \code{.groups} is not specified, it is chosen 41 | based on the number of rows of the results: 42 | \itemize{ 43 | \item If all the results have 1 row, you get "drop_last". 44 | \item If the number of rows varies, you get "keep" (note that returning a 45 | variable number of rows was deprecated in favor of \code{\link[dplyr:reframe]{reframe()}}, which 46 | also unconditionally drops all levels of grouping). 47 | } 48 | 49 | In addition, a message informs you of that choice, unless the result is ungrouped, 50 | the option "dplyr.summarise.inform" is set to \code{FALSE}, 51 | or when \code{summarise()} is called from a function in a package.} 52 | } 53 | \description{ 54 | This is a method for the dplyr \code{\link[=summarise]{summarise()}} generic. It is translated to 55 | the \code{j} argument of \verb{[.data.table}. 
56 | } 57 | \examples{ 58 | library(dplyr, warn.conflicts = FALSE) 59 | 60 | dt <- lazy_dt(mtcars) 61 | 62 | dt \%>\% 63 | group_by(cyl) \%>\% 64 | summarise(vs = mean(vs)) 65 | 66 | dt \%>\% 67 | group_by(cyl) \%>\% 68 | summarise(across(disp:wt, mean)) 69 | } 70 | -------------------------------------------------------------------------------- /man/transmute.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/step-subset-transmute.R 3 | \name{transmute.dtplyr_step} 4 | \alias{transmute.dtplyr_step} 5 | \title{Create new columns, dropping old} 6 | \usage{ 7 | \method{transmute}{dtplyr_step}(.data, ...) 8 | } 9 | \arguments{ 10 | \item{.data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} 11 | 12 | \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. 13 | The name gives the name of the column in the output. 14 | 15 | The value can be: 16 | \itemize{ 17 | \item A vector of length 1, which will be recycled to the correct length. 18 | \item A vector the same length as the current group (or the whole data frame 19 | if ungrouped). 20 | \item \code{NULL}, to remove the column. 21 | \item A data frame or tibble, to create multiple columns in the output. 22 | }} 23 | } 24 | \description{ 25 | This is a method for the dplyr \code{\link[=transmute]{transmute()}} generic. It is translated to 26 | the \code{j} argument of \verb{[.data.table}. 
27 | } 28 | \examples{ 29 | library(dplyr, warn.conflicts = FALSE) 30 | 31 | dt <- lazy_dt(dplyr::starwars) 32 | dt \%>\% transmute(name, sh = paste0(species, "/", homeworld)) 33 | } 34 | -------------------------------------------------------------------------------- /man/unite.dtplyr_step.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/unite.R 3 | \name{unite.dtplyr_step} 4 | \alias{unite.dtplyr_step} 5 | \title{Unite multiple columns into one by pasting strings together.} 6 | \usage{ 7 | \method{unite}{dtplyr_step}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame.} 11 | 12 | \item{col}{The name of the new column, as a string or symbol. 13 | 14 | This argument is passed by expression and supports 15 | \link[rlang:topic-inject]{quasiquotation} (you can unquote strings 16 | and symbols). The name is captured from the expression with 17 | \code{\link[rlang:defusing-advanced]{rlang::ensym()}} (note that this kind of interface where 18 | symbols do not represent actual objects is now discouraged in the 19 | tidyverse; we support it here for backward compatibility).} 20 | 21 | \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite} 22 | 23 | \item{sep}{Separator to use between values.} 24 | 25 | \item{remove}{If \code{TRUE}, remove input columns from output data frame.} 26 | 27 | \item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting 28 | each value.} 29 | } 30 | \description{ 31 | This is a method for the tidyr \code{unite()} generic. 
32 | } 33 | \examples{ 34 | library(tidyr) 35 | 36 | df <- lazy_dt(expand_grid(x = c("a", NA), y = c("b", NA))) 37 | df 38 | 39 | df \%>\% unite("z", x:y, remove = FALSE) 40 | 41 | # Separate is almost the complement of unite 42 | df \%>\% 43 | unite("xy", x:y) \%>\% 44 | separate(xy, c("x", "y")) 45 | # (but note `x` and `y` contain now "NA" not NA) 46 | } 47 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidyverse/dtplyr/75310e32cbc8130bfecd80cca83c7c8fa78de609/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | data.sqlite 6 | *.html 7 | cloud.noindex 8 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Revdeps 2 | 3 | ## New problems (1) 4 | 5 | |package |version |error |warning |note | 6 | 
|:-------|:-------|:------|:-------|:----| 7 | |[rFIA](problems.md#rfia)|1.0.0 |__+1__ | | | 8 | 9 | -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 11 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 4 | 5 | * We saw 1 new problem 6 | * We failed to check 0 packages 7 | 8 | Issues with CRAN packages are summarised below. 9 | 10 | ### New problems 11 | (This reports the first line of each new failure) 12 | 13 | * rFIA 14 | checking examples ... ERROR 15 | 16 | -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: Feb 19 2 | rel_release_date: two weeks 3 | my_news_url: https://github.com/tidyverse/dtplyr/blob/master/NEWS.md 4 | release_version: 1.1.0 5 | release_details: > 6 | This is a relatively short deadline, but it looks like the problems are 7 | relatively small, and I've contributed PRs where needed. 8 | 9 | -------------------------------------------------------------------------------- /revdep/failures.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | # rFIA 2 | 3 |
4 | 5 | * Version: 1.0.0 6 | * GitHub: https://github.com/hunter-stanke/rFIA 7 | * Source code: https://github.com/cran/rFIA 8 | * Date/Publication: 2021-12-15 18:10:02 UTC 9 | * Number of recursive dependencies: 84 10 | 11 | Run `revdepcheck::cloud_details(, "rFIA")` for more info 12 | 13 |
14 | 15 | ## Newly broken 16 | 17 | * checking examples ... ERROR 18 | ``` 19 | Running examples in ‘rFIA-Ex.R’ failed 20 | The error most likely occurred in: 21 | 22 | > ### Name: area 23 | > ### Title: Estimate land area from FIADB 24 | > ### Aliases: area 25 | > 26 | > ### ** Examples 27 | > 28 | > ## Load data from the rFIA package 29 | ... 30 | 14. │ └─tidyselect:::walk_data_tree(new, data_mask, context_mask) 31 | 15. │ └─tidyselect:::as_indices_sel_impl(...) 32 | 16. │ └─tidyselect:::as_indices_impl(...) 33 | 17. │ └─tidyselect:::chr_as_locations(x, vars, call = call, arg = arg) 34 | 18. │ └─vctrs::vec_as_location(...) 35 | 19. └─vctrs (local) ``() 36 | 20. └─vctrs:::stop_subscript_oob(...) 37 | 21. └─vctrs:::stop_subscript(...) 38 | 22. └─rlang::abort(...) 39 | Execution halted 40 | ``` 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(dtplyr) 3 | 4 | test_check("dtplyr") 5 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/count.md: -------------------------------------------------------------------------------- 1 | # can control name 2 | 3 | Code 4 | dt %>% count(name = 10) %>% collect() 5 | Condition 6 | Error in `check_name()`: 7 | ! 
`name` must be a string 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-call-pivot_longer.md: -------------------------------------------------------------------------------- 1 | # can pivot to multiple measure cols 2 | 3 | Code 4 | show_query(step) 5 | Output 6 | melt(DT, measure.vars = list(c("x1", "x2", "x3", "x4"), c("y1", 7 | "y2", "y3", "y4")), variable.name = "set", value.name = c("x", 8 | "y"), variable.factor = FALSE)[, `:=`(set = c("1", "1", "2", 9 | "2", "3", "3", "4", "4"))] 10 | 11 | # errors on unbalanced datasets 12 | 13 | Code 14 | pivot_longer(dt, everything(), names_to = c(".value", "id"), names_sep = "_") 15 | Condition 16 | Error in `pivot_longer()`: 17 | ! `data.table::melt()` doesn't currently support melting of unbalanced datasets. 18 | 19 | # informative errors on unsupported features 20 | 21 | Code 22 | dt %>% pivot_longer(names_ptypes = list()) 23 | Condition 24 | Error in `pivot_longer()`: 25 | ! `names_ptypes` is not supported by dtplyr 26 | Code 27 | dt %>% pivot_longer(names_transform = list()) 28 | Condition 29 | Error in `pivot_longer()`: 30 | ! `names_transform` is not supported by dtplyr 31 | Code 32 | dt %>% pivot_longer(values_ptypes = list()) 33 | Condition 34 | Error in `pivot_longer()`: 35 | ! `values_ptypes` is not supported by dtplyr 36 | Code 37 | dt %>% pivot_longer(values_transform = list()) 38 | Condition 39 | Error in `pivot_longer()`: 40 | ! `values_transform` is not supported by dtplyr 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-call-pivot_wider.md: -------------------------------------------------------------------------------- 1 | # names_glue affects output names 2 | 3 | Code 4 | show_query(step) 5 | Output 6 | setnames(dcast(DT, formula = "..." 
~ x + y, value.var = c("a", 7 | "b"))[, `:=`(".", NULL)], c("a_X_1", "a_Y_2", "b_X_1", "b_Y_2" 8 | ), c("X1_a", "Y2_a", "X1_b", "Y2_b")) 9 | 10 | # can sort column names 11 | 12 | Code 13 | show_query(step) 14 | Output 15 | setcolorder(dcast(DT, formula = "..." ~ chr, value.var = "int")[, 16 | `:=`(".", NULL)], c("Mon", "Tue", "Wed")) 17 | 18 | # can sort column names with id 19 | 20 | Code 21 | show_query(step) 22 | Output 23 | setcolorder(dcast(DT, formula = id ~ chr, value.var = "int"), 24 | c("id", "Mon", "Tue", "Wed")) 25 | 26 | # can repair names if requested 27 | 28 | Code 29 | pivot_wider(df, names_from = lab, values_from = val) 30 | Condition 31 | Error in `step_repair()`: 32 | ! Names must be unique. 33 | x These names are duplicated: 34 | * "x" at locations 1 and 2. 35 | Code 36 | pivot_wider(df, names_from = lab, values_from = val, names_repair = "unique") 37 | Message 38 | New names: 39 | * `x` -> `x...1` 40 | * `x` -> `x...2` 41 | Output 42 | Source: local data table [1 x 2] 43 | Call: setnames(dcast(copy(DT), formula = x ~ lab, value.var = "val"), 44 | 1:2, c("x...1", "x...2")) 45 | 46 | x...1 x...2 47 | 48 | 1 1 2 49 | 50 | # Use as.data.table()/as.data.frame()/as_tibble() to access results 51 | 52 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-call.md: -------------------------------------------------------------------------------- 1 | # but not with anything else 2 | 3 | Code 4 | dt %>% rename_with(1) 5 | Condition 6 | Error in `rename_with()`: 7 | ! 
`.fn` must be a function name or formula 8 | 9 | # rename_with generates minimal spec 10 | 11 | Code 12 | dt %>% rename_with(toupper) %>% show_query() 13 | Output 14 | setnames(copy(DT), toupper) 15 | Code 16 | dt %>% rename_with(toupper, 1:3) %>% show_query() 17 | Output 18 | setnames(copy(DT), c("a", "b", "c"), toupper) 19 | 20 | # can compute distinct computed variables 21 | 22 | Code 23 | dt %>% distinct(z = x + y) %>% show_query() 24 | Output 25 | unique(copy(dt)[, `:=`(z = x + y)][, `:=`(c("x", "y"), NULL)]) 26 | 27 | # errors are raised 28 | 29 | Code 30 | collect(drop_na(dt, "z")) 31 | Condition 32 | Error in `drop_na()`: 33 | ! Can't select columns that don't exist. 34 | x Column `z` doesn't exist. 35 | 36 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-colorder-relocate.md: -------------------------------------------------------------------------------- 1 | # can only supply one of .before and .after 2 | 3 | Code 4 | relocate(dt, y, .before = x, .after = x) 5 | Condition 6 | Error in `relocate()`: 7 | ! Can't supply both `.before` and `.after`. 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-colorder.md: -------------------------------------------------------------------------------- 1 | # can handle duplicate column names 2 | 3 | The column(s) x do not uniquely match a column in `x`. 4 | 5 | # checks col_order 6 | 7 | Every element of `col_order` must be unique. 8 | 9 | --- 10 | 11 | Every element of `col_order` must be unique. 12 | 13 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-group.md: -------------------------------------------------------------------------------- 1 | # can add groups if requested 2 | 3 | Code 4 | . 
<- dt %>% group_by(x) %>% group_by(y, add = TRUE) 5 | Condition 6 | Warning: 7 | The `add` argument of `group_by()` is deprecated as of dplyr 1.0.0. 8 | i Please use the `.add` argument instead. 9 | 10 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-join.md: -------------------------------------------------------------------------------- 1 | # performs cross join 2 | 3 | Code 4 | left_join(dt1, dt2, by = character()) 5 | Output 6 | Source: local data table [4 x 3] 7 | Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 8 | `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 9 | c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] 10 | 11 | x.x y x.y 12 | 13 | 1 1 a 3 14 | 2 1 a 4 15 | 3 2 a 3 16 | 4 2 a 4 17 | 18 | # Use as.data.table()/as.data.frame()/as_tibble() to access results 19 | 20 | --- 21 | 22 | Code 23 | right_join(dt1, dt2, by = character()) 24 | Output 25 | Source: local data table [4 x 3] 26 | Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 27 | `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 28 | c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] 29 | 30 | x.x y x.y 31 | 32 | 1 1 a 3 33 | 2 1 a 4 34 | 3 2 a 3 35 | 4 2 a 4 36 | 37 | # Use as.data.table()/as.data.frame()/as_tibble() to access results 38 | 39 | --- 40 | 41 | Code 42 | full_join(dt1, dt2, by = character()) 43 | Output 44 | Source: local data table [4 x 3] 45 | Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 46 | `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 47 | c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] 48 | 49 | x.x y x.y 50 | 51 | 1 1 a 3 52 | 2 1 a 4 53 | 3 2 a 3 54 | 4 2 a 4 55 | 56 | # Use as.data.table()/as.data.frame()/as_tibble() to access results 57 | 58 | --- 59 | 60 | 
Code 61 | inner_join(dt1, dt2, by = character()) 62 | Output 63 | Source: local data table [4 x 3] 64 | Call: setnames(setcolorder(copy(dt2)[, `:=`(.cross_join_col = 1)][copy(dt1)[, 65 | `:=`(.cross_join_col = 1)], on = .(.cross_join_col), allow.cartesian = TRUE], 66 | c(3L, 4L, 2L, 1L)), c("i.x", "x"), c("x.x", "x.y"))[, !".cross_join_col"] 67 | 68 | x.x y x.y 69 | 70 | 1 1 a 3 71 | 2 1 a 4 72 | 3 2 a 3 73 | 4 2 a 4 74 | 75 | # Use as.data.table()/as.data.frame()/as_tibble() to access results 76 | 77 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-mutate.md: -------------------------------------------------------------------------------- 1 | # unnamed arguments matching column names are ignored 2 | 3 | Code 4 | mutate(dt, y) 5 | Condition 6 | Error: 7 | ! object 'y' not found 8 | 9 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-subset-filter.md: -------------------------------------------------------------------------------- 1 | # errors for named input 2 | 3 | Code 4 | filter(dt, x = 1) 5 | Condition 6 | Error in `filter()`: 7 | ! Problem with `filter()` input `..1`. 8 | x Input `..1` is named. 9 | i This usually means that you've used `=` instead of `==`. 10 | i Did you mean `x == 1`? 11 | 12 | --- 13 | 14 | Code 15 | filter(dt, y > 1, x = 1) 16 | Condition 17 | Error in `filter()`: 18 | ! Problem with `filter()` input `..2`. 19 | x Input `..2` is named. 20 | i This usually means that you've used `=` instead of `==`. 21 | i Did you mean `x == 1`? 
22 | 23 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-subset-select.md: -------------------------------------------------------------------------------- 1 | # empty select returns no columns 2 | 3 | Code 4 | out <- lz %>% group_by(x) %>% select() 5 | Message 6 | Adding missing grouping variables: `x` 7 | 8 | # copied data: empty select returns no columns 9 | 10 | Code 11 | out <- lz %>% group_by(x) %>% select() 12 | Message 13 | Adding missing grouping variables: `x` 14 | 15 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-subset-separate.md: -------------------------------------------------------------------------------- 1 | # checks type of `into` and `sep` 2 | 3 | Code 4 | separate(dt, x, "x", FALSE) 5 | Condition 6 | Error in `separate()`: 7 | ! `sep` must be a character vector. 8 | 9 | --- 10 | 11 | Code 12 | separate(dt, x, FALSE) 13 | Condition 14 | Error in `separate()`: 15 | ! `into` must be a character vector. 16 | 17 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-subset-slice.md: -------------------------------------------------------------------------------- 1 | # slice_*() checks for empty ... 2 | 3 | Code 4 | slice_head(dt, 5) 5 | Condition 6 | Error in `slice_head()`: 7 | ! `n` must be explicitly named. 8 | i Did you mean `slice_head(n = 5)`? 9 | Code 10 | slice_tail(dt, 5) 11 | Condition 12 | Error in `slice_tail()`: 13 | ! `n` must be explicitly named. 14 | i Did you mean `slice_tail(n = 5)`? 15 | Code 16 | slice_min(dt, x, 5) 17 | Condition 18 | Error in `slice_min()`: 19 | ! `n` must be explicitly named. 20 | i Did you mean `slice_min(n = 5)`? 21 | Code 22 | slice_max(dt, x, 5) 23 | Condition 24 | Error in `slice_max()`: 25 | ! `n` must be explicitly named. 26 | i Did you mean `slice_max(n = 5)`? 
27 | Code 28 | slice_sample(dt, 5) 29 | Condition 30 | Error in `slice_sample()`: 31 | ! `n` must be explicitly named. 32 | i Did you mean `slice_sample(n = 5)`? 33 | 34 | --- 35 | 36 | Code 37 | slice_min(dt) 38 | Condition 39 | Error in `slice_min()`: 40 | ! `order_by` is absent but must be supplied. 41 | Code 42 | slice_max(dt) 43 | Condition 44 | Error in `slice_max()`: 45 | ! `order_by` is absent but must be supplied. 46 | 47 | # check_slice_catches common errors 48 | 49 | Code 50 | slice_head(dt, n = 1, prop = 1) 51 | Condition 52 | Error in `slice_head()`: 53 | ! Must supply exactly one of `n` and `prop` arguments. 54 | Code 55 | slice_head(dt, n = "a") 56 | Condition 57 | Error in `slice_head()`: 58 | ! `n` must be a single number. 59 | Code 60 | slice_head(dt, prop = "a") 61 | Condition 62 | Error in `slice_head()`: 63 | ! `prop` must be a single number. 64 | Code 65 | slice_head(dt, n = NA) 66 | Condition 67 | Error in `slice_head()`: 68 | ! `n` must be a single number. 69 | Code 70 | slice_head(dt, prop = NA) 71 | Condition 72 | Error in `slice_head()`: 73 | ! `prop` must be a single number. 74 | 75 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/step-subset-summarise.md: -------------------------------------------------------------------------------- 1 | # summarise(.groups=) 2 | 3 | Code 4 | eval_bare(expr(lazy_dt(data.frame(x = 1, y = 2), "DT") %>% group_by(x, y) %>% 5 | dplyr::summarise() %>% show_query()), env(global_env())) 6 | Message 7 | `summarise()` has grouped output by 'x'. You can override using the `.groups` argument. 
8 | Output 9 | unique(DT) 10 | 11 | --- 12 | 13 | `.groups` can't be "rowwise" in dtplyr 14 | i Possible values are NULL (default), "drop_last", "drop", and "keep" 15 | 16 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/tidyeval-across.md: -------------------------------------------------------------------------------- 1 | # across() does not support formulas with dots 2 | 3 | Code 4 | (expect_error(capture_across(dt, across(a:b, ~ log(.x, base = .y), base = 2)))) 5 | Output 6 | 7 | Error in `across_fun()`: 8 | ! `dtplyr::across()` does not support `...` when a purrr-style lambda is used in `.fns`. 9 | i Use a lambda instead. 10 | i Or inline them via a purrr-style lambda. 11 | Code 12 | (expect_error(capture_across(dt, across(a:b, list(~ log(.x, base = .y)), base = 2))) 13 | ) 14 | Output 15 | 16 | Error in `FUN()`: 17 | ! `dtplyr::across()` does not support `...` when a purrr-style lambda is used in `.fns`. 18 | i Use a lambda instead. 19 | i Or inline them via a purrr-style lambda. 20 | 21 | # across() gives informative errors 22 | 23 | Code 24 | capture_across(dt, across(a, 1)) 25 | Condition 26 | Error in `across_funs()`: 27 | ! `.fns` argument to dtplyr::across() must be a NULL, a function, formula, or list 28 | Code 29 | capture_across(dt, across(a, list(1))) 30 | Condition 31 | Error in `FUN()`: 32 | ! .fns argument to dtplyr::across() must contain a function or a formula 33 | x Problem with 1 34 | 35 | # if_all() gives informative errors 36 | 37 | Code 38 | capture_if_all(dt, if_all(a, 1)) 39 | Condition 40 | Error in `across_funs()`: 41 | ! `.fns` argument to dtplyr::across() must be a NULL, a function, formula, or list 42 | Code 43 | capture_if_all(dt, if_all(a, list(1))) 44 | Condition 45 | Error in `FUN()`: 46 | ! 
.fns argument to dtplyr::across() must contain a function or a formula 47 | x Problem with 1 48 | 49 | # if_all() cannot rename variables 50 | 51 | Code 52 | (expect_error(capture_if_all(dt, if_all(c(a = x, b = y))))) 53 | Output 54 | 55 | Error in `if_all()`: 56 | ! Can't rename variables in this context. 57 | 58 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/tidyeval.md: -------------------------------------------------------------------------------- 1 | # translates lag()/lead() 2 | 3 | The `order_by` argument of `lag()` is not supported by dtplyr 4 | 5 | # errors when `where()` is used, #271/#368 6 | 7 | This tidyselect interface doesn't support predicates. 8 | 9 | --- 10 | 11 | This tidyselect interface doesn't support predicates. 12 | 13 | # desc() checks the number of arguments 14 | 15 | Code 16 | capture_dot(df, desc(a, b)) 17 | Condition 18 | Error in `check_one_arg()`: 19 | ! `desc()` expects exactly one argument. 20 | 21 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/unite.md: -------------------------------------------------------------------------------- 1 | # errors on na.rm 2 | 3 | `na.rm` is not implemented in dtplyr 4 | 5 | -------------------------------------------------------------------------------- /tests/testthat/helpers-library.R: -------------------------------------------------------------------------------- 1 | library(dplyr, warn.conflicts = FALSE) 2 | library(tidyr, warn.conflicts = FALSE) 3 | -------------------------------------------------------------------------------- /tests/testthat/test-complete.R: -------------------------------------------------------------------------------- 1 | test_that("complete with no variables returns data as is", { 2 | mtcars_dt <- lazy_dt(mtcars, "DT") 3 | expect_equal(complete(mtcars_dt), mtcars_dt) 4 | }) 5 | 6 | test_that("basic invocation works", { 7 | tbl <- tibble(x = 1:2, y = 
1:2, z = 3:4) 8 | dt <- lazy_dt(tbl, "DT") 9 | out <- dt %>% complete(x, y) %>% collect() 10 | 11 | expect_equal(nrow(out), 4) 12 | expect_equal(out$z, c(3, NA, NA, 4)) 13 | }) 14 | 15 | test_that("empty expansion returns original", { 16 | tbl <- tibble(x = character()) 17 | dt <- lazy_dt(tbl, "DT") 18 | out <- dt %>% complete(y = NULL) %>% collect() 19 | expect_equal(out, tbl) 20 | 21 | tbl <- tibble(x = 1:4) 22 | dt <- lazy_dt(tbl, "DT") 23 | out <- dt %>% complete(y = NULL) %>% collect() 24 | expect_equal(out, tbl) 25 | }) 26 | -------------------------------------------------------------------------------- /tests/testthat/test-count.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("can be used grouped or ungrouped", { 3 | dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT") 4 | 5 | expect_equal( 6 | dt %>% count(x) %>% collect(), 7 | tibble(x = c(1, 2), n = c(3, 1)) 8 | ) 9 | expect_equal( 10 | dt %>% group_by(x) %>% count() %>% collect(), 11 | tibble(x = c(1, 2), n = c(3, 1)) %>% group_by(x) 12 | ) 13 | }) 14 | 15 | test_that("can control name", { 16 | dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT") 17 | 18 | expect_equal( 19 | dt %>% count(x, name = "y") %>% collect(), 20 | tibble(x = c(1, 2), y = c(3, 1)) 21 | ) 22 | expect_snapshot( 23 | dt %>% count(name = 10) %>% collect(), 24 | error = TRUE 25 | ) 26 | }) 27 | 28 | test_that("name can match existing group var", { 29 | dt <- lazy_dt(data.table(a = 2)) 30 | 31 | expect_equal( 32 | dt %>% group_by(a) %>% tally(name = 'a') %>% collect(), 33 | tibble(a = 1) 34 | ) 35 | expect_equal( 36 | dt %>% count(a, name = 'a') %>% collect(), 37 | tibble(a = 1) 38 | ) 39 | }) 40 | 41 | 42 | test_that("can weight", { 43 | dt <- lazy_dt(data.table(x = c(1, 1, 2), y = c(1, 2, 10)), "DT") 44 | expect_equal( 45 | dt %>% count(x, wt = y) %>% collect(), 46 | tibble(x = c(1, 2), n = c(3, 10)) 47 | ) 48 | expect_equal( 49 | dt %>% add_count(x, wt = y) %>% collect(), 50 | dt %>% 
mutate(n = c(3, 3, 10)) %>% collect() 51 | ) 52 | }) 53 | 54 | test_that("can sort", { 55 | dt <- lazy_dt(data.table(x = c(1, 1, 2), y = c(1, 2, 10)), "DT") 56 | expect_equal( 57 | dt %>% count(x, wt = y, sort = TRUE) %>% collect(), 58 | tibble(x = c(2, 1), n = c(10, 3)) 59 | ) 60 | expect_equal( 61 | dt %>% add_count(x, wt = y, sort = TRUE) %>% collect(), 62 | tibble(x = c(2, 1, 1), y = c(10, 1, 2), n = c(10, 3, 3)) 63 | ) 64 | }) 65 | 66 | test_that("tally works", { 67 | dt <- lazy_dt(data.table(x = c(1, 1, 1, 2)), "DT") 68 | expect_equal( 69 | dt %>% group_by(x) %>% tally() %>% collect(), 70 | tibble(x = c(1, 2), n = c(3, 1)) 71 | ) 72 | }) 73 | 74 | test_that("informs if n column already present, unless overridden", { 75 | dt <- lazy_dt(data.frame(n = c(1, 1, 2, 2, 2))) 76 | expect_message(out <- count(dt, n), "already present") 77 | expect_named(as_tibble(out), c("n", "nn")) 78 | 79 | # not a good idea, but supported 80 | expect_message(out <- count(dt, n, name = "n"), NA) 81 | expect_named(as_tibble(out), "n") 82 | 83 | expect_message(out <- count(dt, n, name = "nn"), NA) 84 | expect_named(as_tibble(out), c("n", "nn")) 85 | 86 | dt <- lazy_dt(data.frame(n = c(1, 1, 2, 2, 2), nn = 1:5)) 87 | expect_message(out <- count(dt, n), "already present") 88 | expect_named(as_tibble(out), c("n", "nn")) 89 | 90 | expect_message(out <- count(dt, n, nn), "already present") 91 | expect_named(as_tibble(out), c("n", "nn", "nnn")) 92 | }) 93 | 94 | test_that("name must be string", { 95 | dt <- lazy_dt(data.frame(x = c(1, 2))) 96 | expect_error(count(dt, x, name = 1), "string") 97 | expect_error(count(dt, x, name = letters), "string") 98 | }) 99 | 100 | # add_count --------------------------------------------------------------- 101 | 102 | test_that("add_count() gives expected calls and groups", { 103 | dt <- lazy_dt(data.frame(g = c(1, 2, 2, 2)), "DT") 104 | 105 | res <- dt %>% add_count(g) 106 | expect_equal(show_query(res), expr(copy(DT)[, `:=`(n = .N), by = .(g)])) 107 | 
expect_equal(res$groups, character()) 108 | 109 | res <- dt %>% group_by(g) %>% add_count() 110 | expect_equal(show_query(res), expr(copy(DT)[, `:=`(n = .N), by = .(g)])) 111 | expect_equal(res$groups, "g") 112 | }) 113 | -------------------------------------------------------------------------------- /tests/testthat/test-fill.R: -------------------------------------------------------------------------------- 1 | test_that("missings are filled correctly & translations are correct", { 2 | tbl <- tibble(x = c(NA, 1, NA, 2, NA, NA)) 3 | dt <- lazy_dt(tbl, "DT") 4 | 5 | step <- fill(dt, x) 6 | expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(x, "locf"))])) 7 | expect_equal(collect(step)$x, c(NA, 1, 1, 2, 2, 2)) 8 | 9 | step <- fill(dt, x, .direction = "up") 10 | expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(x, "nocb"))])) 11 | expect_equal(collect(step)$x, c(1, 1, 2, 2, NA, NA)) 12 | 13 | step <- fill(dt, x, .direction = 'downup') 14 | expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(nafill(x, "locf"), "nocb"))])) 15 | expect_equal(collect(step)$x, c(1, 1, 1, 2, 2, 2)) 16 | 17 | step <- fill(dt, x, .direction = 'updown') 18 | expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = nafill(nafill(x, "nocb"), "locf"))])) 19 | expect_equal(collect(step)$x, c(1, 1, 2, 2, 2, 2)) 20 | }) 21 | 22 | test_that("auto-conversion to lazy_dt works as intended", { 23 | dt <- data.table(x = c(NA, 1, NA, 2, NA, NA)) 24 | 25 | out <- collect(fill(dt, x)) 26 | expect_equal(out$x, c(NA, 1, 1, 2, 2, 2)) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-reframe.R: -------------------------------------------------------------------------------- 1 | test_that("`reframe()` allows summaries", { 2 | df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5)) 3 | 4 | expect_identical( 5 | collect(reframe(df, x = mean(x))), 6 | tibble(x = 3) 7 | ) 8 | expect_identical( 9 | collect(reframe(df, x = 
mean(x), .by = g)), 10 | tibble(g = c(1, 2), x = c(2, 4.5)) 11 | ) 12 | }) 13 | 14 | test_that("`reframe()` allows size 0 results", { 15 | df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5)) 16 | gdf <- group_by(df, g) 17 | 18 | expect_identical( 19 | collect(reframe(df, x = which(x > 5))), 20 | tibble(x = integer()) 21 | ) 22 | expect_identical( 23 | collect(reframe(df, x = which(x > 5), .by = g)), 24 | tibble(g = double(), x = integer()) 25 | ) 26 | expect_identical( 27 | collect(reframe(gdf, x = which(x > 5))), 28 | tibble(g = double(), x = integer()) 29 | ) 30 | }) 31 | 32 | test_that("`reframe()` allows size >1 results", { 33 | df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5)) 34 | gdf <- group_by(df, g) 35 | 36 | expect_identical( 37 | collect(reframe(df, x = which(x > 2))), 38 | tibble(x = 3:5) 39 | ) 40 | expect_identical( 41 | collect(reframe(df, x = which(x > 2), .by = g)), 42 | tibble(g = c(1, 2, 2), x = c(3L, 1L, 2L)) 43 | ) 44 | expect_identical( 45 | collect(reframe(gdf, x = which(x > 2))), 46 | tibble(g = c(1, 2, 2), x = c(3L, 1L, 2L)) 47 | ) 48 | }) 49 | 50 | test_that("`reframe()` ungroups output", { 51 | df <- lazy_dt(tibble(g = c(1, 1, 1, 2, 2), x = 1:5)) 52 | gdf <- group_by(df, g, x) 53 | res <- reframe(gdf, row_num = row_number()) 54 | 55 | expect_true(length(group_vars(res)) == 0) 56 | }) 57 | -------------------------------------------------------------------------------- /tests/testthat/test-replace_na.R: -------------------------------------------------------------------------------- 1 | # lazy data.tables ----------------------------------------------------------- 2 | 3 | test_that("empty call does nothing", { 4 | tbl <- tibble(x = c(1, NA)) 5 | dt <- lazy_dt(tbl, "DT") 6 | out <- collect(replace_na(dt)) 7 | expect_equal(out, tbl) 8 | }) 9 | 10 | test_that("missing values are replaced", { 11 | tbl <- tibble(x = c(1, NA)) 12 | dt <- lazy_dt(tbl, "DT") 13 | step <- replace_na(dt, list(x = 0)) 14 | out <- collect(step) 15 | 
expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = fcoalesce(x, 0))])) 16 | expect_equal(out$x, c(1, 0)) 17 | }) 18 | 19 | test_that("don't complain about variables that don't exist", { 20 | tbl <- tibble(a = c(1, NA)) 21 | dt <- lazy_dt(tbl, "DT") 22 | out <- collect(replace_na(dt, list(a = 100, b = 0))) 23 | expect_equal(out, tibble(a = c(1, 100))) 24 | }) 25 | 26 | # Inside mutate() ----------------------------------------------------------- 27 | 28 | test_that("missing values are replaced", { 29 | tbl <- tibble(x = c(1, NA)) 30 | dt <- lazy_dt(tbl, "DT") 31 | step <- mutate(dt, x = replace_na(x, 0)) 32 | out <- collect(step) 33 | expect_equal(show_query(step), expr(copy(DT)[, `:=`(x = fcoalesce(x, 0))])) 34 | expect_equal(out$x, c(1, 0)) 35 | }) 36 | -------------------------------------------------------------------------------- /tests/testthat/test-step-colorder-relocate.R: -------------------------------------------------------------------------------- 1 | test_that(".before and .after relocate individual cols", { 2 | dt <- lazy_dt(data.table(x = 1, y = 1), "DT") 3 | 4 | expect_equal( 5 | dt %>% relocate(x, .after = y) %>% show_query(), 6 | expr(setcolorder(copy(DT), !!c("y", "x"))) 7 | ) 8 | expect_equal( 9 | dt %>% relocate(y, .before = x) %>% show_query(), 10 | expr(setcolorder(copy(DT), !!c("y", "x"))) 11 | ) 12 | }) 13 | 14 | test_that("can move blocks of variables", { 15 | dt <- lazy_dt(data.table(x = 1, a = 1, y = 1, b = 1), "DT") 16 | 17 | expect_equal( 18 | dt %>% relocate(y, b, .before = a) %>% show_query(), 19 | expr(setcolorder(copy(DT), !!c("x", "y", "b", "a"))) 20 | ) 21 | expect_equal( 22 | dt %>% relocate(any_of(c("y", "b")), .before = a) %>% show_query(), 23 | expr(setcolorder(copy(DT), !!c("x", "y", "b", "a"))) 24 | ) 25 | }) 26 | 27 | test_that("All columns move before (after) columns in .before (.after)", { 28 | dt <- lazy_dt(data.table(x = 1, a = 1, y = 1, b = 1), "DT") 29 | 30 | expect_equal( 31 | dt %>% relocate(y, b, .before = 
c(x, a)) %>% show_query(), 32 | expr(setcolorder(copy(DT), !!c("y", "b", "x", "a"))) 33 | ) 34 | expect_equal( 35 | dt %>% relocate(x, a, .after = c(y, b)) %>% show_query(), 36 | expr(setcolorder(copy(DT), !!c("y", "b", "x", "a"))) 37 | ) 38 | }) 39 | 40 | test_that("extra variables in .before/.after unaffected", { 41 | dt <- lazy_dt(data.table(a = 1, b = 1, c = 1, d = 1, e = 1), "DT") 42 | 43 | expect_equal( 44 | dt %>% relocate(b, .after = c(a, c, e)) %>% show_query(), 45 | expr(setcolorder(copy(DT), !!c("a", "c", "d", "e", "b"))) 46 | ) 47 | expect_equal( 48 | dt %>% relocate(e, .before = c(b, d)) %>% show_query(), 49 | expr(setcolorder(copy(DT), !!c("a", "e", "b", "c", "d"))) 50 | ) 51 | }) 52 | 53 | test_that("no .before/.after moves to front", { 54 | dt <- lazy_dt(data.table(x = 1, y = 2), "DT") 55 | 56 | expect_equal( 57 | dt %>% relocate(y) %>% show_query(), 58 | expr(setcolorder(copy(DT), !!c("y", "x"))) 59 | ) 60 | }) 61 | 62 | test_that("can only supply one of .before and .after", { 63 | dt <- lazy_dt(data.table(x = 1, y = 1), "DT") 64 | 65 | expect_snapshot(relocate(dt, y, .before = x, .after = x), error = TRUE) 66 | }) 67 | 68 | test_that("relocate() respects order specified by ...", { 69 | dt <- lazy_dt(data.table(a = 1, x = 1, b = 1, z = 1, y = 1), "DT") 70 | 71 | expect_equal( 72 | dt %>% relocate(x, y, z, .before = x) %>% show_query(), 73 | expr(setcolorder(copy(DT), !!c("a", "x", "y", "z", "b"))) 74 | ) 75 | expect_equal( 76 | dt %>% relocate(x, y, z, .after = last_col()) %>% show_query(), 77 | expr(setcolorder(copy(DT), !!c("a", "b", "x", "y", "z"))) 78 | ) 79 | expect_equal( 80 | dt %>% relocate(x, a, z) %>% show_query(), 81 | expr(setcolorder(copy(DT), !!c("x", "a", "z", "b", "y"))) 82 | ) 83 | }) 84 | 85 | test_that("relocate() only not alter grouping", { 86 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 87 | 88 | expect_equal( 89 | dt %>% group_by(x, y) %>% relocate(y, .before = x) %>% .$groups, 90 | c("x", "y") 91 | ) 92 | }) 93 | 
# tests/testthat/test-step-colorder.R
#
# Tests for step_colorder(): translation to data.table::setcolorder(),
# validation of `col_order`, and the no-op shortcuts.

test_that("can reorder columns", {
  lazy <- lazy_dt(data.frame(x = 1:3, y = 1), "DT")

  # Reordering by column name.
  by_name <- step_colorder(lazy, c("y", "x"))
  expect_equal(
    show_query(by_name),
    expr(setcolorder(copy(DT), !!c("y", "x")))
  )
  expect_named(collect(by_name), c("y", "x"))

  # Reordering by integer position.
  by_position <- step_colorder(lazy, c(2L, 1L))
  expect_equal(
    show_query(by_position),
    expr(setcolorder(copy(DT), !!c(2L, 1L)))
  )
  expect_named(collect(by_position), c("y", "x"))
})

test_that("can handle duplicate column names", {
  lazy <- lazy_dt(data.table(x = 3, x = 2, y = 1), "DT")

  # The name "x" is ambiguous here, so selecting by name must error.
  expect_snapshot_error(step_colorder(lazy, c("y", "x")))

  # Positions stay unambiguous even when names are duplicated.
  by_position <- step_colorder(lazy, c(3L, 2L))
  expect_equal(
    show_query(by_position),
    expr(setcolorder(copy(DT), !!c(3L, 2L)))
  )
  expect_equal(
    as.data.table(by_position),
    data.table(y = 1, x = 2, x = 3)
  )
})

test_that("checks col_order", {
  lazy <- lazy_dt(data.frame(x = 1:3, y = 1), "DT")

  # Repeated entries are rejected for both names and positions.
  expect_snapshot_error(step_colorder(lazy, c("y", "y")))
  expect_snapshot_error(step_colorder(lazy, c(1L, 1L)))
})

test_that("works for empty input", {
  lazy <- lazy_dt(data.frame(x = 1), "DT")

  # An empty ordering returns the input step unchanged.
  expect_equal(step_colorder(lazy, character()), lazy)
  expect_equal(step_colorder(lazy, integer()), lazy)
})

test_that("doesn't add step if not necessary", {
  lazy <- lazy_dt(data.frame(x = 1, y = 2), "DT")

  # Orderings that match the current column order return the input step.
  expect_equal(step_colorder(lazy, c("x", "y")), lazy)
  expect_equal(step_colorder(lazy, "x"), lazy)

  expect_equal(step_colorder(lazy, 1:2), lazy)
  expect_equal(step_colorder(lazy, 1L), lazy)
})

# tests/testthat/test-step-first.R
#
# Tests for the first step of a lazy pipeline (step_first() / lazy_dt()):
# constructor defaults, copy/mutability flags, and key handling.

test_that("constructor has sensible defaults", {
  dt <- data.table(x = 1:2, y = 1:2)
  step <- step_first(dt)

  expect_s3_class(step, "dtplyr_step_first")
  expect_equal(step$parent, dt)
  expect_equal(step$vars, c("x", "y"))
  expect_equal(step$groups, character())
  # No name was supplied, so one is generated; it should contain "_DT".
  expect_match(as.character(step$name), "_DT")
})


# mutability --------------------------------------------------------------

test_that("doesn't need copy", {
  dt <- lazy_dt(mtcars)
  expect_false(dt$needs_copy)
})

test_that("mutable object must be a data table", {
  # mtcars is a data.frame, so asking for a mutable lazy_dt must fail.
  expect_error(lazy_dt(mtcars, immutable = FALSE), "not already a data table")
})

test_that("mutable object never needs copy", {
  dt <- lazy_dt(as.data.table(mtcars), immutable = FALSE)
  expect_false(dt$needs_copy)
  expect_false(dt %>% mutate(x = 1) %>% .$needs_copy)
})

test_that("dt_call() copies if requested", {
  dt <- lazy_dt(mtcars, name = "DT")

  expect_equal(dt_call(dt, FALSE), quote(DT))
  expect_equal(dt_call(dt, TRUE), quote(copy(DT)))
})

test_that("lazy_dt doesn't copy input", {
  dt <- data.table(x = 1)
  lz <- lazy_dt(dt)

  # Same memory address means the data.table was wrapped, not duplicated.
  expect_equal(data.table::address(dt), data.table::address(lz$parent))
})

# keys --------------------------------------------------------------------

test_that("can set keys", {
  dt <- lazy_dt(mtcars, key_by = cyl)
  expect_equal(data.table::key(dt$parent), "cyl")
})

test_that("setting doesn't modify data.table", {
  dt1 <- data.table(x = c(5, 1, 2))
  dt2 <- lazy_dt(dt1, key_by = x)

  # Inspect the original data.table itself. `dt1` is not a lazy step, so
  # `dt1$parent` is NULL and checking its key could never fail.
  expect_null(data.table::key(dt1))
  # Setting a key sorts by reference; the input's row order must be unchanged.
  expect_equal(dt1$x, c(5, 1, 2))
  expect_equal(data.table::key(dt2$parent), "x")
})

59 | # groups ------------------------------------------------------------------ 60 | 61 | test_that("keeps groups", { 62 | dt <- lazy_dt(group_by(mtcars, cyl)) 63 | expect_equal(group_vars(dt), "cyl") 64 | }) 65 | -------------------------------------------------------------------------------- /tests/testthat/test-step-group.R: -------------------------------------------------------------------------------- 1 | test_that("grouping and ungrouping adjust groups field", { 2 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) 3 | 4 | expect_equal(dt %>% .$groups, character()) 5 | expect_equal(dt %>% group_by(x) %>% .$groups, "x") 6 | expect_equal(dt %>% group_by(a = x) %>% .$groups, "a") 7 | expect_equal(dt %>% group_by(x) %>% group_by(y) %>% .$groups, "y") 8 | expect_equal(dt %>% group_by(x) %>% ungroup() %>% .$groups, character()) 9 | }) 10 | 11 | test_that("ungroup can remove variables from grouping", { 12 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) %>% group_by(x, y) 13 | 14 | expect_equal(dt %>% ungroup(y) %>% group_vars(), "x") 15 | }) 16 | 17 | test_that("can use across", { 18 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) 19 | expect_equal(dt %>% group_by(across(everything())) %>% .$groups, c("x", "y")) 20 | }) 21 | 22 | test_that("can add groups if requested", { 23 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 24 | expect_equal( 25 | dt %>% group_by(x) %>% group_by(y, .add = TRUE) %>% .$groups, 26 | c("x", "y") 27 | ) 28 | 29 | expect_snapshot({ 30 | . 
<- dt %>% group_by(x) %>% group_by(y, add = TRUE) 31 | }) 32 | }) 33 | 34 | test_that("grouping can compute new variables if needed", { 35 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 36 | 37 | expect_equal( 38 | dt %>% group_by(xy = x + y) %>% show_query(), 39 | expr(copy(DT)[, `:=`(xy = x + y)]) 40 | ) 41 | 42 | # also works when RHS is only a symbol 43 | expect_equal( 44 | dt %>% group_by(z = x) %>% show_query(), 45 | expr(copy(DT)[, `:=`(z = x)]) 46 | ) 47 | 48 | expect_equal( 49 | dt %>% group_by(xy = x + y) %>% summarise(x = mean(x)) %>% show_query(), 50 | expr(copy(DT)[, `:=`(xy = x + y)][, .(x = mean(x)), keyby = .(xy)]) 51 | ) 52 | }) 53 | 54 | test_that("vars set correctly", { 55 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) 56 | expect_equal(dt %>% group_by(x) %>% .$vars, c("x", "y")) 57 | }) 58 | 59 | test_that("`key` switches between keyby= and by=", { 60 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 61 | dt1 <- lazy_dt(mtcars, "DT1") 62 | 63 | expect_equal( 64 | dt %>% group_by(xy = x + y, arrange = FALSE) %>% summarize(x = mean(x)) %>% show_query(), 65 | expr(copy(DT)[, `:=`(xy = x + y)][, .(x = mean(x)), by = .(xy)]) 66 | ) 67 | 68 | expect_equal( 69 | dt1 %>% group_by(cyl, arrange = FALSE) %>% summarize(mean_mpg = mean(mpg)) %>% show_query(), 70 | expr(DT1[, .(mean_mpg = mean(mpg)), by = .(cyl)]) 71 | ) 72 | 73 | expect_equal( 74 | dt1 %>% group_by(cyl) %>% summarize(mean_mpg = mean(mpg)) %>% show_query(), 75 | expr(DT1[, .(mean_mpg = mean(mpg)), keyby = .(cyl)]) 76 | ) 77 | }) 78 | 79 | test_that("emtpy and NULL group_by ungroups", { 80 | dt <- lazy_dt(data.frame(x = 1)) %>% group_by(x) 81 | expect_equal(group_by(dt) %>% group_vars(), character()) 82 | expect_equal(group_by(dt, NULL) %>% group_vars(), character()) 83 | expect_equal(group_by(dt, !!!list()) %>% group_vars(), character()) 84 | }) 85 | 86 | test_that("only adds step if necessary", { 87 | dt <- lazy_dt(data.table(x = 1, y = 1), "DT") 88 | expect_equal(dt %>% group_by(), 
dt) 89 | 90 | expect_equal(dt %>% ungroup(), dt) 91 | expect_equal(dt %>% ungroup(x), dt) 92 | 93 | dt_grouped <- dt %>% group_by(x) 94 | dt_grouped2 <- dt_grouped %>% group_by(x) 95 | expect_equal(dt_grouped, dt_grouped2) 96 | expect_equal(dt_grouped %>% ungroup(y), dt_grouped) 97 | 98 | out <- dt_grouped %>% mutate(y = y - mean(y)) %>% group_by() 99 | expect_s3_class(out, "dtplyr_step_group") 100 | expect_equal(group_vars(out), character()) 101 | }) 102 | 103 | test_that("works with non-standard column names, #451", { 104 | dt <- lazy_dt(tibble(`a a` = "a")) 105 | res <- dt %>% 106 | group_by(`a a`) %>% 107 | count() %>% 108 | as_tibble() 109 | expect_named(res, c("a a", "n")) 110 | expect_equal(res$`a a`, "a") 111 | }) 112 | -------------------------------------------------------------------------------- /tests/testthat/test-step-modify.R: -------------------------------------------------------------------------------- 1 | test_that("group_modify creates modified data frame", { 2 | dt <- lazy_dt(data.table(g = c(1, 1, 2), x = 1:3)) 3 | 4 | foo <- function(rows, g) { 5 | list(nc = ncol(rows), nr = nrow(rows)) 6 | } 7 | out <- dt %>% group_by(g) %>% group_modify(foo) %>% collect() 8 | 9 | expect_equal(out$nc, c(1, 1)) 10 | expect_equal(out$nr, c(2, 1)) 11 | }) 12 | 13 | test_that("group_map works", { 14 | dt <- lazy_dt(data.table(g = c(1, 1, 2), x = 1:3)) 15 | out <- dt %>% group_by(g) %>% group_map(~ nrow(.)) 16 | expect_equal(out, list(2, 1)) 17 | 18 | # don't include group data 19 | out <- dt %>% group_by(g) %>% group_map(~ ncol(.)) 20 | expect_equal(out, list(1, 1)) 21 | }) 22 | -------------------------------------------------------------------------------- /tests/testthat/test-step-nest.R: -------------------------------------------------------------------------------- 1 | test_that("nest turns grouped values into one list-df", { 2 | ldt <- lazy_dt(tibble(x = c(1, 1, 1), y = 1:3), "DT") 3 | out <- nest(ldt, data = y) 4 | outc <- collect(out) 5 | 6 | 
expect_equal(show_query(out), expr(DT[, .(data = .(.SD)), by = .(x)])) 7 | 8 | expect_equal(group_vars(out), character()) 9 | expect_equal(out$vars, c("x", "data")) 10 | 11 | expect_equal(outc$x, 1) 12 | expect_equal(length(outc$data), 1L) 13 | expect_equal(outc$data[[1L]], data.table(y = 1:3)) 14 | }) 15 | 16 | test_that("nest uses grouping vars if present", { 17 | ldt <- lazy_dt(tibble(x = c(1, 1, 1), y = 1:3), "DT") 18 | out <- nest(dplyr::group_by(ldt, x)) 19 | 20 | expect_equal(group_vars(out), "x") 21 | expect_equal(show_query(out), expr(DT[, .(data = .(.SD)), by = .(x)])) 22 | }) 23 | 24 | test_that("provided grouping vars override grouped defaults", { 25 | ldt <- tibble(x = 1, y = 2, z = 3) %>% group_by(x) %>% lazy_dt("DT") 26 | out <- nest(ldt, data = y) 27 | 28 | expect_equal(show_query(out), expr(DT[, .(data = .(.SD)), by = .(x, z)])) 29 | expect_equal(group_vars(out), "x") 30 | expect_equal(out$vars, c("x", "z", "data")) 31 | }) 32 | 33 | test_that("puts data into the correct row", { 34 | ldt <- tibble(x = 1:3, y = c("B", "A", "A")) %>% lazy_dt() 35 | out <- nest(ldt, data = x) %>% collect() %>% dplyr::filter(y == "B") 36 | expect_equal(out$data[[1]]$x, 1) 37 | }) 38 | 39 | test_that("nesting everything yields a simple data frame", { 40 | dt <- data.table(x = 1:3, y = c("B", "A", "A")) 41 | ldt <- lazy_dt(dt, "DT") 42 | out <- nest(ldt, data = c(x, y)) 43 | 44 | expect_equal(show_query(out), expr(DT[, .(data = .(.SD))])) 45 | expect_equal(out$vars, "data") 46 | 47 | expect_equal(collect(out)$data, list(dt)) 48 | }) 49 | 50 | test_that("nest preserves order of data", { 51 | ldt <- lazy_dt(tibble(x = c(1, 3, 2, 3, 2), y = 1:5), "DT") 52 | out <- nest(ldt, data = y) 53 | expect_equal(collect(out)$x, c(1, 3, 2)) 54 | }) 55 | 56 | test_that("can strip names", { 57 | ldt <- lazy_dt(tibble(x = c(1, 1, 1), ya = 1:3, yb = 4:6), "DT") 58 | out <- nest(ldt, y = starts_with("y"), .names_sep = "") 59 | 60 | expect_equal( 61 | show_query(out), 62 | expr(DT[, .(y = 
.(data.table(a = ya, b = yb))), by = .(x)]) 63 | ) 64 | 65 | expect_named(collect(out)$y[[1]], c("a", "b")) 66 | }) 67 | 68 | test_that("can nest multiple columns", { 69 | ldt <- lazy_dt(tibble(x = 1, a1 = 1, a2 = 2, b1 = 1, b2 = 2), "DT") 70 | out <- ldt %>% nest(a = c(a1, a2), b = c(b1, b2)) 71 | 72 | expect_equal( 73 | show_query(out), 74 | expr(DT[, .(a = .(data.table(a1, a2)), b = .(data.table(b1, b2))), by = .(x)]) 75 | ) 76 | expect_equal(out$vars, c("x", "a", "b")) 77 | }) 78 | 79 | test_that("nesting no columns nests all inputs", { 80 | # included only for backward compatibility 81 | ldt <- lazy_dt(tibble(a1 = 1, a2 = 2, b1 = 1, b2 = 2), "DT") 82 | expect_warning(out <- nest(ldt), "must not be empty") 83 | expect_equal(show_query(out), expr(DT[, .(data = .(.SD))])) 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-step-set.R: -------------------------------------------------------------------------------- 1 | test_that("basic ops generate expected translation", { 2 | dt1 <- lazy_dt(data.frame(x = 1:3), "dt1") 3 | dt2 <- lazy_dt(data.frame(x = 2L), "dt2") 4 | 5 | expect_equal( 6 | dt1 %>% intersect(dt2) %>% show_query(), 7 | expr(fintersect(dt1, dt2)) 8 | ) 9 | expect_equal( 10 | dt1 %>% union(dt2) %>% show_query(), 11 | expr(funion(dt1, dt2)) 12 | ) 13 | expect_equal( 14 | dt1 %>% union_all(dt2) %>% show_query(), 15 | expr(funion(dt1, dt2, all = TRUE)) 16 | ) 17 | expect_equal( 18 | dt1 %>% setdiff(dt2) %>% show_query(), 19 | expr(fsetdiff(dt1, dt2)) 20 | ) 21 | }) 22 | 23 | test_that("joins captures locals from both parents", { 24 | dt1 <- lazy_dt(data.frame(x = 1)) %>% mutate(y = 1) %>% compute("D1") 25 | dt2 <- lazy_dt(data.frame(x = 1)) %>% mutate(z = 1) %>% compute("D2") 26 | 27 | expect_named(intersect(dt1, dt2)$locals, c("D1", "D2")) 28 | }) 29 | 30 | test_that("vars set correctly", { 31 | # data.table functions require the inputs to have same columns 32 | dt1 <- lazy_dt(data.frame(x = 
1, y = 2), "dt1") 33 | dt2 <- lazy_dt(data.frame(x = 2, y = 2), "dt2") 34 | 35 | expect_equal(dt1 %>% union(dt2) %>% .$vars, c("x", "y")) 36 | }) 37 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-arrange.R: -------------------------------------------------------------------------------- 1 | test_that("arrange orders variables", { 2 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 3 | 4 | expect_equal( 5 | dt %>% arrange(x) %>% show_query(), 6 | expr(DT[order(x)]) 7 | ) 8 | }) 9 | 10 | test_that("arrange doesn't use, but still preserves, grouping", { 11 | dt <- group_by(lazy_dt(data.table(x = 1, y = 2), "DT"), x) 12 | 13 | step <- arrange(dt, y) 14 | expect_equal(step$groups, "x") 15 | expect_equal(dt_call(step), expr(DT[order(y)])) 16 | 17 | step2 <- arrange(dt, y, .by_group = TRUE) 18 | expect_equal(dt_call(step2), expr(DT[order(x, y)])) 19 | }) 20 | 21 | test_that("empty arrange returns input unchanged", { 22 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 23 | expect_true(identical(arrange(dt), dt)) 24 | }) 25 | 26 | test_that("can use with across", { 27 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 28 | 29 | expect_equal( 30 | dt %>% arrange(across(x:y)) %>% show_query(), 31 | expr(DT[order(x, y)]) 32 | ) 33 | }) 34 | 35 | test_that("vars set correctly", { 36 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) 37 | expect_equal(dt %>% arrange(x) %>% .$vars, c("x", "y")) 38 | }) 39 | 40 | test_that("desc works with internal quosure", { 41 | dt <- lazy_dt(data.table(x = c(4,3,9,7), y = 1:4)) 42 | 43 | desc_df <- dt %>% arrange(desc(!!quo(x))) %>% collect() 44 | 45 | expect_equal(desc_df$x, c(9,7,4,3)) 46 | }) 47 | 48 | test_that("desc works .data pronoun", { 49 | dt <- lazy_dt(data.table(x = c(4,3,9,7), y = 1:4)) 50 | 51 | desc_df <- dt %>% arrange(desc(.data$x)) %>% collect() 52 | 53 | expect_equal(desc_df$x, c(9,7,4,3)) 54 | }) 55 | 56 | test_that("only add step if necessary", 
{ 57 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) 58 | 59 | expect_equal(dt %>% arrange(), dt) 60 | expect_equal(dt %>% arrange(!!!list()), dt) 61 | }) 62 | 63 | test_that("uses setorder when there is already a copy", { 64 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 65 | 66 | # Works with implicit copy 67 | step_implicit <- dt %>% 68 | filter(x < 4) %>% 69 | arrange(x, y) 70 | 71 | expect_equal( 72 | show_query(step_implicit), 73 | expr(setorder(DT[x < 4], x, y, na.last = TRUE)) 74 | ) 75 | 76 | # Works with explicit copy 77 | step_explicit <- dt %>% 78 | mutate(x = x * 2) %>% 79 | arrange(x, -y) 80 | 81 | expect_equal( 82 | show_query(step_explicit), 83 | expr(setorder(copy(DT)[, `:=`(x = x * 2)], x, -y, na.last = TRUE)) 84 | ) 85 | }) 86 | 87 | test_that("setorder places NAs last", { 88 | dt <- lazy_dt(tibble(x = c("b", NA, "a")), "DT") 89 | dt$needs_copy <- TRUE 90 | 91 | # Works with implicit copy 92 | res <- dt %>% 93 | arrange(x) %>% 94 | as.data.table() 95 | 96 | expect_equal(res$x, c("a", "b", NA)) 97 | }) 98 | 99 | test_that("works with a transmute expression", { 100 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 101 | 102 | step <- dt %>% 103 | arrange(x + 1) 104 | expect_equal(show_query(step), expr(DT[order(x + 1)])) 105 | 106 | # Works with complex expression 107 | step <- dt %>% 108 | arrange(-(x + y)) 109 | expect_equal(show_query(step), expr(DT[order(-(x + y))])) 110 | }) 111 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-do.R: -------------------------------------------------------------------------------- 1 | test_that("basic operation as expected", { 2 | dt <- lazy_dt(data.frame(g = c(1, 1, 2), x = 1:3), "DT") 3 | 4 | expect_equal( 5 | dt %>% do(y = ncol(.)) %>% show_query(), 6 | expr(DT[, .(y = .(ncol(.SD)))]) 7 | ) 8 | 9 | expect_equal( 10 | dt %>% group_by(g) %>% do(y = ncol(.)) %>% show_query(), 11 | expr(DT[, .(y = .(ncol(.SD))), keyby = .(g)]) 12 | ) 13 | }) 
14 | 15 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-expand.R: -------------------------------------------------------------------------------- 1 | test_that("expand completes all values", { 2 | tbl <- tibble(x = 1:2, y = 1:2) 3 | dt <- lazy_dt(tbl, "DT") 4 | step <- expand(dt, x, y) 5 | out <- collect(step) 6 | 7 | expect_equal( 8 | show_query(step), 9 | expr(DT[, CJ(x = x, y = y, unique = TRUE)]) 10 | ) 11 | expect_equal(step$vars, c("x", "y")) 12 | expect_equal(nrow(out), 4) 13 | }) 14 | 15 | test_that("multiple variables in one arg doesn't expand", { 16 | tbl <- tibble(x = 1:2, y = 1:2) 17 | dt <- lazy_dt(tbl, "DT") 18 | step <- expand(dt, c(x, y)) 19 | out <- collect(step) 20 | 21 | expect_equal(nrow(out), 2) 22 | }) 23 | 24 | test_that("works with unnamed vectors", { 25 | tbl <- tibble(x = 1:2, y = 1:2) 26 | dt <- lazy_dt(tbl, "DT") 27 | step <- expand(dt, x, 1:2) 28 | out <- collect(step) 29 | 30 | expect_equal( 31 | show_query(step), 32 | expr(DT[, CJ(x = x, V2 = 1:2, unique = TRUE)]) 33 | ) 34 | expect_equal(step$vars, c("x", "V2")) 35 | expect_equal(nrow(out), 4) 36 | }) 37 | 38 | test_that("works with named vectors", { 39 | tbl <- tibble(x = 1:2, y = 1:2) 40 | dt <- lazy_dt(tbl, "DT") 41 | step <- expand(dt, x, val = 1:2) 42 | out <- collect(step) 43 | 44 | expect_equal( 45 | show_query(step), 46 | expr(DT[, CJ(x = x, val = 1:2, unique = TRUE)]) 47 | ) 48 | expect_equal(step$vars, c("x", "val")) 49 | expect_equal(nrow(out), 4) 50 | }) 51 | 52 | test_that("expand respects groups", { 53 | tbl <- tibble( 54 | a = c(1L, 1L, 2L), 55 | b = c(1L, 2L, 1L), 56 | c = c(2L, 1L, 1L) 57 | ) 58 | dt <- lazy_dt(tbl, "DT") 59 | step <- dt %>% group_by(c) %>% expand(a, b) 60 | out <- collect(step) 61 | 62 | expect_equal( 63 | show_query(step), 64 | expr(DT[, CJ(a = a, b = b, unique = TRUE), keyby = .(c)]) 65 | ) 66 | expect_equal(step$vars, c("c", "a", "b")) 67 | expect_equal(out$a, c(1, 1, 2, 2, 1)) 
68 | expect_equal(out$b, c(1, 2, 1, 2, 1)) 69 | }) 70 | 71 | test_that("expand handles group variables as arguments", { 72 | dt <- lazy_dt(data.frame(x = 1, y = 2, z = 3), "DT") 73 | 74 | # single group var, not redefined 75 | res <- dt %>% group_by(x) %>% expand(x, y) 76 | expect_equal( 77 | show_query(res), 78 | expr(DT[, CJ(x = x, y = y, unique = TRUE), keyby = .(x)][, `:=`("x", NULL)]) 79 | ) 80 | expect_equal( 81 | res$groups, 82 | "x" 83 | ) 84 | 85 | # multiple group vars, not redefined 86 | res <- dt %>% group_by(x, y) %>% expand(x, y, z) 87 | expect_equal( 88 | show_query(res), 89 | expr(DT[, CJ(x = x, y = y, z = z, unique = TRUE), keyby = .(x, y) 90 | ][, !!expr(!!c("x", "y") := NULL)]) 91 | ) 92 | expect_equal( 93 | res$groups, 94 | c("x", "y") 95 | ) 96 | 97 | # redefined group var 98 | res <- dt %>% group_by(x) %>% expand(x = 5, y) 99 | expect_equal( 100 | show_query(res), 101 | expr(DT[, CJ(x = 5, y = y, unique = TRUE), keyby = .(x)][, `:=`("x", NULL)]) 102 | ) 103 | expect_equal( 104 | res$groups, 105 | c("x") 106 | ) 107 | expect_equal( 108 | as_tibble(res), 109 | tibble(x = 5, y = 2) 110 | ) 111 | }) 112 | 113 | test_that("NULL inputs", { 114 | tbl <- tibble(x = 1:5) 115 | dt <- lazy_dt(tbl, "DT") 116 | step <- expand(dt, x, y = NULL) 117 | out <- collect(step) 118 | expect_equal(out, tbl) 119 | }) 120 | 121 | test_that("expand respects .name_repair", { 122 | dt <- lazy_dt(tibble(x = 1:2), "DT") 123 | 124 | suppressMessages( 125 | expect_named(dt %>% expand(x, x, .name_repair = "unique") %>% collect(), c("x...1", "x...2")) 126 | ) 127 | }) 128 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-filter.R: -------------------------------------------------------------------------------- 1 | test_that("can filter by value", { 2 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 3 | 4 | expect_equal( 5 | dt %>% filter() %>% show_query(), 6 | expr(DT) 7 | ) 8 | expect_equal( 9 | dt %>% 
filter(x) %>% show_query(), 10 | expr(DT[(x)]) 11 | ) 12 | 13 | expect_equal( 14 | dt %>% filter(x > 1) %>% show_query(), 15 | expr(DT[x > 1]) 16 | ) 17 | 18 | expect_equal( 19 | dt %>% filter(x > 1, y > 2) %>% show_query(), 20 | expr(DT[x > 1 & y > 2]) 21 | ) 22 | }) 23 | 24 | test_that("can filter with logical columns", { 25 | dt <- lazy_dt(data.table(x = c(TRUE, FALSE)), "DT") 26 | 27 | expect_equal( 28 | dt %>% filter(x) %>% show_query(), 29 | expr(DT[(x)]) 30 | ) 31 | 32 | expect_equal( 33 | dt %>% filter(!x) %>% show_query(), 34 | expr(DT[(!x)]) 35 | ) 36 | }) 37 | 38 | 39 | test_that("inlines external variables", { 40 | dt <- lazy_dt(data.table(x = 1), "DT") 41 | l <- c(1, 10) 42 | 43 | expect_equal( 44 | dt %>% filter(x %in% l) %>% show_query(), 45 | quote(DT[x %in% !!l]) 46 | ) 47 | 48 | # Except in the global environment 49 | # But I can't figure out how to test this - it's not too important 50 | # as it only affects the quality of the translation not the correctness 51 | }) 52 | 53 | test_that("can use with across", { 54 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 55 | 56 | expect_equal( 57 | dt %>% filter(across(x:y, ~ . > 0)) %>% show_query(), 58 | expr(DT[x > 0 & y > 0]) 59 | ) 60 | 61 | expect_equal( 62 | dt %>% filter(if_all(x:y, ~ . > 0)) %>% show_query(), 63 | expr(DT[x > 0 & y > 0]) 64 | ) 65 | expect_equal( 66 | dt %>% filter(if_any(x:y, ~ . > 0)) %>% show_query(), 67 | expr(DT[x > 0 | y > 0]) 68 | ) 69 | 70 | # .cols defaults to everything() 71 | expect_equal( 72 | dt %>% filter(if_all(.fns = ~ . > 0)) %>% show_query(), 73 | expr(DT[x > 0 & y > 0 & z > 0]) 74 | ) 75 | expect_equal( 76 | dt %>% filter(if_any(.fns = ~ . 
> 0)) %>% show_query(), 77 | expr(DT[x > 0 | y > 0 | z > 0]) 78 | ) 79 | }) 80 | 81 | test_that("can filter when grouped", { 82 | dt1 <- lazy_dt(data.table(x = c(1, 1, 2, 2), y = c(1, 2, 3, 4)), "DT") 83 | dt2 <- dt1 %>% group_by(x) %>% filter(sum(y) == 3) 84 | 85 | expect_equal( 86 | dt2 %>% show_query(), 87 | expr(DT[DT[, .I[sum(y) == 3], by = .(x)]$V1]) 88 | ) 89 | 90 | expect_equal(as_tibble(dt2), tibble(x = c(1, 1), y = c(1, 2))) 91 | }) 92 | 93 | test_that("grouped filter doesn't reorder", { 94 | dt1 <- lazy_dt(data.frame(x = c(2, 2, 1, 1), y = 1:4), "DT") 95 | dt2 <- dt1 %>% group_by(x) %>% filter(TRUE) 96 | 97 | expect_equal( 98 | dt2 %>% show_query(), 99 | expr(DT[DT[, .I[TRUE], by = .(x)]$V1]) 100 | ) 101 | expect_equal(dt2 %>% as_tibble(), as_tibble(dt1)) 102 | }) 103 | 104 | test_that("only adds step if dots are not empty", { 105 | dt <- lazy_dt(data.table(x = 1), "DT") 106 | 107 | expect_equal(dt %>% filter(), dt) 108 | expect_equal(dt %>% filter(!!!list()), dt) 109 | }) 110 | 111 | test_that("errors for named input", { 112 | dt <- lazy_dt(data.table(x = 1, y = 2), "DT") 113 | 114 | expect_snapshot(error = TRUE, filter(dt, x = 1)) 115 | expect_snapshot(error = TRUE, filter(dt, y > 1, x = 1)) 116 | }) 117 | 118 | test_that("allows named constants that resolve to logical vectors", { 119 | dt <- lazy_dt(mtcars, "DT") 120 | filters <- mtcars %>% 121 | transmute( 122 | cyl %in% 6:8, 123 | hp / drat > 50 124 | ) 125 | 126 | expect_equal( 127 | filter(dt, !!!filters), 128 | filter(dt, !!!unname(filters)) 129 | ) 130 | }) 131 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-select.R: -------------------------------------------------------------------------------- 1 | test_that("can select variables", { 2 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 3 | 4 | expect_equal( 5 | dt %>% select(-z) %>% show_query(), 6 | expr(DT[, .(x, y)]) 7 | ) 8 | 9 | expect_equal( 10 | dt %>% select(a = 
x, y) %>% show_query(), 11 | expr(DT[, .(a = x, y)]) 12 | ) 13 | }) 14 | 15 | test_that("can merge iff j-generating call comes after i", { 16 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 17 | 18 | expect_equal( 19 | dt %>% filter(x > 1) %>% select(y) %>% show_query(), 20 | expr(DT[x > 1, .(y)]) 21 | ) 22 | expect_equal( 23 | dt %>% select(x = y) %>% filter(x > 1) %>% show_query(), 24 | expr(DT[, .(x = y)][x > 1]) 25 | ) 26 | }) 27 | 28 | test_that("renames grouping vars", { 29 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1)) 30 | gt <- group_by(dt, x) 31 | 32 | expect_equal(select(gt, y = x)$groups, "y") 33 | }) 34 | 35 | test_that("empty select returns no columns", { 36 | dt <- data.table(x = 1, y = 1, z = 1) 37 | lz <- lazy_dt(dt, "DT") 38 | expect_equal( 39 | lz %>% select() %>% collect(), 40 | tibble() 41 | ) 42 | 43 | # unless it's grouped 44 | skip_if(utils::packageVersion("rlang") < "0.5.0") 45 | expect_snapshot(out <- lz %>% group_by(x) %>% select()) 46 | expect_equal( 47 | out %>% collect(), 48 | group_by(tibble(x = 1), x) 49 | ) 50 | }) 51 | 52 | test_that("vars set correctly", { 53 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3)) 54 | expect_equal(dt %>% select(a = x, y) %>% .$vars, c("a", "y")) 55 | }) 56 | 57 | test_that("only add step if necessary", { 58 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 59 | expect_equal(dt %>% select(everything()), dt) 60 | expect_equal(dt %>% select(x, y), dt) 61 | }) 62 | 63 | ### When data is copied (either implicitly or explicitly) 64 | 65 | test_that("copied data: can select variables", { 66 | dt <- lazy_dt(data.table(x = 1, y = 2, z = 3), "DT") 67 | dt$needs_copy <- TRUE 68 | 69 | expect_equal( 70 | dt %>% select(-z) %>% show_query(), 71 | expr(copy(DT)[, `:=`(!!"z", NULL)]) 72 | ) 73 | 74 | expect_equal( 75 | dt %>% select(y, x) %>% show_query(), 76 | expr(setcolorder(copy(DT)[, `:=`("z", NULL)], !!c("y", "x"))) 77 | ) 78 | 79 | expect_equal( 80 | dt %>% select(a = x, y) %>% show_query(), 81 | 
expr(copy(DT)[, .(a = x, y)]) 82 | ) 83 | }) 84 | 85 | test_that("copied data: renaming uses regular selection", { 86 | dt <- lazy_dt(data.table(x = 1, y = 2, z = 3), "DT") 87 | dt$needs_copy <- TRUE 88 | 89 | step <- dt %>% select(a = x, y) 90 | 91 | expect_equal( 92 | show_query(step), 93 | expr(copy(DT)[, .(a = x, y)]) 94 | ) 95 | 96 | expect_named(collect(step), c("a", "y")) 97 | }) 98 | 99 | test_that("copied data: can merge iff j-generating call comes after i", { 100 | dt <- lazy_dt(data.table(x = 1, y = 2, z = 3), "DT") 101 | dt$needs_copy <- TRUE 102 | 103 | expect_equal( 104 | dt %>% filter(x > 1) %>% select(y) %>% show_query(), 105 | expr(copy(DT)[x > 1, .(y)]) 106 | ) 107 | expect_equal( 108 | dt %>% select(x = y) %>% filter(x > 1) %>% show_query(), 109 | expr(copy(DT)[, .(x = y)][x > 1]) 110 | ) 111 | 112 | }) 113 | 114 | test_that("copied data: renames grouping vars", { 115 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1)) 116 | gt <- group_by(dt, x) 117 | gt$needs_copy <- TRUE 118 | 119 | expect_equal(select(gt, y = x)$groups, "y") 120 | }) 121 | 122 | test_that("copied data: empty select returns no columns", { 123 | dt <- data.table(x = 1, y = 2, z = 3) 124 | lz <- lazy_dt(dt, "DT") 125 | lz$needs_copy <- TRUE 126 | expect_equal( 127 | lz %>% select() %>% collect(), 128 | tibble() 129 | ) 130 | 131 | # unless it's grouped 132 | expect_snapshot(out <- lz %>% group_by(x) %>% select()) 133 | expect_equal( 134 | out %>% collect(), 135 | group_by(tibble(x = 1), x) 136 | ) 137 | }) 138 | 139 | test_that("copied data: only add step if necessary", { 140 | dt <- lazy_dt(data.frame(x = 1:3, y = 1:3), "DT") 141 | dt$needs_copy <- TRUE 142 | expect_equal(dt %>% select(everything()), dt) 143 | expect_equal(dt %>% select(x, y), dt) 144 | }) 145 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-separate.R: -------------------------------------------------------------------------------- 1 | 
test_that("missing values in input are missing in output", { 2 | dt <- lazy_dt(tibble(x = c(NA, "a b")), "DT") 3 | step <- separate(dt, x, c("x", "y")) 4 | out <- collect(step) 5 | expect_equal( 6 | show_query(step), 7 | expr(copy(DT)[, `:=`(!!c("x", "y"), tstrsplit(x, split = "[^[:alnum:]]+"))]) 8 | ) 9 | expect_equal(out$x, c(NA, "a")) 10 | expect_equal(out$y, c(NA, "b")) 11 | }) 12 | 13 | test_that("convert produces integers etc", { 14 | dt <- lazy_dt(tibble(x = "1-1.5-FALSE"), "DT") 15 | step <- separate(dt, x, c("x", "y", "z"), "-", convert = TRUE) 16 | out <- collect(step) 17 | expect_equal( 18 | show_query(step), 19 | expr(copy(DT)[, `:=`(!!c("x", "y", "z"), tstrsplit(x, split = "-", type.convert = TRUE))]) 20 | ) 21 | expect_equal(out$x, 1L) 22 | expect_equal(out$y, 1.5) 23 | expect_equal(out$z, FALSE) 24 | }) 25 | 26 | test_that("overwrites existing columns", { 27 | dt <- lazy_dt(tibble(x = "a:b"), "DT") 28 | step <- dt %>% separate(x, c("x", "y")) 29 | out <- collect(step) 30 | 31 | expect_equal( 32 | show_query(step), 33 | expr(copy(DT)[, `:=`(!!c("x", "y"), tstrsplit(x, split = "[^[:alnum:]]+"))]) 34 | ) 35 | expect_equal(step$vars, c("x", "y")) 36 | expect_equal(out$x, "a") 37 | }) 38 | 39 | test_that("drops NA columns", { 40 | dt <- lazy_dt(tibble(x = c(NA, "a-b", "c-d")), "DT") 41 | step <- separate(dt, x, c(NA, "y"), "-") 42 | out <- collect(step) 43 | expect_equal(step$vars, "y") 44 | expect_equal(out$y, c(NA, "b", "d")) 45 | }) 46 | 47 | test_that("checks type of `into` and `sep`", { 48 | dt <- lazy_dt(tibble(x = "a:b"), "DT") 49 | expect_snapshot( 50 | separate(dt, x, "x", FALSE), 51 | error = TRUE 52 | ) 53 | expect_snapshot( 54 | separate(dt, x, FALSE), 55 | error = TRUE 56 | ) 57 | }) 58 | 59 | test_that("only copies when necessary", { 60 | dt <- tibble(x = paste(letters[1:3], letters[1:3], sep = "-"), y = 1:3) %>% 61 | lazy_dt("DT") 62 | step <- dt %>% 63 | filter(y < 4) %>% 64 | separate(x, into = c("left", "right"), sep = "-") 65 | 
expect_equal( 66 | show_query(step), 67 | expr(DT[y < 4][, `:=`(!!c("left", "right"), tstrsplit(x, split = "-"))][, `:=`("x", NULL)]) 68 | ) 69 | }) 70 | 71 | test_that("can pass quosure to `col` arg, #359", { 72 | dt <- lazy_dt(tibble(combined = c("a_b", "a_b")), "DT") 73 | separate2 <- function(df, col, into) { 74 | collect(separate(df, {{ col }}, into)) 75 | } 76 | out <- separate2(dt, combined, into = c("a", "b")) 77 | expect_named(out, c("a", "b")) 78 | expect_equal(out$a, c("a", "a")) 79 | expect_equal(out$b, c("b", "b")) 80 | }) 81 | 82 | test_that("can use numeric `col` arg", { 83 | dt <- lazy_dt(tibble(combined = c("a_b", "a_b")), "DT") 84 | 85 | out <- collect(separate(dt, 1, into = c("a", "b"))) 86 | expect_named(out, c("a", "b")) 87 | expect_equal(out$a, c("a", "a")) 88 | expect_equal(out$b, c("b", "b")) 89 | }) 90 | 91 | test_that("errors on multiple columns in `col`", { 92 | dt <- lazy_dt(tibble(x = c("a_b", "a_b"), y = x), "DT") 93 | 94 | expect_error(separate(dt, c(x, y), into = c("left", "right")), 95 | "must select exactly one column") 96 | }) 97 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset-transmute.R: -------------------------------------------------------------------------------- 1 | test_that("works", { 2 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") 3 | 4 | expect_equal( 5 | dt %>% transmute(x) %>% collect(), 6 | dt %>% mutate(x, .keep = "none") %>% collect() 7 | ) 8 | }) 9 | 10 | test_that("empty dots preserves groups", { 11 | dt <- lazy_dt(data.table(x = 1, y = 1, z = 1), "DT") %>% 12 | group_by(y) 13 | 14 | res <- dt %>% transmute() %>% collect() 15 | 16 | expect_equal(names(res), "y") 17 | }) 18 | 19 | test_that("preserves column order", { 20 | dt <- lazy_dt(data.table(x = 1, y = 1), "DT") 21 | 22 | res <- dt %>% transmute(y, x) %>% collect() 23 | 24 | expect_equal(names(res), c("y", "x")) 25 | }) 26 | 27 | test_that("works correctly when column is both added 
and removed in the same call", { 28 | dt <- lazy_dt(data.table(x = 1, y = 2), "DT") 29 | 30 | res <- dt %>% transmute(y, z = 3, z = NULL) %>% collect() 31 | 32 | expect_equal(names(res), "y") 33 | }) 34 | 35 | -------------------------------------------------------------------------------- /tests/testthat/test-step-subset.R: -------------------------------------------------------------------------------- 1 | test_that("construtor has sensible defaults", { 2 | first <- step_first(data.table(x = 1), "DT") 3 | step <- step_subset(first) 4 | 5 | expect_s3_class(step, "dtplyr_step_subset") 6 | expect_equal(step$parent, first) 7 | expect_equal(step$vars, "x") 8 | expect_equal(step$groups, character()) 9 | expect_equal(step$i, NULL) 10 | expect_equal(step$j, NULL) 11 | }) 12 | 13 | test_that("generates expected calls", { 14 | first <- lazy_dt(data.table(x = 1), "DT") 15 | 16 | ungrouped <- step_subset(first, i = quote(i), j = quote(j)) 17 | expect_equal(dt_call(ungrouped), expr(DT[i, j])) 18 | 19 | with_i <- step_subset(first, i = quote(i), j = quote(j), groups = "x") 20 | expect_equal(dt_call(with_i), expr(DT[i, j, keyby = .(x)])) 21 | 22 | without_i <- step_subset(first, j = quote(j), groups = "x") 23 | expect_equal(dt_call(without_i), expr(DT[, j, keyby = .(x)])) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-step.R: -------------------------------------------------------------------------------- 1 | test_that("tbl metadata as expected", { 2 | dt <- lazy_dt(data.table(x = c(1, 1, 1, 2, 2, 3)), "DT") 3 | 4 | expect_equal(dim(dt), c(6, 1)) 5 | expect_equal(as.character(tbl_vars(dt)), "x") 6 | expect_equal(show_query(dt), expr(DT)) 7 | }) 8 | 9 | test_that("group metadata as expected", { 10 | dt <- lazy_dt(data.table(x = c(1, 1, 1, 2, 2, 3))) 11 | expect_equal(group_vars(dt), character()) 12 | expect_equal(groups(dt), list()) 13 | expect_equal(group_size(dt), 6) 14 | expect_equal(n_groups(dt), 1) 15 | 16 
| gt <- group_by(dt, x) 17 | expect_equal(group_vars(gt), c("x")) 18 | expect_equal(groups(gt), syms("x")) 19 | expect_equal(group_size(gt), c(3, 2, 1)) 20 | expect_equal(n_groups(gt), 3) 21 | }) 22 | 23 | test_that("has useful display methods", { 24 | expect_snapshot({ 25 | dt <- lazy_dt(mtcars, "DT") 26 | dt 27 | dt %>% group_by(vs, am) 28 | dt %>% mutate(y = 10) %>% compute("DT2") 29 | }) 30 | }) 31 | 32 | test_that("can print using n/max_extra_cols/max_footer_lines, #464, ", { 33 | expect_snapshot({ 34 | dt <- letters %>% 35 | lapply(function(.x) tibble(!!.x := 1:10)) %>% 36 | bind_cols() %>% 37 | lazy_dt("DT") 38 | print(dt, n = 3) 39 | print(dt, max_extra_cols = 3) 40 | print(dt, max_footer_lines = 1) 41 | }) 42 | }) 43 | 44 | test_that("can evaluate to any data frame type", { 45 | dt <- lazy_dt(mtcars, "DT") 46 | 47 | expect_identical(class(as.data.frame(dt)), "data.frame") 48 | expect_s3_class(as.data.table(dt), "data.table") 49 | expect_s3_class(as_tibble(dt), "tbl_df") 50 | 51 | expect_s3_class(collect(dt), "tbl_df") 52 | }) 53 | 54 | test_that("compute returns lazy_dt", { 55 | dt <- lazy_dt(mtcars, "DT") 56 | dt <- summarise(dt, n = n()) 57 | 58 | dt2 <- compute(dt) 59 | expect_s3_class(dt2, "dtplyr_step") 60 | expect_equal(as.character(tbl_vars(dt2)), "n") 61 | }) 62 | 63 | test_that("collect and compute return grouped data", { 64 | dt <- group_by(lazy_dt(data.table(x = 1, y = 1), "DT"), x) 65 | 66 | expect_equal(dt %>% compute() %>% group_vars(), "x") 67 | expect_equal(dt %>% collect() %>% group_vars(), "x") 68 | }) 69 | 70 | 71 | # pull() ------------------------------------------------------------------ 72 | 73 | test_that("pull default extracts last var from data frame", { 74 | df <- lazy_dt(tibble(x = 1:10, y = 1:10), "DT") 75 | expect_equal(pull(df), 1:10) 76 | }) 77 | 78 | test_that("can extract by name, or positive/negative position", { 79 | x <- 1:10 80 | df <- lazy_dt(tibble(x = x, y = runif(10)), "DT") 81 | 82 | expect_equal(pull(df, x), x) 
83 | expect_equal(pull(df, 1), x) 84 | expect_equal(pull(df, -2L), x) 85 | }) 86 | 87 | test_that("can extract named vectors", { 88 | x <- 1:10 89 | y <- letters[x] 90 | df <- lazy_dt(tibble(x = x, y = y), "DT") 91 | xn <- set_names(x, y) 92 | 93 | expect_equal(pull(df, x, y), xn) 94 | expect_equal(pull(df, 1, 2), xn) 95 | }) 96 | -------------------------------------------------------------------------------- /tests/testthat/test-unite.R: -------------------------------------------------------------------------------- 1 | test_that("unite pastes columns together & removes old col", { 2 | df <- lazy_dt(data.table(x = "a", y = "b"), "DT") 3 | step <- unite(df, "z", x:y) 4 | out <- as.data.table(step) 5 | expect_equal(names(out), "z") 6 | expect_equal(out$z, "a_b") 7 | expect_equal( 8 | show_query(step), 9 | expr(copy(DT)[, `:=`(z = paste(x, y, sep = "_"))][, `:=`(!!c("x", "y"), NULL)]) 10 | ) 11 | }) 12 | 13 | test_that("unite does not remove new col in case of name clash", { 14 | df <- lazy_dt(data.table(x = "a", y = "b"), "DT") 15 | step <- unite(df, x, x:y) 16 | out <- as.data.table(step) 17 | expect_equal(names(out), "x") 18 | expect_equal(out$x, "a_b") 19 | }) 20 | 21 | test_that("correct column order when remove = FALSE", { 22 | df <- lazy_dt(data.table(x = "a", y = "b"), "DT") 23 | step <- unite(df, "united", y, x, remove = FALSE) 24 | out <- as.data.table(step) 25 | expect_equal(names(out), c("united", "x", "y")) 26 | expect_equal(out$united, "b_a") 27 | }) 28 | 29 | test_that("unite preserves grouping", { 30 | df <- lazy_dt(data.table(g = 1, x = "a"), "DT") %>% group_by(g) 31 | step <- df %>% unite(x, x) 32 | expect_equal(dplyr::group_vars(df), dplyr::group_vars(step)) 33 | }) 34 | 35 | test_that("doesn't use `by` for unite step", { 36 | df <- lazy_dt(data.table(x = "a", y = "b", z = "c"), "DT") %>% group_by(z) 37 | step <- unite(df, "z", x:y) 38 | out <- as.data.table(step) 39 | expect_equal(names(out), "z") 40 | expect_equal(out$z, "a_b") 41 | 
expect_equal(step$groups, "z") 42 | expect_equal( 43 | show_query(step), 44 | expr(copy(DT)[, `:=`(z = paste(x, y, sep = "_"))][, `:=`(!!c("x", "y"), NULL)]) 45 | ) 46 | }) 47 | 48 | test_that("drops grouping when needed", { 49 | df <- lazy_dt(data.table(g = 1, x = "a"), "DT") %>% group_by(g) 50 | step <- df %>% unite(gx, g, x) 51 | rs <- as.data.table(step) 52 | expect_equal(rs$gx, "1_a") 53 | expect_equal(dplyr::group_vars(rs), character()) 54 | }) 55 | 56 | test_that("keeps groups when needed", { 57 | df <- lazy_dt(data.table(x = "x", y = "y"), "DT") %>% group_by(x, y) 58 | step <- df %>% unite("z", x) 59 | rs <- as.data.table(step) 60 | expect_equal(rs$z, "x") 61 | expect_equal(dplyr::group_vars(step), "y") 62 | }) 63 | 64 | test_that("empty var spec uses all vars", { 65 | df <- lazy_dt(data.table(x = "a", y = "b"), "DT") 66 | expect_equal(collect(unite(df, "z")), tibble(z = "a_b")) 67 | }) 68 | 69 | test_that("errors on na.rm", { 70 | df <- lazy_dt(data.table(x = c("a", NA), y = c("b", NA)), "DT") 71 | expect_snapshot_error(unite(df, "z", x:y, na.rm = TRUE)) 72 | }) 73 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/benchmark.R: -------------------------------------------------------------------------------- 1 | DF <- vroom::vroom("G1_1e7_1e2_0_0.csv") 2 | DT <- data.table(DF) # upper-case names: R is case-sensitive and the queries below use DF/DT 3 | 4 | 5 | # Q1 ---------------------------------------------------------------------- 6 | bench::mark( 7 | dplyr = DT %>% group_by(id1) %>% summarise(sum(v1)), 8 | direct = DT[, .(v1 = sum(v1)), by = id1], 9 | min_iterations = 3, 10 | check = FALSE 11 | ) 12 | 13 | DT %>% group_by(id1) %>% summarise(sum(v1)) 14 | 15 | # Q2 ---------------------------------------------------------------------- 16 | DF %>% 17 | group_by(id1, id2) %>% 18 | 
summarise(v1 = sum(v1)) 19 | 20 | DT[, .(v1=sum(v1)), by=.(id1, id2)] 21 | 22 | # Q3 ---------------------------------------------------------------------- 23 | DF %>% 24 | group_by(id3) %>% 25 | summarise(v1 = sum(v1), v3 = mean(v3)) 26 | 27 | DT[, .(v1=sum(v1), v3=mean(v3)), by=id3] 28 | 29 | # Q4 ---------------------------------------------------------------------- 30 | DF %>% 31 | group_by(id4) %>% 32 | summarise_at("mean", c("v1", "v2", "v3")) 33 | 34 | DT[, lapply(.SD, mean), by=id4, .SDcols=v1:v3]) 35 | 36 | # Q5 ---------------------------------------------------------------------- 37 | DF %>% 38 | group_by(id6) %>% 39 | summarise_at("sum", c("v1", "v2", "v3")) 40 | 41 | DT[, lapply(.SD, sum), by=id6, .SDcols=v1:v3]) 42 | 43 | # Q6 ---------------------------------------------------------------------- 44 | DF %>% 45 | group_by(id2, id4) %>% 46 | summarise(median_v3 = median(v3), sd_v3 = sd(v3)) 47 | 48 | DT[, .(median_v3=median(v3), sd_v3=sd(v3)), by=.(id2, id4)]) 49 | 50 | # Q7 ---------------------------------------------------------------------- 51 | DF %>% 52 | group_by(id2, id4) %>% 53 | summarise(range_v1_v2 = max(v1) - min(v2)) 54 | 55 | DT[, .(range_v1_v2=max(v1)-min(v2)), by=.(id2, id4)] 56 | 57 | # Q8 ---------------------------------------------------------------------- 58 | DF %>% 59 | select(id2, id4, largest2_v3 = v3) %>% 60 | arrange(desc(largest2_v3)) %>% 61 | group_by(id2, id4) %>% 62 | filter(row_number() <= 2L) 63 | 64 | DT[order(-v3), .(largest2_v3 = head(v3, 2L)), by=.(id2, id4)] 65 | 66 | # Q9 ---------------------------------------------------------------------- 67 | DF %>% 68 | group_by(id2, id4) %>% 69 | summarise(r2 = cor(v1, v2)^2) 70 | 71 | DT[, .(r2=cor(v1, v2)^2), by=.(id2, id4)] 72 | 73 | # Q10 ---------------------------------------------------------------------- 74 | DF %>% 75 | group_by(id1, id2, id3, id4, id5, id6) %>% 76 | summarise(v3 = sum(v3), count = n()) 77 | 78 | DT[, .(v3=sum(v3), count=.N), by=id1:id6] 79 | 
--------------------------------------------------------------------------------