├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check-dev.yaml │ ├── R-CMD-check-status.yaml │ ├── R-CMD-check.yaml │ ├── check │ └── action.yml │ ├── commit │ └── action.yml │ ├── covr │ └── action.yml │ ├── dep-matrix │ └── action.yml │ ├── dep-suggests-matrix │ ├── action.R │ └── action.yml │ ├── get-extra │ └── action.yml │ ├── git-identity │ └── action.yml │ ├── install │ └── action.yml │ ├── lock.yaml │ ├── matrix-check │ └── action.yml │ ├── pkgdown-build │ └── action.yml │ ├── pkgdown-deploy │ └── action.yml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ ├── rate-limit │ └── action.yml │ ├── revdep.yaml │ ├── roxygenize │ └── action.yml │ ├── style │ └── action.yml │ ├── update-snapshots │ └── action.yml │ └── versions-matrix │ ├── action.R │ └── action.yml ├── .gitignore ├── CRAN-SUBMISSION ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── check.R ├── coalesce.R ├── conflict.R ├── dplyr.R ├── extended-equality.R ├── fuzzy.R ├── generics_and_methods.R ├── power_join_mutate.R ├── powerjoin-package.R ├── preprocess.R └── preprocess_by.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── cran-comments.md ├── docs ├── 404.html ├── LICENSE.html ├── apple-touch-icon-120x120.png ├── apple-touch-icon-152x152.png ├── apple-touch-icon-180x180.png ├── apple-touch-icon-60x60.png ├── apple-touch-icon-76x76.png ├── apple-touch-icon.png ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico ├── index.html ├── link.svg ├── logo.png ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── check_specs.html │ ├── coalesce_xy.html │ ├── figures │ │ └── logo.png │ ├── full_diagnostic.html │ ├── index.html │ ├── power_left_join.html │ ├── powerjoin-package.html │ └── preprocess_inputs.html └── sitemap.xml ├── man ├── check_specs.Rd ├── coalesce_xy.Rd ├── extended-equality.Rd ├── figures │ └── logo.png ├── full_diagnostic.Rd ├── paste_xy.Rd ├── power_left_join.Rd ├── powerjoin-package.Rd └── preprocess_inputs.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── powerjoin.Rproj └── tests ├── testthat.R └── testthat ├── test-check.R ├── test-check_specs.R ├── test-conflict.R ├── test-corner_cases.R ├── test-fill.R ├── test-fuzzy_joins.R ├── test-keep.R ├── test-preprocess.R ├── test-recursive.R └── test-standard_joins.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^powerjoin\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^cran-comments\.md$ 5 | ^_pkgdown\.yml$ 6 | ^docs$ 7 | ^pkgdown$ 8 | ^CRAN-SUBMISSION$ 9 | ^LICENSE\.md$ 10 | .github 11 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | /pkg.lock 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-dev.yaml: -------------------------------------------------------------------------------- 1 | # This workflow calls the GitHub API very frequently. 
2 | # Can't be run as part of commits 3 | on: 4 | schedule: 5 | - cron: "0 5 * * *" # 05:00 UTC every day only run on main branch 6 | push: 7 | branches: 8 | - "cran-*" 9 | tags: 10 | - "v*" 11 | 12 | name: rcc dev 13 | 14 | jobs: 15 | matrix: 16 | runs-on: ubuntu-22.04 17 | outputs: 18 | matrix: ${{ steps.set-matrix.outputs.matrix }} 19 | 20 | name: Collect deps 21 | 22 | steps: 23 | - uses: actions/checkout@v4 24 | 25 | - uses: ./.github/workflows/rate-limit 26 | with: 27 | token: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | 31 | - id: set-matrix 32 | uses: ./.github/workflows/dep-matrix 33 | 34 | check-matrix: 35 | runs-on: ubuntu-22.04 36 | needs: matrix 37 | 38 | name: Check deps 39 | 40 | steps: 41 | - name: Install json2yaml 42 | run: | 43 | sudo npm install -g json2yaml 44 | 45 | - name: Check matrix definition 46 | run: | 47 | matrix='${{ needs.matrix.outputs.matrix }}' 48 | echo $matrix 49 | echo $matrix | jq . 50 | echo $matrix | json2yaml 51 | 52 | R-CMD-check-base: 53 | runs-on: ubuntu-22.04 54 | 55 | name: base 56 | 57 | # Begin custom: services 58 | # End custom: services 59 | 60 | strategy: 61 | fail-fast: false 62 | 63 | steps: 64 | - uses: actions/checkout@v4 65 | 66 | - uses: ./.github/workflows/custom/before-install 67 | if: hashFiles('.github/workflows/custom/before-install/action.yml') != '' 68 | 69 | - uses: ./.github/workflows/install 70 | with: 71 | cache-version: rcc-dev-base-1 72 | needs: build, check 73 | extra-packages: "any::rcmdcheck any::remotes ." 74 | token: ${{ secrets.GITHUB_TOKEN }} 75 | 76 | - name: Session info 77 | run: | 78 | options(width = 100) 79 | if (!requireNamespace("sessioninfo", quietly = TRUE)) install.packages("sessioninfo") 80 | pkgs <- installed.packages()[, "Package"] 81 | sessioninfo::session_info(pkgs, include_base = TRUE) 82 | shell: Rscript {0} 83 | 84 | - uses: ./.github/workflows/custom/after-install 85 | if: hashFiles('.github/workflows/custom/after-install/action.yml') != '' 86 | 87 | - uses: ./.github/workflows/update-snapshots 88 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository 89 | 90 | - uses: ./.github/workflows/check 91 | with: 92 | results: ${{ matrix.package }} 93 | 94 | R-CMD-check-dev: 95 | needs: 96 | - matrix 97 | - R-CMD-check-base 98 | 99 | runs-on: ubuntu-22.04 100 | 101 | name: 'rcc-dev: ${{ matrix.package }}' 102 | 103 | # Begin custom: services 104 | # End custom: services 105 | 106 | strategy: 107 | fail-fast: false 108 | matrix: ${{fromJson(needs.matrix.outputs.matrix)}} 109 | 110 | steps: 111 | - uses: actions/checkout@v4 112 | 113 | - uses: ./.github/workflows/custom/before-install 114 | if: hashFiles('.github/workflows/custom/before-install/action.yml') != '' 115 | 116 | - uses: ./.github/workflows/install 117 | with: 118 | cache-version: rcc-dev-${{ matrix.package }}-1 119 | needs: build, check 120 | extra-packages: "any::rcmdcheck any::remotes ." 
121 | token: ${{ secrets.GITHUB_TOKEN }} 122 | 123 | - name: Install dev version of ${{ matrix.package }} 124 | env: 125 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 126 | run: | 127 | remotes::install_dev("${{ matrix.package }}", "https://cloud.r-project.org", upgrade = "always") 128 | shell: Rscript {0} 129 | 130 | - name: Session info 131 | run: | 132 | options(width = 100) 133 | if (!requireNamespace("sessioninfo", quietly = TRUE)) install.packages("sessioninfo") 134 | pkgs <- installed.packages()[, "Package"] 135 | sessioninfo::session_info(pkgs, include_base = TRUE) 136 | shell: Rscript {0} 137 | 138 | - uses: ./.github/workflows/custom/after-install 139 | if: hashFiles('.github/workflows/custom/after-install/action.yml') != '' 140 | 141 | - uses: ./.github/workflows/update-snapshots 142 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository 143 | 144 | - uses: ./.github/workflows/check 145 | with: 146 | results: ${{ matrix.package }} 147 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check-status.yaml: -------------------------------------------------------------------------------- 1 | # Workflow to update the status of a commit for the R-CMD-check workflow 2 | # Necessary because remote PRs cannot update the status of the commit 3 | on: 4 | workflow_run: 5 | workflows: 6 | - rcc 7 | types: 8 | - requested 9 | - completed 10 | 11 | name: rcc-status 12 | 13 | jobs: 14 | rcc-status: 15 | runs-on: ubuntu-24.04 16 | 17 | name: "Update commit status" 18 | 19 | permissions: 20 | contents: read 21 | statuses: write 22 | 23 | steps: 24 | - name: "Update commit status" 25 | # Only run if triggered by rcc workflow 26 | if: github.event.workflow_run.name == 'rcc' 27 | env: 28 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 29 | run: | 30 | set -x 31 | 32 | if [ "${{ github.event.workflow_run.status }}" == "completed" ]; then 33 | if [ "${{ github.event.workflow_run.conclusion }}" == "success" ]; then 34 | state="success" 35 | else 36 | state="failure" 37 | fi 38 | 39 | # Read artifact ID 40 | artifact_id=$(gh api \ 41 | -H "Accept: application/vnd.github+json" \ 42 | -H "X-GitHub-Api-Version: 2022-11-28" \ 43 | repos/${{ github.repository }}/actions/runs/${{ github.event.workflow_run.id }}/artifacts | jq -r '.artifacts[] | select(.name == "rcc-smoke-sha") | .id') 44 | 45 | if [ -n "${artifact_id}" ]; then 46 | # Download artifact 47 | curl -L -o rcc-smoke-sha.zip \ 48 | -H "Accept: application/vnd.github+json" \ 49 | -H "Authorization: Bearer ${GH_TOKEN}" \ 50 | -H "X-GitHub-Api-Version: 2022-11-28" \ 51 | https://api.github.com/repos/${{ github.repository }}/actions/artifacts/${artifact_id}/zip 52 | 53 | # Unzip artifact 54 | unzip rcc-smoke-sha.zip 55 | 56 | # Read artifact 57 | sha=$(cat rcc-smoke-sha.txt) 58 | 59 | # Clean up 60 | rm rcc-smoke-sha.zip rcc-smoke-sha.txt 61 | fi 62 | else 63 | state="pending" 64 | fi 65 | 66 | if [ -z "${sha}" ]; then 67 | sha=${{ github.event.workflow_run.head_sha }} 68 | fi 69 | 70 | html_url=${{ github.event.workflow_run.html_url }} 71 | description=${{ github.event.workflow_run.name }} 72 | 73 | gh api \ 74 | --method POST \ 75 | -H "Accept: application/vnd.github+json" \ 76 | -H "X-GitHub-Api-Version: 2022-11-28" \ 77 | repos/${{ github.repository }}/statuses/${sha} \ 78 | -f "state=${state}" -f "target_url=${html_url}" -f "description=${description}" -f "context=rcc" 79 | shell: bash 80 | 
-------------------------------------------------------------------------------- /.github/workflows/check/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to check an R package" 2 | inputs: 3 | results: 4 | description: Slug for check results 5 | required: true 6 | 7 | runs: 8 | using: "composite" 9 | steps: 10 | - uses: r-lib/actions/check-r-package@v2 11 | with: 12 | # Fails on R 3.6 on Windows, remove when this job is removed? 13 | args: 'c("--no-manual", "--as-cran", "--no-multiarch")' 14 | error-on: ${{ env.RCMDCHECK_ERROR_ON || '"note"' }} 15 | 16 | - name: Show test output 17 | if: always() 18 | run: | 19 | ## -- Show test output -- 20 | echo "::group::Test output" 21 | find check -name '*.Rout*' -exec head -n 1000000 '{}' \; || true 22 | echo "::endgroup::" 23 | shell: bash 24 | 25 | - name: Upload check results 26 | if: failure() 27 | uses: actions/upload-artifact@main 28 | with: 29 | name: ${{ inputs.results }}-results 30 | path: check 31 | -------------------------------------------------------------------------------- /.github/workflows/commit/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to commit changes to the repository" 2 | inputs: 3 | token: 4 | description: "GitHub token" 5 | required: true 6 | outputs: 7 | sha: 8 | description: "SHA of generated commit" 9 | value: ${{ steps.commit.outputs.sha }} 10 | 11 | runs: 12 | using: "composite" 13 | steps: 14 | - name: Check for changes 15 | id: check 16 | run: | 17 | set -x 18 | if [ -n "$(git status --porcelain)" ]; then 19 | echo "has_changes=true" | tee -a $GITHUB_OUTPUT 20 | else 21 | echo "has_changes=false" | tee -a $GITHUB_OUTPUT 22 | fi 23 | shell: bash 24 | 25 | - name: Commit if changed, create a PR if protected 26 | id: commit 27 | if: steps.check.outputs.has_changes == 'true' 28 | env: 29 | GITHUB_TOKEN: ${{ inputs.token }} 30 | run: | 31 | set -x 32 | protected=${{ github.ref_protected }} 33 | foreign=${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }} 34 | is_pr=${{ github.event_name == 'pull_request' }} 35 | if [ "${is_pr}" = "true" ]; then 36 | # Running on a PR - will use reviewdog in next step 37 | echo "Code changes detected on PR, will suggest changes via reviewdog" 38 | echo "use_reviewdog=true" | tee -a $GITHUB_OUTPUT 39 | git reset HEAD 40 | git status 41 | elif [ "${foreign}" = "true" ]; then 42 | # https://github.com/krlmlr/actions-sync/issues/44 43 | echo "Can't push to foreign branch" 44 | elif [ "${protected}" = "true" ]; then 45 | current_branch=$(git branch --show-current) 46 | new_branch=gha-commit-$(git rev-parse --short HEAD) 47 | git checkout -b ${new_branch} 48 | git add . 
49 | git commit -m "chore: Auto-update from GitHub Actions"$'\n'$'\n'"Run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" 50 | # Force-push, used in only one place 51 | # Alternative: separate branch names for each usage 52 | git push -u origin HEAD -f 53 | 54 | existing_pr=$(gh pr list --state open --base main --head ${new_branch} --json number --jq '.[] | .number') 55 | if [ -n "${existing_pr}" ]; then 56 | echo "Existing PR: ${existing_pr}" 57 | else 58 | gh pr create --base main --head ${new_branch} --title "chore: Auto-update from GitHub Actions" --body "Run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" 59 | fi 60 | 61 | gh workflow run rcc -f ref=$(git rev-parse HEAD) 62 | gh pr merge --merge --auto 63 | else 64 | git fetch 65 | if [ -n "${GITHUB_HEAD_REF}" ]; then 66 | git add . 67 | git stash save 68 | git switch ${GITHUB_HEAD_REF} 69 | git merge origin/${GITHUB_BASE_REF} --no-edit 70 | git stash pop 71 | fi 72 | git add . 73 | git commit -m "chore: Auto-update from GitHub Actions"$'\n'$'\n'"Run: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" 74 | git push -u origin HEAD 75 | 76 | # Only set output if changed 77 | echo sha=$(git rev-parse HEAD) >> $GITHUB_OUTPUT 78 | fi 79 | shell: bash 80 | 81 | - name: Suggest changes via reviewdog 82 | if: steps.commit.outputs.use_reviewdog == 'true' 83 | uses: krlmlr/action-suggester@main 84 | with: 85 | github_token: ${{ inputs.token }} 86 | tool_name: "rcc" 87 | -------------------------------------------------------------------------------- /.github/workflows/covr/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to run covr for an R package" 2 | description: "Run covr to check code coverage for an R package and upload results to Codecov." 
3 | inputs: 4 | token: 5 | description: codecov token 6 | required: false 7 | 8 | runs: 9 | using: "composite" 10 | steps: 11 | - name: Run coverage check 12 | run: | 13 | if (dir.exists("tests/testthat")) { 14 | cov <- covr::package_coverage( 15 | quiet = FALSE, 16 | clean = FALSE, 17 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 18 | ) 19 | covr::to_cobertura(cov) 20 | } else { 21 | message("No tests found, coverage not tested.") 22 | } 23 | shell: Rscript {0} 24 | 25 | - uses: codecov/codecov-action@v5 26 | with: 27 | # Fail if token is given 28 | fail_ci_if_error: ${{ inputs.token != '' }} 29 | files: ./cobertura.xml 30 | plugins: noop 31 | disable_search: true 32 | token: ${{ inputs.token }} 33 | 34 | - name: Show testthat output 35 | if: always() 36 | run: | 37 | ## -------------------------------------------------------------------- 38 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 39 | shell: bash 40 | 41 | - name: Upload test results 42 | if: failure() 43 | uses: actions/upload-artifact@v4 44 | with: 45 | name: coverage-test-failures 46 | path: ${{ runner.temp }}/package 47 | -------------------------------------------------------------------------------- /.github/workflows/dep-matrix/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to compute a matrix with all dependent packages" 2 | outputs: 3 | matrix: 4 | description: "Generated matrix" 5 | value: ${{ steps.set-matrix.outputs.matrix }} 6 | 7 | runs: 8 | using: "composite" 9 | steps: 10 | - id: set-matrix 11 | run: | 12 | # Determine package dependencies 13 | # From remotes 14 | read_dcf <- function(path) { 15 | fields <- colnames(read.dcf(path)) 16 | as.list(read.dcf(path, keep.white = fields)[1, ]) 17 | } 18 | 19 | re_match <- function(text, pattern, perl = TRUE, ...) { 20 | 21 | stopifnot(is.character(pattern), length(pattern) == 1, !is.na(pattern)) 22 | text <- as.character(text) 23 | 24 | match <- regexpr(pattern, text, perl = perl, ...) 
25 | 26 | start <- as.vector(match) 27 | length <- attr(match, "match.length") 28 | end <- start + length - 1L 29 | 30 | matchstr <- substring(text, start, end) 31 | matchstr[ start == -1 ] <- NA_character_ 32 | 33 | res <- data.frame( 34 | stringsAsFactors = FALSE, 35 | .text = text, 36 | .match = matchstr 37 | ) 38 | 39 | if (!is.null(attr(match, "capture.start"))) { 40 | 41 | gstart <- attr(match, "capture.start") 42 | glength <- attr(match, "capture.length") 43 | gend <- gstart + glength - 1L 44 | 45 | groupstr <- substring(text, gstart, gend) 46 | groupstr[ gstart == -1 ] <- NA_character_ 47 | dim(groupstr) <- dim(gstart) 48 | 49 | res <- cbind(groupstr, res, stringsAsFactors = FALSE) 50 | } 51 | 52 | names(res) <- c(attr(match, "capture.names"), ".text", ".match") 53 | class(res) <- c("tbl_df", "tbl", class(res)) 54 | res 55 | } 56 | 57 | dev_split_ref <- function(x) { 58 | re_match(x, "^(?<pkg>[^@#]+)(?<ref>[@#].*)?$") 59 | } 60 | 61 | has_dev_dep <- function(package) { 62 | cran_url <- "https://cloud.r-project.org" 63 | 64 | refs <- dev_split_ref(package) 65 | url <- file.path(cran_url, "web", "packages", refs[["pkg"]], "DESCRIPTION") 66 | 67 | f <- tempfile() 68 | on.exit(unlink(f)) 69 | 70 | utils::download.file(url, f) 71 | desc <- read_dcf(f) 72 | 73 | url_fields <- c(desc$URL, desc$BugReports) 74 | 75 | if (length(url_fields) == 0) { 76 | return(FALSE) 77 | } 78 | 79 | pkg_urls <- unlist(strsplit(url_fields, "[[:space:]]*,[[:space:]]*")) 80 | 81 | # Remove trailing "/issues" from the BugReports URL 82 | pkg_urls <- sub("/issues$", "", pkg_urls) 83 | 84 | valid_domains <- c("github[.]com", "gitlab[.]com", "bitbucket[.]org") 85 | 86 | parts <- 87 | re_match(pkg_urls, 88 | sprintf("^https?://(?<domain>%s)/(?<username>%s)/(?<repo>%s)(?:/(?<subdir>%s))?", 89 | domain = paste0(valid_domains, collapse = "|"), 90 | username = "[^/]+", 91 | repo = "[^/@#]+", 92 | subdir = "[^/@$ ]+" 93 | ) 94 | )[c("domain", "username", "repo", "subdir")] 95 | 96 | # Remove cases which don't match and duplicates 97 | 98 | parts <- unique(stats::na.omit(parts)) 99 | 100 | nrow(parts) == 1 101 | } 102 | 103 | if (!requireNamespace("desc", quietly = TRUE)) { 104 | install.packages("desc") 105 | } 106 | 107 | deps_df <- desc::desc_get_deps() 108 | deps_df <- deps_df[deps_df$type %in% c("Depends", "Imports", "LinkingTo", "Suggests"), ] 109 | 110 | packages <- sort(deps_df$package) 111 | packages <- intersect(packages, rownames(available.packages())) 112 | 113 | valid_dev_dep <- vapply(packages, has_dev_dep, logical(1)) 114 | 115 | # https://github.com/r-lib/remotes/issues/576 116 | valid_dev_dep[packages %in% c("igraph", "duckdb", "logging")] <- FALSE 117 | 118 | deps <- packages[valid_dev_dep] 119 | if (any(!valid_dev_dep)) { 120 | msg <- paste0( 121 | "Could not determine development repository for packages: ", 122 | paste(packages[!valid_dev_dep], collapse = ", ") 123 | ) 124 | writeLines(paste0("::warning::", msg)) 125 | } 126 | 127 | json <- paste0( 128 | '{"package":[', 129 | paste0('"', deps, '"', collapse = ","), 130 | ']}' 131 | ) 132 | writeLines(json) 133 | writeLines(paste0("matrix=", json), Sys.getenv("GITHUB_OUTPUT")) 134 | shell: Rscript {0} 135 | -------------------------------------------------------------------------------- /.github/workflows/dep-suggests-matrix/action.R: -------------------------------------------------------------------------------- 1 | # FIXME: Dynamic lookup by parsing https://svn.r-project.org/R/tags/ 2 | get_deps <- function() { 3 | # Determine package dependencies 4 | if (!requireNamespace("desc", quietly =
TRUE)) { 5 | install.packages("desc") 6 | } 7 | 8 | deps_df <- desc::desc_get_deps() 9 | deps_df_optional <- deps_df$package[deps_df$type %in% c("Suggests", "Enhances")] 10 | deps_df_hard <- deps_df$package[deps_df$type %in% c("Depends", "Imports", "LinkingTo")] 11 | deps_df_base <- unlist(tools::standard_package_names(), use.names = FALSE) 12 | 13 | packages <- sort(deps_df_optional) 14 | packages <- intersect(packages, rownames(available.packages())) 15 | 16 | # Too big to fail, or can't be avoided: 17 | off_limits <- c("testthat", "rmarkdown", "rcmdcheck", deps_df_hard, deps_df_base) 18 | off_limits_dep <- unlist(tools::package_dependencies(off_limits, recursive = TRUE, which = "strong")) 19 | setdiff(packages, c(off_limits, off_limits_dep)) 20 | } 21 | 22 | if (Sys.getenv("GITHUB_BASE_REF") != "") { 23 | print(Sys.getenv("GITHUB_BASE_REF")) 24 | system("git fetch origin ${GITHUB_BASE_REF}") 25 | # Use .. to avoid having to fetch the entire history 26 | # https://github.com/krlmlr/actions-sync/issues/45 27 | diff_cmd <- "git diff origin/${GITHUB_BASE_REF}.. -- R/ tests/ | egrep '^[+][^+]' | grep -q ::" 28 | diff_lines <- system(diff_cmd, intern = TRUE) 29 | if (length(diff_lines) > 0) { 30 | writeLines("Changes using :: in R/ or tests/:") 31 | writeLines(diff_lines) 32 | packages <- get_deps() 33 | } else { 34 | writeLines("No changes using :: found in R/ or tests/, not checking without suggested packages") 35 | packages <- character() 36 | } 37 | } else { 38 | writeLines("No GITHUB_BASE_REF, checking without suggested packages") 39 | packages <- get_deps() 40 | } 41 | 42 | if (length(packages) > 0) { 43 | json <- paste0( 44 | '{"package":[', 45 | paste0('"', packages, '"', collapse = ","), 46 | "]}" 47 | ) 48 | writeLines(paste0("matrix=", json), Sys.getenv("GITHUB_OUTPUT")) 49 | writeLines(json) 50 | } else { 51 | writeLines("No suggested packages found.") 52 | } 53 | -------------------------------------------------------------------------------- /.github/workflows/dep-suggests-matrix/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to compute a matrix with all suggested packages" 2 | outputs: 3 | matrix: 4 | description: "Generated matrix" 5 | value: ${{ steps.set-matrix.outputs.matrix }} 6 | 7 | runs: 8 | using: "composite" 9 | steps: 10 | - id: set-matrix 11 | run: | 12 | Rscript ./.github/workflows/dep-suggests-matrix/action.R 13 | shell: bash 14 | -------------------------------------------------------------------------------- /.github/workflows/get-extra/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to determine extra packages to be installed" 2 | outputs: 3 | packages: 4 | description: "List of extra packages" 5 | value: ${{ steps.get-extra.outputs.packages }} 6 | 7 | runs: 8 | using: "composite" 9 | steps: 10 | - name: Get extra packages 11 | id: get-extra 12 | run: | 13 | set -x 14 | packages=$( ( grep Config/gha/extra-packages DESCRIPTION || true ) | cut -d " " -f 2) 15 | echo packages=$packages >> $GITHUB_OUTPUT 16 | shell: bash 17 | -------------------------------------------------------------------------------- /.github/workflows/git-identity/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to set up a Git identity" 2 | 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Configure Git identity 7 | run: | 8 | env | sort 9 | git config --local user.name "$GITHUB_ACTOR" 10 | git 
config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 11 | shell: bash 12 | -------------------------------------------------------------------------------- /.github/workflows/install/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to run for installing R packages" 2 | inputs: 3 | token: 4 | description: GitHub token, set to secrets.GITHUB_TOKEN 5 | required: true 6 | r-version: 7 | description: Passed on to r-lib/actions/setup-r@v2 8 | required: false 9 | default: release 10 | install-r: 11 | description: Passed on to r-lib/actions/setup-r@v2 12 | required: false 13 | default: true 14 | needs: 15 | description: Passed on to r-lib/actions/setup-r-dependencies@v2 16 | required: false 17 | default: "" 18 | packages: 19 | description: Passed on to r-lib/actions/setup-r-dependencies@v2 20 | required: false 21 | default: deps::., any::sessioninfo 22 | extra-packages: 23 | description: Passed on to r-lib/actions/setup-r-dependencies@v2 24 | required: false 25 | default: any::rcmdcheck 26 | cache-version: 27 | description: Passed on to r-lib/actions/setup-r-dependencies@v2 28 | required: false 29 | default: 1 30 | 31 | runs: 32 | using: "composite" 33 | steps: 34 | - name: Set environment variables 35 | run: | 36 | echo "R_REMOTES_NO_ERRORS_FROM_WARNINGS=true" | tee -a $GITHUB_ENV 37 | echo "R_KEEP_PKG_SOURCE=yes" | tee -a $GITHUB_ENV 38 | echo "_R_CHECK_SYSTEM_CLOCK_=false" | tee -a $GITHUB_ENV 39 | echo "_R_CHECK_FUTURE_FILE_TIMESTAMPS_=false" | tee -a $GITHUB_ENV 40 | # prevent rgl issues because no X11 display is available 41 | echo "RGL_USE_NULL=true" | tee -a $GITHUB_ENV 42 | # from https://github.com/r-devel/r-dev-web/blob/main/CRAN/QA/Kurt/lib/R/Scripts/check_CRAN_incoming.R 43 | echo "_R_CHECK_CRAN_INCOMING_CHECK_FILE_URIS_=true" | tee -a $GITHUB_ENV 44 | echo "_R_CHECK_CRAN_INCOMING_NOTE_GNU_MAKE_=true" | tee -a $GITHUB_ENV 45 | echo "_R_CHECK_PACKAGE_DEPENDS_IGNORE_MISSING_ENHANCES_=true" | tee -a $GITHUB_ENV 46 | echo "_R_CHECK_CODE_CLASS_IS_STRING_=true" | tee -a $GITHUB_ENV 47 | echo "_R_CHECK_CODOC_VARIABLES_IN_USAGES_=true" | tee -a $GITHUB_ENV 48 | echo "_R_CHECK_CONNECTIONS_LEFT_OPEN_=true" | tee -a $GITHUB_ENV 49 | echo "_R_CHECK_DATALIST_=true" | tee -a $GITHUB_ENV 50 | echo "_R_CHECK_NEWS_IN_PLAIN_TEXT_=true" | tee -a $GITHUB_ENV 51 | echo "_R_CHECK_PACKAGES_USED_CRAN_INCOMING_NOTES_=true" | tee -a $GITHUB_ENV 52 | echo "_R_CHECK_RD_CONTENTS_KEYWORDS_=true" | tee -a $GITHUB_ENV 53 | echo "_R_CHECK_R_DEPENDS_=warn" | tee -a $GITHUB_ENV 54 | echo "_R_CHECK_S3_METHODS_SHOW_POSSIBLE_ISSUES_=true" | tee -a $GITHUB_ENV 55 | echo "_R_CHECK_THINGS_IN_TEMP_DIR_=true" | tee -a $GITHUB_ENV 56 | echo "_R_CHECK_UNDOC_USE_ALL_NAMES_=true" | tee -a $GITHUB_ENV 57 | echo "_R_CHECK_URLS_SHOW_301_STATUS_=true" | tee -a $GITHUB_ENV 58 | echo "_R_CXX_USE_NO_REMAP_=true" | tee -a $GITHUB_ENV 59 | # There is no way to disable recency and frequency checks when the incoming checks are run 60 | # echo "_R_CHECK_CRAN_INCOMING_=true" | tee -a $GITHUB_ENV 61 | echo "_R_CHECK_CRAN_INCOMING_SKIP_LARGE_VERSION_=true" | tee -a $GITHUB_ENV 62 | echo "_R_CHECK_FORCE_SUGGESTS_=false" | tee -a $GITHUB_ENV 63 | shell: bash 64 | 65 | - name: Set environment variables (non-macOS only) 66 | if: runner.os != 'macOS' 67 | run: | 68 | echo "_R_CHECK_THINGS_IN_OTHER_DIRS_=true" | tee -a $GITHUB_ENV 69 | shell: bash 70 | 71 | - name: Set environment variables (non-Windows only) 72 | if: runner.os != 'Windows' 73 | run: | 74 | echo 
"_R_CHECK_BASHISMS_=true" | tee -a $GITHUB_ENV 75 | shell: bash 76 | 77 | - name: Update apt 78 | if: runner.os == 'Linux' 79 | run: | 80 | sudo apt-get update 81 | sudo apt-get install -y aspell 82 | echo "_R_CHECK_CRAN_INCOMING_USE_ASPELL_=true" | tee -a $GITHUB_ENV 83 | shell: bash 84 | 85 | - name: Remove pkg-config@0.29.2 86 | if: runner.os == 'macOS' 87 | run: | 88 | brew uninstall pkg-config@0.29.2 || true 89 | shell: bash 90 | 91 | - uses: r-lib/actions/setup-pandoc@v2 92 | 93 | - uses: r-lib/actions/setup-r@v2 94 | with: 95 | r-version: ${{ inputs.r-version }} 96 | install-r: ${{ inputs.install-r }} 97 | http-user-agent: ${{ matrix.config.http-user-agent }} 98 | use-public-rspm: true 99 | 100 | - id: get-extra 101 | run: | 102 | set -x 103 | packages=$( ( grep Config/gha/extra-packages DESCRIPTION || true ) | cut -d " " -f 2-) 104 | echo packages=$packages >> $GITHUB_OUTPUT 105 | shell: bash 106 | 107 | - uses: r-lib/actions/setup-r-dependencies@v2 108 | env: 109 | GITHUB_PAT: ${{ inputs.token }} 110 | with: 111 | pak-version: stable 112 | needs: ${{ inputs.needs }} 113 | packages: ${{ inputs.packages }} 114 | extra-packages: ${{ inputs.extra-packages }} ${{ ( matrix.covr && 'r-lib/covr#611 xml2' ) || '' }} ${{ steps.get-extra.outputs.packages }} 115 | cache-version: ${{ inputs.cache-version }} 116 | 117 | - name: Add pkg.lock to .gitignore 118 | run: | 119 | set -x 120 | if ! [ -f .github/.gitignore ] || [ -z "$(grep '^/pkg.lock$' .github/.gitignore)" ]; then 121 | echo /pkg.lock >> .github/.gitignore 122 | fi 123 | shell: bash 124 | 125 | - name: Add fake qpdf and checkbashisms 126 | if: runner.os == 'Linux' 127 | run: | 128 | sudo ln -s $(which true) /usr/local/bin/qpdf 129 | sudo ln -s $(which true) /usr/local/bin/checkbashisms 130 | shell: bash 131 | 132 | - name: Install ccache 133 | uses: krlmlr/ccache-action@parallel-dir 134 | with: 135 | max-size: 10G 136 | verbose: 1 137 | save: false 138 | restore: false 139 | 140 | - name: Use ccache for compiling R code, and parallelize 141 | run: | 142 | mkdir -p ~/.R 143 | echo 'CC := ccache $(CC)' >> ~/.R/Makevars 144 | echo 'CXX := ccache $(CXX)' >> ~/.R/Makevars 145 | echo 'CXX11 := ccache $(CXX11)' >> ~/.R/Makevars 146 | echo 'CXX14 := ccache $(CXX14)' >> ~/.R/Makevars 147 | echo 'CXX17 := ccache $(CXX17)' >> ~/.R/Makevars 148 | echo 'MAKEFLAGS = -j2' >> ~/.R/Makevars 149 | cat ~/.R/Makevars 150 | 151 | echo 'CCACHE_SLOPPINESS=locale,time_macros' | tee -a $GITHUB_ENV 152 | 153 | # echo 'CCACHE_DEBUG=true' | tee -a $GITHUB_ENV 154 | # echo "CCACHE_DEBUGDIR=$(dirname $(pwd))/ccache-debug" | tee -a $GITHUB_ENV 155 | # mkdir -p $(dirname $(pwd))/.ccache-debug 156 | 157 | echo 'PKG_BUILD_EXTRA_FLAGS=false' | tee -a $GITHUB_ENV 158 | 159 | # Repair 160 | git rm -rf .ccache || true 161 | rm -rf .ccache 162 | shell: bash 163 | 164 | - name: Show R CMD config --all 165 | run: | 166 | R CMD config --all 167 | shell: bash 168 | -------------------------------------------------------------------------------- /.github/workflows/lock.yaml: -------------------------------------------------------------------------------- 1 | name: "Lock threads" 2 | permissions: 3 | issues: write 4 | pull-requests: write 5 | discussions: write 6 | on: 7 | workflow_dispatch: 8 | schedule: 9 | - cron: "37 2 * * *" 10 | 11 | jobs: 12 | lock: 13 | runs-on: ubuntu-24.04 14 | steps: 15 | - uses: krlmlr/lock-threads@patch-1 16 | with: 17 | github-token: ${{ github.token }} 18 | issue-inactive-days: "365" 19 | issue-lock-reason: "" 20 | issue-comment: > 21 | This old 
thread has been automatically locked. If you think you have 22 | found something related to this, please open a new issue and link to this 23 | old issue if necessary. 24 | -------------------------------------------------------------------------------- /.github/workflows/matrix-check/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to check a matrix with all R and OS versions, computed with the versions-matrix action" 2 | inputs: 3 | matrix: 4 | description: "Generated matrix" 5 | required: true 6 | 7 | runs: 8 | using: "composite" 9 | steps: 10 | - name: Install json2yaml 11 | run: | 12 | sudo npm install -g json2yaml 13 | shell: bash 14 | 15 | - run: | 16 | matrix='${{ inputs.matrix }}' 17 | if [ -n "${matrix}" ]; then 18 | echo $matrix | jq . 19 | echo $matrix | json2yaml 20 | else 21 | echo "No matrix found" 22 | fi 23 | shell: bash 24 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown-build/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to build a pkgdown website" 2 | 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Build site 7 | run: | 8 | pkgdown::build_site() 9 | shell: Rscript {0} 10 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown-deploy/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to deploy a pkgdown website" 2 | 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Deploy site 7 | uses: nick-fields/retry@v3 8 | with: 9 | timeout_minutes: 15 10 | max_attempts: 10 11 | command: | 12 | R -q -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 13 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Also included in R-CMD-check.yaml, this workflow only listens to pushes to branches 3 | # that start with "docs*" or "cran-*" and does not need to act on pushes to the main branch. 4 | on: 5 | push: 6 | branches: 7 | - "docs*" 8 | - "cran-*" 9 | # The main branch is excluded here, it is handled by the R-CMD-check workflow. 10 | # This workflow is only for handling pushes to designated branches. 11 | workflow_dispatch: 12 | 13 | name: pkgdown 14 | 15 | concurrency: 16 | group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.sha }}-${{ github.base_ref || '' }} 17 | cancel-in-progress: true 18 | 19 | jobs: 20 | pkgdown: 21 | runs-on: ubuntu-24.04 22 | 23 | name: "pkgdown" 24 | 25 | # Begin custom: services 26 | # End custom: services 27 | 28 | steps: 29 | - uses: actions/checkout@v4 30 | 31 | - uses: ./.github/workflows/rate-limit 32 | with: 33 | token: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | - uses: ./.github/workflows/git-identity 36 | if: github.event_name == 'push' 37 | 38 | - uses: ./.github/workflows/custom/before-install 39 | if: hashFiles('.github/workflows/custom/before-install/action.yml') != '' 40 | 41 | - uses: ./.github/workflows/install 42 | with: 43 | token: ${{ secrets.GITHUB_TOKEN }} 44 | cache-version: pkgdown-2 45 | needs: website 46 | extra-packages: r-lib/pkgdown local::. 
47 | 48 | - uses: ./.github/workflows/custom/after-install 49 | if: hashFiles('.github/workflows/custom/after-install/action.yml') != '' 50 | 51 | - uses: ./.github/workflows/pkgdown-build 52 | if: github.event_name != 'push' 53 | 54 | - uses: ./.github/workflows/pkgdown-deploy 55 | if: github.event_name == 'push' 56 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | # macos is actually better here due to native binary packages 10 | runs-on: macos-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: r-lib/actions/pr-fetch@v2 16 | with: 17 | repo-token: ${{ secrets.GITHUB_TOKEN }} 18 | - uses: r-lib/actions/setup-r@v2 19 | - name: Configure Git identity 20 | run: | 21 | env | sort 22 | git config --local user.name "$GITHUB_ACTOR" 23 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 24 | shell: bash 25 | - name: Install dependencies 26 | run: | 27 | install.packages(c("remotes", "roxygen2"), type = "binary") 28 | remotes::install_deps(dependencies = TRUE) 29 | shell: Rscript {0} 30 | - name: Document 31 | run: | 32 | roxygen2::roxygenise() 33 | shell: Rscript {0} 34 | - name: commit 35 | run: | 36 | if [ -n "$(git status --porcelain man/ NAMESPACE)" ]; then 37 | git add man/ NAMESPACE 38 | git commit -m 'Document' 39 | fi 40 | - uses: r-lib/actions/pr-push@v2 41 | with: 42 | repo-token: ${{ secrets.GITHUB_TOKEN }} 43 | style: 44 | if: startsWith(github.event.comment.body, '/style') 45 | name: style 46 | # macos is actually better here due to native binary packages 47 | runs-on: macos-latest 48 | env: 49 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 50 | steps: 51 | - uses: actions/checkout@v4 52 | - uses: r-lib/actions/pr-fetch@v2 53 | with: 54 | repo-token: ${{ secrets.GITHUB_TOKEN }} 55 | - uses: r-lib/actions/setup-r@v2 56 | - name: Configure Git identity 57 | run: | 58 | env | sort 59 | git config --local user.name "$GITHUB_ACTOR" 60 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 61 | shell: bash 62 | - name: Install dependencies 63 | run: | 64 | install.packages(c("styler", "roxygen2"), type = "binary") 65 | shell: Rscript {0} 66 | - name: Style 67 | run: | 68 | styler::style_pkg(strict = FALSE) 69 | shell: Rscript {0} 70 | - name: commit 71 | run: | 72 | if [ -n "$(git status --porcelain '*.R' '*.Rmd')" ]; then 73 | git add '*.R' '*.Rmd' 74 | git commit -m 'Style' 75 | fi 76 | - uses: r-lib/actions/pr-push@v2 77 | with: 78 | repo-token: ${{ secrets.GITHUB_TOKEN }} 79 | merge: 80 | if: startsWith(github.event.comment.body, '/merge') 81 | name: merge 82 | runs-on: ubuntu-22.04 83 | steps: 84 | - name: Create and merge pull request 85 | run: | 86 | set -exo pipefail 87 | PR_DETAILS=$( curl -s --header "authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/repos/${{ github.repository }}/pulls/${{ github.event.issue.number }} ) 88 | echo "$PR_DETAILS" | jq . 
89 | PR_BASE=$(echo "$PR_DETAILS" | jq -r .base.ref) 90 | PR_HEAD=$(echo "$PR_DETAILS" | jq -r .head.ref) 91 | PR_URL=$(curl -s -X POST --header "authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" --data '{ "head": "'$PR_BASE'", "base": "'$PR_HEAD'", "title": "Merge back PR target branch", "body": "Target: #${{ github.event.issue.number }}" }' https://api.github.com/repos/${{ github.repository }}/pulls | jq -r .url ) 92 | echo $PR_URL 93 | # Merging here won't run CI/CD 94 | # curl -s -X PUT --header "authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" $PR_URL/merge 95 | # A mock job just to ensure we have a successful build status 96 | finish: 97 | runs-on: ubuntu-22.04 98 | steps: 99 | - run: true 100 | -------------------------------------------------------------------------------- /.github/workflows/rate-limit/action.yml: -------------------------------------------------------------------------------- 1 | name: "Check GitHub rate limits" 2 | inputs: 3 | token: # id of input 4 | description: GitHub token, pass secrets.GITHUB_TOKEN 5 | required: true 6 | 7 | runs: 8 | using: "composite" 9 | steps: 10 | - name: Check rate limits 11 | run: | 12 | curl -s --header "authorization: Bearer ${{ inputs.token }}" https://api.github.com/rate_limit 13 | shell: bash 14 | -------------------------------------------------------------------------------- /.github/workflows/revdep.yaml: -------------------------------------------------------------------------------- 1 | # This workflow creates many jobs, run only when a branch is created 2 | on: 3 | push: 4 | branches: 5 | - "revdep*" # never run automatically on main branch 6 | 7 | name: revdep 8 | 9 | jobs: 10 | matrix: 11 | runs-on: ubuntu-22.04 12 | outputs: 13 | matrix: ${{ steps.set-matrix.outputs.matrix }} 14 | 15 | name: Collect revdeps 16 | 17 | env: 18 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 19 | RSPM: https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 21 | # prevent rgl issues because no X11 display is available 22 | RGL_USE_NULL: true 23 | # Begin custom: env vars 24 | # End custom: env vars 25 | 26 | steps: 27 | - name: Check rate limits 28 | run: | 29 | curl -s --header "authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/rate_limit 30 | shell: bash 31 | 32 | - uses: actions/checkout@v4 33 | 34 | # FIXME: Avoid reissuing successful jobs 35 | # https://docs.github.com/en/free-pro-team@latest/rest/reference/actions#list-jobs-for-a-workflow-run 36 | # https://docs.github.com/en/free-pro-team@latest/rest/reference/actions#workflow-runs 37 | - id: set-matrix 38 | run: | 39 | package <- read.dcf("DESCRIPTION")[, "Package"][[1]] 40 | deps <- tools:::package_dependencies(package, reverse = TRUE, which = c("Depends", "Imports", "LinkingTo", "Suggests"))[[1]] 41 | json <- paste0( 42 | '{"package":[', 43 | paste0('"', deps, '"', collapse = ","), 44 | ']}' 45 | ) 46 | writeLines(json) 47 | writeLines(paste0("matrix=", json), Sys.getenv("GITHUB_OUTPUT")) 48 | shell: Rscript {0} 49 | 50 | check-matrix: 51 | runs-on: ubuntu-22.04 52 | needs: matrix 53 | steps: 54 | - name: Install json2yaml 55 | run: | 56 | sudo npm install -g json2yaml 57 | 58 | - name: Check matrix definition 59 | run: | 60 | matrix='${{ needs.matrix.outputs.matrix }}' 61 | echo $matrix 62 | echo $matrix | jq .
63 | echo $matrix | json2yaml 64 | 65 | R-CMD-check: 66 | needs: matrix 67 | 68 | runs-on: ubuntu-22.04 69 | 70 | name: 'revdep: ${{ matrix.package }}' 71 | 72 | # Begin custom: services 73 | # End custom: services 74 | 75 | strategy: 76 | fail-fast: false 77 | matrix: ${{fromJson(needs.matrix.outputs.matrix)}} 78 | 79 | env: 80 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 81 | RSPM: https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 82 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 83 | # prevent rgl issues because no X11 display is available 84 | RGL_USE_NULL: true 85 | # Begin custom: env vars 86 | # End custom: env vars 87 | 88 | steps: 89 | - name: Check rate limits 90 | run: | 91 | curl -s --header "authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/rate_limit 92 | shell: bash 93 | 94 | - uses: actions/checkout@v4 95 | 96 | # Begin custom: before install 97 | # End custom: before install 98 | 99 | - name: Use RSPM 100 | run: | 101 | mkdir -p /home/runner/work/_temp/Library 102 | echo 'local({release <- system2("lsb_release", "-sc", stdout = TRUE); options(repos=c(CRAN = paste0("https://packagemanager.rstudio.com/all/__linux__/", release, "/latest")), HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version$platform, R.version$arch, R.version$os)))}); .libPaths("/home/runner/work/_temp/Library")' | sudo tee /etc/R/Rprofile.site 103 | 104 | - name: Install remotes 105 | run: | 106 | if (!requireNamespace("curl", quietly = TRUE)) install.packages("curl") 107 | if (!requireNamespace("remotes", quietly = TRUE)) install.packages("remotes") 108 | shell: Rscript {0} 109 | 110 | - uses: r-lib/actions/setup-pandoc@v2 111 | 112 | - name: Install system dependencies 113 | if: runner.os == 'Linux' 114 | run: | 115 | sudo apt-get update -y 116 | Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "22.04")); package <- "${{ matrix.package }}"; deps <- tools::package_dependencies(package, which = "Suggests")[[1]]; lapply(c(package, deps), function(x) { writeLines(remotes::system_requirements("ubuntu", "22.04", package = x)) })' | sort | uniq > .github/deps.sh 117 | cat .github/deps.sh 118 | sudo sh < .github/deps.sh 119 | 120 | - name: Install package 121 | run: | 122 | package <- "${{ matrix.package }}" 123 | install.packages(package, dependencies = TRUE) 124 | remotes::install_cran("rcmdcheck") 125 | shell: Rscript {0} 126 | 127 | - name: Session info old 128 | run: | 129 | options(width = 100) 130 | if (!requireNamespace("sessioninfo", quietly = TRUE)) install.packages("sessioninfo") 131 | pkgs <- installed.packages()[, "Package"] 132 | sessioninfo::session_info(pkgs, include_base = TRUE) 133 | shell: Rscript {0} 134 | 135 | # Begin custom: after install 136 | # End custom: after install 137 | 138 | - name: Check old 139 | env: 140 | _R_CHECK_CRAN_INCOMING_: false 141 | _R_CHECK_SYSTEM_CLOCK_: false 142 | _R_CHECK_FUTURE_FILE_TIMESTAMPS_: false 143 | # Avoid downloading binary package from RSPM 144 | run: | 145 | package <- "${{ matrix.package }}" 146 | options(HTTPUserAgent = "gha") 147 | path <- download.packages(package, destdir = ".github")[, 2] 148 | print(path) 149 | 150 | dir <- file.path("revdep", package) 151 | dir.create(dir, showWarnings = FALSE, recursive = TRUE) 152 | check <- rcmdcheck::rcmdcheck(path, args = c("--no-manual", "--as-cran"), error_on = "never", check_dir = file.path(dir, "check")) 153 | file.rename(file.path(dir, "check"), file.path(dir, "old")) 154 | saveRDS(check, file.path(dir, "old.rds")) 155 | 
shell: Rscript {0} 156 | 157 | - name: Install local package 158 | run: | 159 | remotes::install_local(".", force = TRUE) 160 | shell: Rscript {0} 161 | 162 | - name: Session info new 163 | run: | 164 | options(width = 100) 165 | pkgs <- installed.packages()[, "Package"] 166 | sessioninfo::session_info(pkgs, include_base = TRUE) 167 | shell: Rscript {0} 168 | 169 | - name: Check new 170 | env: 171 | _R_CHECK_CRAN_INCOMING_: false 172 | _R_CHECK_SYSTEM_CLOCK_: false 173 | _R_CHECK_FUTURE_FILE_TIMESTAMPS_: false 174 | run: | 175 | package <- "${{ matrix.package }}" 176 | path <- dir(".github", pattern = paste0("^", package), full.names = TRUE)[[1]] 177 | print(path) 178 | 179 | dir <- file.path("revdep", package) 180 | check <- rcmdcheck::rcmdcheck(path, args = c("--no-manual", "--as-cran"), error_on = "never", check_dir = file.path(dir, "check")) 181 | file.rename(file.path(dir, "check"), file.path(dir, "new")) 182 | saveRDS(check, file.path(dir, "new.rds")) 183 | shell: Rscript {0} 184 | 185 | - name: Compare 186 | run: | 187 | package <- "${{ matrix.package }}" 188 | dir <- file.path("revdep", package) 189 | old <- readRDS(file.path(dir, "old.rds")) 190 | new <- readRDS(file.path(dir, "new.rds")) 191 | compare <- rcmdcheck::compare_checks(old, new) 192 | compare 193 | cmp <- compare$cmp 194 | if (!identical(cmp[cmp$which == "old", "output"], cmp[cmp$which == "new", "output"])) { 195 | if (!requireNamespace("waldo", quietly = TRUE)) install.packages("waldo") 196 | print(waldo::compare(old, new)) 197 | 198 | stop("Check output differs.") 199 | } 200 | shell: Rscript {0} 201 | 202 | - name: Upload check results 203 | if: failure() 204 | uses: actions/upload-artifact@main 205 | with: 206 | name: ${{ matrix.package }}-results 207 | path: revdep/${{ matrix.package }} 208 | 209 | - name: Check rate limits 210 | if: always() 211 | run: | 212 | curl -s --header "authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" https://api.github.com/rate_limit 213 | shell: bash 214 | -------------------------------------------------------------------------------- /.github/workflows/roxygenize/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to create documentation with roxygen2" 2 | 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Roxygenize 7 | run: | 8 | try(roxygen2::roxygenize()) 9 | shell: Rscript {0} 10 | -------------------------------------------------------------------------------- /.github/workflows/style/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to auto-style a package" 2 | 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Check air.toml 7 | id: check 8 | run: | 9 | set -x 10 | if [ -f air.toml ]; then 11 | echo enabled=true >> $GITHUB_OUTPUT 12 | else 13 | echo enabled=false >> $GITHUB_OUTPUT 14 | fi 15 | shell: bash 16 | 17 | - name: Install air 18 | if: ${{ steps.check.outputs.enabled == 'true' }} 19 | uses: posit-dev/setup-air@v1 20 | 21 | - name: Run air 22 | if: ${{ steps.check.outputs.enabled == 'true' }} 23 | run: | 24 | air format . 
25 | shell: bash 26 | 27 | - name: Check Git status 28 | if: ${{ steps.check.outputs.enabled == 'true' }} 29 | run: | 30 | git status 31 | shell: bash 32 | -------------------------------------------------------------------------------- /.github/workflows/update-snapshots/action.yml: -------------------------------------------------------------------------------- 1 | name: "Action to create pull requests for updated testthat snapshots" 2 | description: > 3 | This action will run `testthat::test_local()` for tests that seem to use snapshots, 4 | this is determined by reading and grepping the test files. 5 | If the tests are failing, snapshots are updated, and a pull request is opened. 6 | inputs: 7 | base: 8 | description: "The base branch to create the pull request against." 9 | required: false 10 | default: "main" 11 | 12 | runs: 13 | using: "composite" 14 | steps: 15 | - name: Run tests on test files that use snapshots 16 | id: run-tests 17 | run: | 18 | ## -- Run tests on test files that use snapshots -- 19 | rx <- "^test-(.*)[.][rR]$" 20 | files <- dir("tests/testthat", pattern = rx) 21 | has_snapshot <- vapply(files, function(.x) any(grepl("snapshot", readLines(file.path("tests/testthat", .x)), fixed = TRUE)), logical(1)) 22 | if (any(has_snapshot)) { 23 | patterns <- gsub(rx, "^\\1$", files[has_snapshot]) 24 | pattern <- paste0(patterns, collapse = "|") 25 | tryCatch( 26 | { 27 | result <- as.data.frame(testthat::test_local(pattern = pattern, reporter = "silent", stop_on_failure = FALSE)) 28 | print(result) 29 | failures <- result[result$failed + result$warning > 0, ] 30 | if (nrow(failures) > 0) { 31 | writeLines("Snapshot tests failed/warned.") 32 | print(failures[names(failures) != "result"]) 33 | print(failures$result) 34 | testthat::snapshot_accept() 35 | writeLines("changed=true", Sys.getenv("GITHUB_OUTPUT")) 36 | } else { 37 | writeLines("Snapshot tests ran successfully.") 38 | } 39 | }, 40 | error = print 41 | ) 42 | } else { 43 | writeLines("No snapshots found.") 44 | } 45 | shell: Rscript {0} 46 | 47 | - name: Add snapshots to Git 48 | if: ${{ steps.run-tests.outputs.changed }} 49 | run: | 50 | ## -- Add snapshots to Git -- 51 | mkdir -p tests/testthat/_snaps 52 | git add -- tests/testthat/_snaps 53 | shell: bash 54 | 55 | - name: Check changed files 56 | if: ${{ steps.run-tests.outputs.changed }} 57 | id: check-changed 58 | run: | 59 | set -x 60 | if [ "${{ github.event_name}}" != "pull_request" ] ; then 61 | echo "changed=$(git status --porcelain -- tests/testthat/_snaps | head -n 1)" | tee -a $GITHUB_OUTPUT 62 | fi 63 | shell: bash 64 | 65 | - name: Derive branch name 66 | if: ${{ steps.check-changed.outputs.changed }} 67 | id: matrix-desc 68 | run: | 69 | set -x 70 | config=$(echo '${{ toJSON(matrix) }}' | jq -c .) 
71 | echo "text=$(echo ${config})" | tee -a $GITHUB_OUTPUT 72 | echo "branch=$(echo ${config} | sed -r 's/[^0-9a-zA-Z]+/-/g;s/^-//;s/-$//')" | tee -a $GITHUB_OUTPUT 73 | shell: bash 74 | 75 | - name: Create pull request 76 | # Fall through if PR, will use reviewdog/action-suggester in the commit action 77 | if: ${{ steps.check-changed.outputs.changed }} 78 | id: cpr 79 | uses: peter-evans/create-pull-request@v6 80 | with: 81 | base: ${{ inputs.base }} 82 | branch: snapshot-${{ inputs.base }}-${{ github.job }}-${{ steps.matrix-desc.outputs.branch }} 83 | delete-branch: true 84 | title: "test: Snapshot updates for ${{ github.job }} (${{ steps.matrix-desc.outputs.text }})" 85 | body: "Automated changes by [create-pull-request](https://github.com/peter-evans/create-pull-request) GitHub action${{ github.event.number && format(' for #{0}', github.event.number) || '' }}." 86 | add-paths: | 87 | tests/testthat/_snaps 88 | 89 | - name: Fail if pull request created 90 | if: ${{ steps.cpr.outputs.pull-request-number }} 91 | run: | 92 | false 93 | shell: bash 94 | -------------------------------------------------------------------------------- /.github/workflows/versions-matrix/action.R: -------------------------------------------------------------------------------- 1 | # Determine active versions of R to test against 2 | tags <- xml2::read_html("https://svn.r-project.org/R/tags/") 3 | 4 | bullets <- 5 | tags |> 6 | xml2::xml_find_all("//li") |> 7 | xml2::xml_text() 8 | 9 | version_bullets <- grep("^R-([0-9]+-[0-9]+-[0-9]+)/$", bullets, value = TRUE) 10 | versions <- unique(gsub("^R-([0-9]+)-([0-9]+)-[0-9]+/$", "\\1.\\2", version_bullets)) 11 | 12 | r_release <- head(sort(as.package_version(versions), decreasing = TRUE), 5) 13 | 14 | deps <- desc::desc_get_deps() 15 | r_crit <- deps$version[deps$package == "R"] 16 | if (length(r_crit) == 1) { 17 | min_r <- as.package_version(gsub("^>= ([0-9]+[.][0-9]+)(?:.*)$", "\\1", r_crit)) 18 | r_release <- r_release[r_release >= min_r] 19 | } 20 | 21 | r_versions <- c("devel", as.character(r_release)) 22 | 23 | macos <- data.frame(os = "macos-latest", r = r_versions[2:3]) 24 | windows <- data.frame(os = "windows-latest", r = r_versions[1:3]) 25 | linux_devel <- data.frame(os = "ubuntu-22.04", r = r_versions[1], `http-user-agent` = "release", check.names = FALSE) 26 | linux <- data.frame(os = "ubuntu-22.04", r = r_versions[-1]) 27 | covr <- data.frame(os = "ubuntu-22.04", r = r_versions[2], covr = "true", desc = "with covr") 28 | 29 | include_list <- list(macos, windows, linux_devel, linux, covr) 30 | 31 | if (file.exists(".github/versions-matrix.R")) { 32 | custom <- source(".github/versions-matrix.R")$value 33 | if (is.data.frame(custom)) { 34 | custom <- list(custom) 35 | } 36 | include_list <- c(include_list, custom) 37 | } 38 | 39 | print(include_list) 40 | 41 | filter <- read.dcf("DESCRIPTION")[1, ]["Config/gha/filter"] 42 | if (!is.na(filter)) { 43 | filter_expr <- parse(text = filter)[[1]] 44 | subset_fun_expr <- bquote(function(x) subset(x, .(filter_expr))) 45 | subset_fun <- eval(subset_fun_expr) 46 | include_list <- lapply(include_list, subset_fun) 47 | print(include_list) 48 | } 49 | 50 | to_json <- function(x) { 51 | if (nrow(x) == 0) return(character()) 52 | parallel <- vector("list", length(x)) 53 | for (i in seq_along(x)) { 54 | parallel[[i]] <- paste0('"', names(x)[[i]], '":"', x[[i]], '"') 55 | } 56 | paste0("{", do.call(paste, c(parallel, sep = ",")), "}") 57 | } 58 | 59 | configs <- unlist(lapply(include_list, to_json)) 60 | json <- 
paste0('{"include":[', paste(configs, collapse = ","), "]}") 61 | 62 | if (Sys.getenv("GITHUB_OUTPUT") != "") { 63 | writeLines(paste0("matrix=", json), Sys.getenv("GITHUB_OUTPUT")) 64 | } 65 | writeLines(json) 66 | -------------------------------------------------------------------------------- /.github/workflows/versions-matrix/action.yml: -------------------------------------------------------------------------------- 1 | name: "Actions to compute a matrix with all R and OS versions" 2 | 3 | outputs: 4 | matrix: 5 | description: "Generated matrix" 6 | value: ${{ steps.set-matrix.outputs.matrix }} 7 | 8 | runs: 9 | using: "composite" 10 | steps: 11 | - name: Install json2yaml 12 | run: | 13 | sudo npm install -g json2yaml 14 | shell: bash 15 | 16 | - id: set-matrix 17 | run: | 18 | Rscript ./.github/workflows/versions-matrix/action.R 19 | shell: bash 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rdata 4 | .httr-oauth 5 | .DS_Store 6 | docs 7 | -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.1.0 2 | Date: 2022-11-03 05:59:23 UTC 3 | SHA: 7beab93ca2657d3ef31f05d1672a2a310b3c07d8 4 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: powerjoin 2 | Title: Extensions of 'dplyr' and 'fuzzyjoin' Join Functions 3 | Version: 0.1.0 4 | Authors@R: 5 | c(person(given = "Antoine", 6 | family = "Fabri", 7 | role = c("aut", "cre"), 8 | email = "antoine.fabri@gmail.com"), 9 | person(given = "Hadley", 10 | family = "Wickham", 11 | role = c("ctb"), 12 | email = "hadley@rstudio.com", 13 | comment = c( 14 | "aut/cre of dplyr", 15 | ORCID = "0000-0003-4757-117X")), 16 | person(given = "Romain", 17 | family = "François", 18 | role = "ctb", 19 | comment = c( 20 | "aut of dplyr", 21 | ORCID = "0000-0002-2444-4226")), 22 | person(given = "David", 23 | family = "Robinson", 24 | role = "ctb", 25 | email = "admiral.david@gmail.com", 26 | comment = "aut of fuzzyjoin"), 27 | person(given = "RStudio", 28 | role = c("cph", "fnd"), 29 | comment = "cph/fnd dplyr")) 30 | Description: We extend 'dplyr' and 'fuzzyjoin' join functions with 31 | features to preprocess the data, apply various data checks, and deal with 32 | conflicting columns. 
33 | License: MIT + file LICENSE 34 | Encoding: UTF-8 35 | LazyData: true 36 | Roxygen: list(markdown = TRUE) 37 | RoxygenNote: 7.3.3.9000 38 | Imports: 39 | dplyr, 40 | glue, 41 | rlang, 42 | tidyselect, 43 | vctrs, 44 | purrr, 45 | tibble, 46 | tidyr, 47 | cli, 48 | methods 49 | URL: https://github.com/moodymudskipper/powerjoin 50 | BugReports: https://github.com/moodymudskipper/powerjoin/issues 51 | Suggests: 52 | testthat (>= 3.0.0) 53 | Config/testthat/edition: 3 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2022 2 | COPYRIGHT HOLDER: powerjoin authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2022 powerjoin authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(c,powerjoin_check) 4 | S3method(power_full_join,data.frame) 5 | S3method(power_full_join,list) 6 | S3method(power_inner_join,data.frame) 7 | S3method(power_inner_join,list) 8 | S3method(power_left_join,data.frame) 9 | S3method(power_left_join,list) 10 | S3method(power_right_join,data.frame) 11 | S3method(power_right_join,list) 12 | S3method(print,powerjoin_check) 13 | S3method(tbl_vars,data.frame) 14 | export("%==%") 15 | export("%in.%") 16 | export(check_specs) 17 | export(coalesce_xy) 18 | export(coalesce_yx) 19 | export(complete_keys) 20 | export(full_diagnostic) 21 | export(nest_by_keys) 22 | export(pack_along_keys) 23 | export(paste_xy) 24 | export(paste_yx) 25 | export(power_full_join) 26 | export(power_inner_join) 27 | export(power_left_join) 28 | export(power_right_join) 29 | export(select_keys_and) 30 | export(summarize_by_keys) 31 | import(rlang) 32 | importFrom(dplyr,"%>%") 33 | importFrom(dplyr,across) 34 | importFrom(dplyr,arrange) 35 | importFrom(dplyr,as_tibble) 36 | importFrom(dplyr,auto_copy) 37 | importFrom(dplyr,bind_cols) 38 | importFrom(dplyr,coalesce) 39 | importFrom(dplyr,distinct) 40 | importFrom(dplyr,dplyr_reconstruct) 41 | importFrom(dplyr,full_join) 42 | importFrom(dplyr,group_by_at) 43 | importFrom(dplyr,group_vars) 44 | importFrom(dplyr,intersect) 45 | importFrom(dplyr,left_join) 46 | importFrom(dplyr,mutate) 47 | importFrom(dplyr,n_distinct) 48 | importFrom(dplyr,one_of) 49 | importFrom(dplyr,rename) 50 | importFrom(dplyr,select) 51 | importFrom(dplyr,select_at) 52 | importFrom(dplyr,setdiff) 53 | importFrom(dplyr,setequal) 54 | importFrom(dplyr,summarize) 55 | importFrom(dplyr,tibble) 56 | importFrom(dplyr,transmute) 57 | importFrom(dplyr,ungroup) 58 | importFrom(dplyr,union) 59 | importFrom(dplyr,vars) 60 | importFrom(glue,glue) 61 | importFrom(glue,glue_collapse) 62 | importFrom(methods,allNames) 63 | importFrom(purrr,map) 64 | importFrom(purrr,map_chr) 65 | importFrom(purrr,map_lgl) 66 | importFrom(stats,setNames) 67 | importFrom(tidyr,nest) 68 | importFrom(tidyr,pivot_longer) 69 | importFrom(tidyr,pivot_wider) 70 | importFrom(tidyselect,matches) 71 | importFrom(tidyselect,peek_vars) 72 | importFrom(utils,capture.output) 73 | importFrom(utils,getFromNamespace) 74 | importFrom(vctrs,vec_assign) 75 | importFrom(vctrs,vec_cast) 76 | importFrom(vctrs,vec_detect_missing) 77 | importFrom(vctrs,vec_group_loc) 78 | importFrom(vctrs,vec_in) 79 | importFrom(vctrs,vec_match) 80 | importFrom(vctrs,vec_ptype_common) 81 | importFrom(vctrs,vec_ptype_full) 82 | importFrom(vctrs,vec_size) 83 | importFrom(vctrs,vec_slice) 84 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # powerjoin 0.1.0 2 | 3 | * The join functions will fail if not provided a `check` argument built with `check_specs()` 4 | * New functions `paste_xy()` and `paste_yx()` to conflict columns by pasting, ignoring NAs and 5 | empty strings by default 6 | * using `rw ~ ...` in the `conflict` argument is better documented 7 | * The class of dates is not lost anymore when using `rw ~ ...` in the `conflict` argument 8 | * Obsolete imports were removed 9 | * Rebuilding the documentation solves the issues that got the package kicked from CRAN 10 | 
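11 | A minimal usage sketch of the conflict helpers listed above (the toy data frames below are invented purely for illustration; see README.md for the package's own, fuller examples):
12 | 
13 | ```r
14 | library(powerjoin)
15 | 
16 | x <- data.frame(id = 1:3, value = c("a", NA, ""))
17 | y <- data.frame(id = 1:3, value = c("", "b", "c"))
18 | 
19 | # `check_specs()` builds the `check` argument; `paste_xy()` resolves the
20 | # conflicting `value` columns by pasting, ignoring NAs and empty strings
21 | power_left_join(x, y, by = "id", check = check_specs(), conflict = paste_xy)
22 | 
23 | # `rw ~ ...` applies the right-hand side row by row to a pair of conflicting columns
24 | x2 <- data.frame(id = 1:2, n = c(1, NA))
25 | y2 <- data.frame(id = 1:2, n = c(10, 20))
26 | power_left_join(x2, y2, by = "id", conflict = rw ~ sum(.x, .y, na.rm = TRUE))
27 | ```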
-------------------------------------------------------------------------------- /R/coalesce.R: -------------------------------------------------------------------------------- 1 | #' Coalesce helpers 2 | #' 3 | #' These are wrappers around `dplyr::coalesce`, designed for convenient use in 4 | #' the `conflict` argument of \pkg{powerjoin}'s join functions. `coalesce_xy()` is 5 | #' just like `dplyr::coalesce` (except it takes only 2 arguments), while `coalesce_yx()` 6 | #' looks first in `y` and then in `x` if `y` is missing. 7 | #' 8 | #' @param x A vector 9 | #' @param y A vector 10 | #' 11 | #' @export 12 | #' @return A vector 13 | #' @examples 14 | #' coalesce_xy(c(NA, 2, 3), c(11, 12, NA)) 15 | #' coalesce_yx(c(NA, 2, 3), c(11, 12, NA)) 16 | coalesce_xy <- function(x, y) { 17 | dplyr::coalesce(x, y) 18 | } 19 | 20 | #' @rdname coalesce_xy 21 | #' @export 22 | coalesce_yx <- function(x, y) { 23 | dplyr::coalesce(y, x) 24 | } 25 | 26 | #' Paste helpers 27 | #' 28 | #' These are similar to `paste()` but by default ignore `NA` and empty strings 29 | #' (`""`). If they are found in a conflicting column we return the value from 30 | #' the other column without using the separator. If both columns have such values 31 | #' we return an empty string. 32 | #' 33 | #' @param x A vector 34 | #' @param y A vector 35 | #' @param sep Separator inserted between the two pasted values 36 | #' @param na How to treat `NA`s: they are ignored by default; if `NA`, the result 37 | #' will be `NA`, just as with `stringr::str_c()`; if `"NA"`, NAs will be coerced 38 | #' to character just as with `paste()`. Any other string can be used 39 | #' @param ignore_empty Whether to ignore empty strings, to avoid trailing and leading separators 40 | #' 41 | #' @return A character vector 42 | #' @export 43 | #' 44 | #' @examples 45 | #' paste_xy(letters[1:3], c("d", NA, "")) 46 | #' paste_yx(letters[1:3], c("d", NA, "")) 47 | #' paste_xy(letters[1:3], c("d", NA, ""), na = NA, ignore_empty = FALSE) 48 | #' paste_xy(letters[1:3], c("d", NA, ""), na = "NA", ignore_empty = FALSE) 49 | paste_xy <- function(x, y, sep = " ", na = NULL, ignore_empty = TRUE) { 50 | if (is.null(na) && ignore_empty) { # default: treat NAs and empty strings as absent 51 | x_lgl <- is.na(x) | x == "" 52 | y_lgl <- is.na(y) | y == "" 53 | res <- dplyr::case_when( 54 | x_lgl & y_lgl ~ "", 55 | x_lgl ~ y, 56 | y_lgl ~ x, 57 | TRUE ~ paste(x, y, sep = sep) 58 | ) 59 | return(res) 60 | } 61 | 62 | if (is.null(na)) { # ignore NAs but keep empty strings 63 | x_lgl <- is.na(x) 64 | y_lgl <- is.na(y) 65 | res <- dplyr::case_when( 66 | x_lgl & y_lgl ~ "", 67 | x_lgl ~ y, 68 | y_lgl ~ x, 69 | TRUE ~ paste(x, y, sep = sep) 70 | ) 71 | return(res) 72 | } 73 | 74 | if (is.na(na)) { # na = NA: propagate NAs, as stringr::str_c() does 75 | res <- dplyr::case_when( 76 | is.na(x) | is.na(y) ~ NA_character_, 77 | TRUE ~ paste(x, y, sep = sep) 78 | ) 79 | return(res) 80 | } 81 | 82 | x_lgl <- !is.na(x) & x == "" # `na` given as a string: NAs are left for paste() to format 83 | y_lgl <- !is.na(y) & y == "" 84 | res <- dplyr::case_when( 85 | x_lgl & y_lgl ~ "", 86 | x_lgl ~ y, 87 | y_lgl ~ x, 88 | TRUE ~ paste(x, y, sep = sep) 89 | ) 90 | res 91 | } 92 | 93 | #' @export 94 | #' @rdname paste_xy 95 | paste_yx <- function(x, y, sep = " ", na = NULL, ignore_empty = TRUE) { 96 | paste_xy(y, x, sep = sep, na = na, ignore_empty = ignore_empty) 97 | } 98 | -------------------------------------------------------------------------------- /R/conflict.R: -------------------------------------------------------------------------------- 1 | as_conflict_function <- function(conflict) { 2 | if(is_function(conflict)) return(conflict) 3 | if(!is_formula(conflict)) abort("wrong `conflict` argument") 4 | if(identical(conflict[[2]], quote(rw))) { 5 | conflict <- conflict[-2] 6 | fun
<- function(x,y) mapply(as_function(conflict), x, y) 7 | return(fun) 8 | } 9 | as_function(conflict) 10 | } 11 | 12 | handle_conflicts <- function(out, x_slicer, y_slicer, conflicted_data, conflict) { 13 | # return unaltered input if no conflict 14 | if(is.null(conflict) || is.null(conflicted_data)) return(out) 15 | 16 | if(is.list(conflict)) { 17 | res <- list() 18 | for(nm in names(conflict)) { 19 | conflict_i <- conflict[[nm]] 20 | # special case for patch 21 | if(identical(conflict_i, "patch")) { 22 | res[[nm]] <- conflicted_data$x[[nm]][x_slicer] 23 | res[[nm]][!is.na(y_slicer)] <- 24 | conflicted_data$y[[nm]][y_slicer][!is.na(y_slicer)] 25 | } else { 26 | conflict_i <- as_conflict_function(conflict_i) 27 | res[[nm]] <- conflict_i( 28 | conflicted_data$x[[nm]][x_slicer], 29 | conflicted_data$y[[nm]][y_slicer]) 30 | } 31 | } 32 | out[names(res)] <- res 33 | return(out) 34 | } 35 | 36 | # special case for patch 37 | if(identical(conflict, "patch")) { 38 | res <- conflicted_data$x[x_slicer,] 39 | res[!is.na(y_slicer),] <- 40 | conflicted_data$y[y_slicer,][!is.na(y_slicer),] 41 | out[names(res)] <- res 42 | return(out) 43 | } 44 | 45 | # vectorized conflict support 46 | res <- Map( 47 | as_conflict_function(conflict), 48 | conflicted_data$x[x_slicer,], 49 | conflicted_data$y[y_slicer,]) 50 | 51 | out[names(res)] <- res 52 | out 53 | } 54 | -------------------------------------------------------------------------------- /R/dplyr.R: -------------------------------------------------------------------------------- 1 | 2 | #nocov start 3 | 4 | # from dplyr 1.0.7 5 | `glubort` <- function(header, ..., .envir = parent.frame(), .abort = abort) { 6 | text <- glue(..., .envir = .envir) 7 | if (!is_null(header)) { 8 | text <- paste0(header, " ", text) 9 | } 10 | .abort(text) 11 | } 12 | 13 | # from dplyr 1.0.7 14 | `check_duplicate_vars` <- function(vars, input) { 15 | dup <- duplicated(vars) 16 | if (any(dup)) { 17 | abort(c(glue("Input columns in `{input}` must be unique."), 18 | x = glue("Problem with {err_vars(vars[dup])}.") 19 | )) 20 | } 21 | } 22 | 23 | # from dplyr 1.0.7 24 | `bad_args` <- function(args, ..., .envir = parent.frame()) { 25 | glubort(fmt_args(args), ..., .envir = .envir) 26 | } 27 | 28 | # from dplyr 1.0.7 29 | `fmt_args` <- function(x) { 30 | x <- parse_args(x) 31 | fmt_obj(x) 32 | } 33 | 34 | # from dplyr 1.0.7 35 | `parse_args` <- function(x) { 36 | x <- unlist(list(x), recursive = FALSE) 37 | is_fml <- map_lgl(x, is_formula) 38 | x[is_fml] <- map_chr(map(x[is_fml], "[[", 2), as_string) 39 | unlist(x) 40 | } 41 | 42 | # from dplyr 1.0.7 43 | `fmt_obj` <- function(x) { 44 | fmt_comma(fmt_obj1(x)) 45 | } 46 | 47 | # from dplyr 1.0.7 48 | `fmt_comma` <- function(..., .max = 6) { 49 | x <- paste0(...) 50 | if (length(x) > .max) { 51 | length(x) <- .max 52 | x[[.max]] <- "..." 53 | } 54 | commas(x) 55 | } 56 | 57 | # from dplyr 1.0.7 58 | `commas` <- function(...) 
{ 59 | paste0(..., collapse = ", ") 60 | } 61 | 62 | # from dplyr 1.0.7 63 | `fmt_obj1` <- function(x) { 64 | paste0("`", x, "`") 65 | } 66 | 67 | # from dplyr 1.0.7 68 | `check_join_vars` <- function(vars, names) { 69 | if (!is.character(vars)) { 70 | abort("join columns must be character vectors.") 71 | } 72 | na <- is.na(vars) 73 | if (any(na)) { 74 | abort(c("Join columns must be not NA.", x = glue("Problem at position {err_vars(na)}."))) 75 | } 76 | dup <- duplicated(vars) 77 | if (any(dup)) { 78 | abort(c("Join columns must be unique.", x = glue("Problem at position {err_vars(dup)}."))) 79 | } 80 | missing <- setdiff(vars, names) 81 | if (length(missing) > 0) { 82 | abort(c("Join columns must be present in data.", x = glue("Problem with {err_vars(missing)}."))) 83 | } 84 | } 85 | 86 | # from dplyr 1.0.7 87 | `standardise_join_suffix` <- function(x) { 88 | if (!is.character(x) || length(x) != 2) { 89 | abort(c("`suffix` must be a character vector of length 2.", 90 | i = glue("suffix is {friendly_type_of(x)} of length {length(x)}.") 91 | )) 92 | } 93 | if (any(is.na(x))) { 94 | bad_args("suffix", "can't be NA.") 95 | } 96 | list(x = x[[1]], y = x[[2]]) 97 | } 98 | 99 | # from dplyr 1.0.7 100 | `tbl_vars` <- function(x) { 101 | return(new_sel_vars(tbl_vars_dispatch(x), group_vars(x))) 102 | UseMethod("tbl_vars") 103 | } 104 | 105 | # from dplyr 1.0.7 106 | `new_sel_vars` <- function(vars, group_vars) { 107 | structure(vars, groups = group_vars, class = c( 108 | "dplyr_sel_vars", 109 | "character" 110 | )) 111 | } 112 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 113 | # tbl_vars_dispatch (copied from dplyr:::tbl_vars_dispatch) 114 | # from dplyr 1.0.7 115 | `tbl_vars_dispatch` <- function(x) { 116 | UseMethod("tbl_vars") 117 | } 118 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 119 | # tbl_vars.data.frame (copied from dplyr:::tbl_vars.data.frame) 120 | # from dplyr 1.0.7 121 | #' @export 122 | `tbl_vars.data.frame` <- function(x) { 123 | names(x) 124 | } 125 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 126 | # check_na_matches (copied from dplyr:::check_na_matches) 127 | # from dplyr 1.0.7 128 | check_na_matches <- function(na_matches = c("na", "never")) { 129 | if (isNamespaceLoaded("pkgconfig")) { 130 | conf <- asNamespace("pkgconfig")$get_config("dplyr::na_matches") 131 | if (!is.null(conf)) { 132 | warn(c( 133 | "`dplyr::na_matches` pkgconfig options is now ignored.", 134 | "Please set `na_matches` directly." 
135 | )) 136 | } 137 | } 138 | arg_match(na_matches) == "na" 139 | } 140 | 141 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 142 | # index_flatten (copied from dplyr:::index_flatten) 143 | # from dplyr 1.0.7 144 | index_flatten <- function(x) { 145 | unlist(x, recursive = FALSE, use.names = FALSE) 146 | } 147 | 148 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 149 | # err_vars (copied from dplyr:::index_flatten) 150 | # from dplyr 1.0.7 151 | err_vars <- function (x) 152 | { 153 | if (is.logical(x)) { 154 | x <- which(x) 155 | } 156 | if (is.character(x)) { 157 | x <- encodeString(x, quote = "`") 158 | } 159 | glue_collapse(x, sep = ", ", last = if (length(x) <= 2) 160 | " and " 161 | else ", and ") 162 | } 163 | 164 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 165 | # friendly_type_of (copied from dplyr:::friendly_type_of) 166 | # from dplyr 1.0.7 167 | friendly_type_of <- function (x) 168 | { 169 | if (is.object(x)) { 170 | sprintf("a `%s` object", fmt_classes(x)) 171 | } 172 | else { 173 | as_friendly_type(typeof(x)) 174 | } 175 | } 176 | 177 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 178 | # fmt_classes (copied from dplyr:::fmt_classes) 179 | # from dplyr 1.0.7 180 | fmt_classes <- function (x) 181 | { 182 | paste(class(x), collapse = "/") 183 | } 184 | 185 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 186 | # add_suffixes (copied from dplyr:::add_suffixes) 187 | # from dplyr 1.0.7 188 | add_suffixes <- function (x, y, suffix) 189 | { 190 | if (identical(suffix, "")) { 191 | return(x) 192 | } 193 | out <- rep_along(x, na_chr) 194 | for (i in seq_along(x)) { 195 | nm <- x[[i]] 196 | while (nm %in% y || nm %in% out[seq_len(i - 1)]) { 197 | nm <- paste0(nm, suffix) 198 | } 199 | out[[i]] <- nm 200 | } 201 | out 202 | } 203 | 204 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 205 | # as_friendly_type (copied from dplyr:::as_friendly_type) 206 | # from dplyr 1.0.7 207 | as_friendly_type <- function (type) 208 | { 209 | switch(type, logical = "a logical vector", integer = "an integer vector", 210 | numeric = , double = "a double vector", complex = "a complex vector", 211 | character = "a character vector", raw = "a raw vector", 212 | string = "a string", list = "a list", `NULL` = "NULL", 213 | environment = "an environment", externalptr = "a pointer", 214 | weakref = "a weak reference", S4 = "an S4 object", name = , 215 | symbol = "a symbol", language = "a call", pairlist = "a pairlist node", 216 | expression = "an expression vector", quosure = "a quosure", 217 | formula = "a formula", char = "an internal string", promise = "an internal promise", 218 | ... = "an internal dots object", any = "an internal `any` object", 219 | bytecode = "an internal bytecode object", primitive = , 220 | builtin = , special = "a primitive function", closure = "a function", 221 | type) 222 | } 223 | 224 | #nocov end 225 | 226 | -------------------------------------------------------------------------------- /R/extended-equality.R: -------------------------------------------------------------------------------- 1 | #' Extended equality operators 2 | #' 3 | #' `%==%` is the bone operator, it works like `==` but `NA %==% 1` is `FALSE` and 4 | #' `NA %==% NA` is `TRUE`. `%in.%` is the a vectorized `%in%`, that 5 | #' can be seen as a rowwise `%in%` when applied to data frame columns. 
These are 6 | #' convenient helpers for fuzzy joins. 7 | #' 8 | #' @param x A vector 9 | #' @param y A vector for `%==%`, a list of vectors for `%in.%` 10 | #' 11 | #' @export 12 | #' @name extended-equality 13 | #' @examples 14 | #' df1 <- data.frame(key = c("b", "z")) 15 | #' df2 <- data.frame(key1 = c("a", "b", "c"), key2 = c("x", "y", "z"), val = 1:3) 16 | #' power_left_join(df1, df2, ~ .x$key %in.% list(.y$key1, .y$key2)) 17 | #' 18 | #' df3 <- data.frame(key1 = c("a", NA)) 19 | #' df4 <- data.frame(key2 = c("a", "b", NA), val = 1:3) 20 | #' 21 | #' # note the difference 22 | #' power_inner_join(df3, df4, by = ~ .x$key1 == .y$key2) 23 | #' power_inner_join(df3, df4, by = ~ .x$key1 %==% .y$key2) 24 | #' 25 | #' # typically we would only use the conditions above as part of more complex conditions. 26 | #' # In this precise case they are equivalent to these equi joins 27 | #' power_inner_join(df3, df4, by = c(key1 = "key2")) 28 | #' power_inner_join(df3, df4, by = c(key1 = "key2"), na_matches = "never") 29 | `%==%` <- function(x, y) { 30 | is.na(x) & is.na(y) | !is.na(x) & !is.na(y) & x == y 31 | } 32 | 33 | #' @export 34 | #' @rdname extended-equality 35 | `%in.%` <- function(x, y) { 36 | conds <- lapply(y, function(yi) x %==% yi) 37 | Reduce(`|`, conds) 38 | } 39 | -------------------------------------------------------------------------------- /R/fuzzy.R: -------------------------------------------------------------------------------- 1 | fuzzy_specs <- function(by, env) { 2 | 3 | by_x <- list() 4 | by_y <- list() 5 | new_cols <- list() 6 | rec <- function(call) { 7 | if(!is.call(call) || length(call) == 1) return(call) 8 | calling_dollar <- identical(call[[1]], quote(`$`)) 9 | calling_dollar_on_x <- calling_dollar && identical(call[[2]], quote(`.x`)) 10 | if(calling_dollar_on_x) { 11 | var <- as.character(call[[3]]) 12 | by_x[[length(by_x) + 1]] <<- var 13 | return(call) 14 | } 15 | calling_dollar_on_y <- calling_dollar && identical(call[[2]], quote(`.y`)) 16 | if(calling_dollar_on_y) { 17 | var <- as.character(call[[3]]) 18 | by_y[[length(by_y) + 1]] <<- var 19 | return(call) 20 | } 21 | 22 | calling_bracket2 <- identical(call[[1]], quote(`[[`)) 23 | calling_bracket2_on_x <- calling_bracket2 && identical(call[[2]], quote(`.x`)) 24 | if(calling_bracket2_on_x) { 25 | var <- eval(call[[3]], env) 26 | by_x[[length(by_x) + 1]] <<- var 27 | return(call) 28 | } 29 | calling_bracket2_on_y <- calling_bracket2 && identical(call[[2]], quote(`.y`)) 30 | if(calling_bracket2_on_y) { 31 | var <- eval(call[[3]], env) 32 | by_y[[length(by_y) + 1]] <<- var 33 | return(call) 34 | } 35 | calling_arrow <- identical(call[[1]], quote(`<-`)) 36 | if(calling_arrow) { 37 | new_cols[[length(new_cols) + 1]] <<- as.character(call[[2]]) 38 | } 39 | call[] <- lapply(call, rec) 40 | call 41 | } 42 | multi_match_fun_body <- rec(by[[2]]) 43 | by_x <- unlist(unique(by_x)) 44 | by_y <- unlist(unique(by_y)) 45 | if(length(new_cols)) { 46 | multi_match_fun_body <- as.call(c(quote(data.frame), ..match.. 
= multi_match_fun_body, syms(new_cols))) 47 | } 48 | 49 | #multi_match_fun_body <- # depends if we have new cols 50 | multi_match_fun <- as.function(c(alist(.x=,.y=), multi_match_fun_body)) 51 | multi_by <- list(x = by_x, y = by_y) 52 | list( 53 | multi_match_fun = multi_match_fun, 54 | multi_by = multi_by, 55 | extra_cols = unlist(new_cols) 56 | ) 57 | } 58 | 59 | join_rows_fuzzy <- function(x, y, by, multi_match_fun, type = "left") { 60 | multi_match_fun <- purrr::as_mapper(multi_match_fun) 61 | 62 | number_x_rows <- nrow(x) 63 | number_y_rows <- nrow(y) 64 | 65 | indices_x <- x %>% 66 | ungroup() %>% 67 | select_at(by$x) %>% 68 | mutate(indices = seq_len(number_x_rows)) %>% 69 | group_by_at(vars(-one_of("indices"))) %>% 70 | tidyr::nest() %>% 71 | mutate(indices = purrr::map(data, "indices")) %>% 72 | ungroup() 73 | 74 | indices_y <- y %>% 75 | ungroup() %>% 76 | select_at(by$y) %>% 77 | mutate(indices = seq_len(number_y_rows)) %>% 78 | group_by_at(vars(-one_of("indices"))) %>% 79 | tidyr::nest() %>% 80 | mutate(indices = purrr::map(data, "indices")) %>% 81 | ungroup() 82 | 83 | ux <- indices_x[by$x] 84 | uy <- indices_y[by$y] 85 | 86 | ix <- rep(seq(nrow(ux)), nrow(uy)) 87 | iy <- rep(seq(nrow(uy)), each = nrow(ux)) 88 | 89 | ux_input <- ux[ix,, drop = FALSE] 90 | uy_input <- uy[iy,, drop = FALSE] 91 | 92 | m <- multi_match_fun(ux_input, uy_input) 93 | 94 | extra_cols <- NULL 95 | if (is.data.frame(m)) { 96 | if (ncol(m) > 1) { 97 | extra_cols <- m[, -1, drop = FALSE] 98 | } 99 | m <- m[[1]] 100 | } 101 | m[is.na(m)] <- FALSE 102 | 103 | if (sum(m) == 0) { 104 | # there are no matches 105 | matches <- tibble::tibble(x = numeric(0), y = numeric(0)) 106 | matches <- bind_cols(matches, extra_cols[0,, drop = FALSE]) 107 | } else { 108 | 109 | x_indices_l <- indices_x$indices[ix[m]] 110 | y_indices_l <- indices_y$indices[iy[m]] 111 | xls <- lengths(x_indices_l) 112 | yls <- lengths(y_indices_l) 113 | x_rep <- unlist(purrr::map2(x_indices_l, yls, function(x, y) rep(x, each = y))) 114 | y_rep <- unlist(purrr::map2(y_indices_l, xls, function(y, x) rep(y, x))) 115 | 116 | matches <- tibble::tibble(x = x_rep, y = y_rep) 117 | 118 | if (!is.null(extra_cols)) { 119 | extra_indices <- rep(which(m), xls * yls) 120 | extra_cols_rep <- extra_cols[extra_indices, , drop = FALSE] 121 | matches <- bind_cols(matches, extra_cols_rep) 122 | } 123 | } 124 | 125 | #----------------------------------------------------------------------------- 126 | matches <- arrange(matches, x, y) 127 | 128 | # fill in indices of the x, y, or both 129 | # curious if there's a higher performance approach 130 | if (type == "left") { 131 | matches <- tibble::tibble(x = seq_len(number_x_rows)) %>% 132 | left_join(matches, by = "x") 133 | } else if (type == "right") { 134 | matches <- tibble::tibble(y = seq_len(number_y_rows)) %>% 135 | left_join(matches, by = "y") 136 | } else if (type == "full") { 137 | matches <- matches %>% 138 | full_join(tibble::tibble(x = seq_len(number_x_rows)), by = "x", multiple = "all") %>% 139 | full_join(tibble::tibble(y = seq_len(number_y_rows)), by = "y", multiple = "all") 140 | } 141 | 142 | #----------------------------------------------------------------------------- 143 | # harmonize with dplyr 144 | na_x_ind <- is.na(matches$x) 145 | y_extra <- matches$y[na_x_ind] 146 | x_loc <- matches$x[!na_x_ind] 147 | y_loc <- matches$y[!na_x_ind] 148 | x_unmatched <- setdiff(seq(number_x_rows), x_loc[!is.na(y_loc)]) 149 | y_unmatched <- setdiff(seq(number_y_rows), y_loc[!is.na(x_loc)]) 150 | 151 | 
extra_cols <- matches[-(1:2)] 152 | 153 | list(x = x_loc, y = y_loc, y_extra = y_extra, 154 | x_unmatched = x_unmatched, y_unmatched = y_unmatched, 155 | extra_cols = extra_cols) 156 | } 157 | 158 | 159 | 160 | -------------------------------------------------------------------------------- /R/powerjoin-package.R: -------------------------------------------------------------------------------- 1 | #' @importFrom dplyr auto_copy as_tibble group_vars ungroup mutate group_by_at 2 | #' @importFrom dplyr select_at vars bind_cols arrange full_join dplyr_reconstruct 3 | #' @importFrom dplyr select transmute coalesce summarize tibble left_join n_distinct 4 | #' @importFrom dplyr distinct rename one_of %>% across 5 | #' @importFrom dplyr setdiff union intersect setequal 6 | #' @importFrom tidyselect peek_vars matches 7 | #' @importFrom utils capture.output getFromNamespace 8 | #' @importFrom methods allNames 9 | #' @importFrom stats setNames 10 | #' @importFrom glue glue glue_collapse 11 | #' @importFrom tidyr nest pivot_longer pivot_wider 12 | #' @importFrom purrr map map_lgl map_chr 13 | #' @importFrom vctrs vec_cast vec_ptype_common vec_slice vec_assign vec_detect_missing 14 | #' @importFrom vctrs vec_group_loc vec_in vec_match vec_ptype_full vec_size 15 | #' @import rlang 16 | #' @keywords internal 17 | "_PACKAGE" 18 | 19 | globalVariables(c( 20 | "type", 21 | "data", 22 | "name", 23 | "value" 24 | )) 25 | 26 | # The following block is used by usethis to automatically manage 27 | # roxygen namespace tags. Modify with care! 28 | ## usethis namespace: start 29 | ## usethis namespace: end 30 | NULL 31 | -------------------------------------------------------------------------------- /R/preprocess.R: -------------------------------------------------------------------------------- 1 | #' Preprocess powerjoin inputs 2 | #' 3 | #' These functions are named after the tidyverse (dplyr and tidyr) functions `select`, `summarize`, 4 | #' `nest`, `pack`, `pivot_wider` and `pivot_longer` and are designed to avoid 5 | #' repetition of key columns when preprocessing the data for a join. They should 6 | #' only be used in the `x` and `y` arguments of \pkg{powerjoin} join functions. No 7 | #' further transformation should be applied on top of them. 8 | #' 9 | #' Unlike their tidyverse counterparts these just add an attribute to the input and 10 | #' don't reshape it. The join function then preprocesses the inputs using these 11 | #' attributes and the keys. 12 | #' 13 | #' @param name Name of created column 14 | #' @inheritParams tidyr::pivot_wider 15 | #' @inheritParams tidyr::pivot_longer 16 | #' @name preprocess_inputs 17 | #' @return A data frame identical to the `.data` but with a `"powerjoin_preprocess"` 18 | #' attribute to be handled by the join functions 19 | #' @examples 20 | #' # in practice you'll mostly use those in join function calls directly 21 | #' x <- select_keys_and(head(iris, 2), Sepal.Width) 22 | #' # all it does is add an attribute that will be processed by the join function 23 | #' attr(x, "powerjoin_preprocess") 24 | #' # see `?power_left_join` or README for practical examples 25 | NULL 26 | 27 | #' @export 28 | #' @rdname preprocess_inputs 29 | select_keys_and <- function(.data, ...) { 30 | attr(.data, "powerjoin_preprocess") <- list(type = "select_keys_and", args = enquos(...)) 31 | .data 32 | } 33 | 34 | #' @export 35 | #' @rdname preprocess_inputs 36 | summarize_by_keys <- function(.data, ...) 
{ 37 | attr(.data, "powerjoin_preprocess") <- list(type = "summarize_by_keys", args = enquos(...)) 38 | .data 39 | } 40 | 41 | #' @export 42 | #' @rdname preprocess_inputs 43 | nest_by_keys <- function(.data, ..., name = NULL) { 44 | attr(.data, "powerjoin_preprocess") <- list(type = "nest_by_keys", args = enquos(name = name,...)) 45 | .data 46 | } 47 | 48 | #' @export 49 | #' @rdname preprocess_inputs 50 | pack_along_keys <- function(.data, ..., name) { 51 | if(missing(name)) abort("The `name` argument of `pack_along_keys` cannot be missing") 52 | attr(.data, "powerjoin_preprocess") <- list(type = "pack_along_keys", args = enquos(name = name, ...)) 53 | .data 54 | } 55 | 56 | #' @export 57 | #' @rdname preprocess_inputs 58 | complete_keys <- function(.data) { 59 | attr(.data, "powerjoin_preprocess") <- list(type = "complete_keys") 60 | .data 61 | } 62 | 63 | # #' @export 64 | # #' @rdname preprocess_inputs 65 | # pivot_wider_by_keys <- function(data, names_from = name, names_prefix = "", 66 | # names_sep = "_", names_glue = NULL, names_sort = FALSE, names_repair = "check_unique", 67 | # values_from = value, values_fill = NULL, values_fn = NULL, 68 | # ...) { 69 | # attr(data, "powerjoin_preprocess") <- list(type = "pivot_wider_by_keys", args = enquos( 70 | # names_from = names_from, 71 | # names_prefix = names_prefix, 72 | # names_sep = names_sep, 73 | # names_glue = names_glue, 74 | # names_sort = names_sort, 75 | # names_repair = names_repair, 76 | # values_from = values_from, 77 | # values_fill = values_fill, 78 | # values_fn = values_fn, 79 | # ...)) 80 | # data 81 | # } 82 | # 83 | # #' @export 84 | # #' @rdname preprocess_inputs 85 | # pivot_longer_by_keys <- function(data, names_to = "name", names_prefix = NULL, 86 | # names_sep = NULL, names_pattern = NULL, names_ptypes = list(), 87 | # names_transform = list(), names_repair = "check_unique", 88 | # values_to = "value", values_drop_na = FALSE, values_ptypes = list(), 89 | # values_transform = list(), ...) { 90 | # attr(data, "powerjoin_preprocess") <- list(type = "pivot_longer_by_keys", args = enquos( 91 | # names_to = names_to, 92 | # names_prefix = names_prefix, 93 | # names_sep = names_sep, 94 | # names_pattern = names_pattern, 95 | # names_ptypes = names_ptypes, 96 | # names_transform = names_transform, 97 | # names_repair = names_repair, 98 | # values_to = values_to, 99 | # values_drop_na = values_drop_na, 100 | # values_ptypes = values_ptypes, 101 | # values_transform = values_transform, 102 | # ... 
103 | # )) 104 | # data 105 | # } 106 | 107 | preprocess <- function(.data, by) { 108 | attr_ <- attr(.data, "powerjoin_preprocess") 109 | if(is.null(attr_)) return(.data) 110 | attr(.data, 'powerjoin_preprocess') <- NULL 111 | 112 | if(attr_$type == "select_keys_and") { 113 | # ugly but not sure there's much better 114 | # if first arg is negative we should start with negative in select too 115 | # we select `by` as well to be sure they're not removed 116 | if(length(attr_$args) && 117 | is.call(rlang::quo_squash(attr_$args[[1]])) && 118 | identical(quo_squash(attr_$args[[1]])[[1]], quote(`-`))) { 119 | .data <- select(.data, !!!attr_$args, !!by) 120 | } else { 121 | .data <- select(.data, !!by, !!!attr_$args) 122 | } 123 | 124 | return(.data) 125 | } 126 | 127 | if(attr_$type == "summarize_by_keys") { 128 | .data <- .data %>% 129 | group_by_at(by) %>% 130 | summarize(!!!attr_$args, .groups = "drop") 131 | return(.data) 132 | } 133 | 134 | if(attr_$type == "nest_by_keys") { 135 | name <- eval_tidy(attr_$args$name) 136 | if(is.null(name)) { 137 | .data <- .data %>% 138 | group_by_at(by) %>% 139 | summarize(across(c(!!!attr_$args[-1]), list), .groups = "drop") 140 | } else { 141 | if(length(attr_$args[-1])) { 142 | .data <- select(.data, !!by, !!!attr_$args[-1]) 143 | } 144 | .data <- .data %>% 145 | nest((!!attr_$args$name) := -!!by) 146 | } 147 | return(.data) 148 | } 149 | 150 | # if(attr_$type == "pivot_wider_by_keys") { 151 | # # pivot_wider takes default columns name and value, we must not have them so we can give only one 152 | # .data <- eval_tidy(expr(pivot_wider(.data, id_cols = !!by, !!!attr_$args))) 153 | # return(.data) 154 | # } 155 | # 156 | # if(attr_$type == "pivot_longer_by_keys") { 157 | # # 158 | # .data <- eval_tidy(expr(pivot_longer(.data, cols = -!!by, !!!attr_$args))) 159 | # return(.data) 160 | # } 161 | 162 | if(attr_$type == "pack_along_keys") { 163 | .data <- tibble::as_tibble(.data, .name_repair = "minimal") 164 | pack <- select(.data, -!!by) 165 | if(length(attr_$args[-1])) { 166 | pack <- select(pack, !!!attr_$args[-1]) 167 | } 168 | .data <- transmute(.data, !!!syms(by), (!!attr_$args$name) := pack) 169 | return(.data) 170 | } 171 | 172 | if(attr_$type == "complete_keys") { 173 | cl_bkp <- class(.data) 174 | .data <- tidyr::complete(.data, !!!syms(by)) 175 | class(.data) <- cl_bkp 176 | return(.data) 177 | } 178 | } 179 | 180 | -------------------------------------------------------------------------------- /R/preprocess_by.R: -------------------------------------------------------------------------------- 1 | preprocess_by <- function(x_names, y_names, by = NULL, check, na_equal, env) { 2 | fml_lgl <- sapply(by, is_formula) 3 | if(is_formula(by)) { 4 | equi_keys <- NULL 5 | specs <- fuzzy_specs(by, env) 6 | by <- specs$multi_by 7 | by$multi_match_fun <- specs$multi_match_fun 8 | by$extra_cols <- specs$extra_cols 9 | by$fuzzy <- TRUE 10 | } else if(is.list(by) && any(fml_lgl)) { 11 | # harmonize unnamed 12 | names(by) <- allNames(by) 13 | names(by)[!fml_lgl] <- ifelse(names(by[!fml_lgl]) == "", unlist(by[!fml_lgl]), names(by[!fml_lgl])) 14 | equi_keys <- by[!fml_lgl] 15 | # extract lhs 16 | by[fml_lgl] <- lapply(by[fml_lgl], `[[`, 2) 17 | if (na_equal) { 18 | # na_matches == "na" 19 | by[!fml_lgl] <- Map( 20 | function(x, y) substitute(is.na(.x$X) & is.na(.y$Y) | !is.na(.x$X) & !is.na(.y$Y) & .x$X == .y$Y, list(X = sym(x), Y = sym(y))), 21 | names(by[!fml_lgl]), 22 | by[!fml_lgl] 23 | ) 24 | } else { 25 | # na_matches == "never" 26 | by[!fml_lgl] <- Map( 27 | 
function(x, y) substitute(!is.na(.x$X) & !is.na(.y$Y) & .x$X == .y$Y, list(X = sym(x), Y = sym(y))), 28 | names(by[!fml_lgl]), 29 | by[!fml_lgl] 30 | ) 31 | } 32 | # concat 33 | by <- Reduce(function(x,y) call("&", x, y), by) 34 | # rebuild formula 35 | by <- call("~", by) 36 | specs <- fuzzy_specs(by, env) 37 | by <- specs$multi_by 38 | by$multi_match_fun <- specs$multi_match_fun 39 | by$fuzzy <- TRUE 40 | by$equi_keys <- equi_keys 41 | by$extra_cols <- specs$extra_cols 42 | } else { 43 | fuzzy <- FALSE 44 | #--------------------------------------------------------------------------- 45 | # modified dplyr code 46 | by <- preprocess_by_equi(x_names, y_names, by = by, check = check) 47 | by$fuzzy <- FALSE 48 | } 49 | by 50 | } 51 | 52 | # Adapted from join_mutate in dplyr 1.0.7 53 | preprocess_by_equi <- function(x_names, y_names, by = NULL, check) { 54 | # original dplyr code 55 | check_duplicate_vars(x_names, "x") 56 | check_duplicate_vars(y_names, "y") 57 | by <- standardise_join_by(by, x_names = x_names, y_names = y_names, 58 | # arg from powerjoin 59 | check = check) 60 | by 61 | } 62 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: ~ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | ## comments 6 | 7 | The package was archived on CRAN due to new rules on html, we fixed it by 8 | redocumenting the package using an up to date version of 'roxygen2' 9 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • powerjoin 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | Skip to contents 28 | 29 | 30 |
67 |
68 |
72 | 73 | Content not found. Please use links in the navbar. 74 | 75 |
76 |
77 | 78 | 79 |
83 | 84 | 88 | 89 |
90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /docs/LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | MIT License • powerjoin 6 | Skip to contents 7 | 8 | 9 |
41 |
42 |
46 | 47 |
48 | 49 |

Copyright (c) 2022 powerjoin authors

50 |

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

51 |

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

52 |

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

53 |
54 | 55 |
56 | 57 | 58 |
61 | 62 | 65 | 66 |
67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /docs/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /docs/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /docs/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/apple-touch-icon.png -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | Authors and Citation • powerjoin 6 | Skip to contents 7 | 8 | 9 |
41 |
42 |
45 | 46 |
47 |

Authors

48 | 49 |
  • 50 |

    Antoine Fabri. Author, maintainer. 51 |

    52 |
  • 53 |
  • 54 |

    Hadley Wickham. Contributor. 55 |
    aut/cre of dplyr

    56 |
  • 57 |
  • 58 |

    Romain François. Contributor. 59 |
    aut of dplyr

    60 |
  • 61 |
  • 62 |

    David Robinson. Contributor. 63 |
    aut of fuzzyjoin

    64 |
  • 65 |
  • 66 |

    RStudio. Copyright holder, funder. 67 |
    cph/fnd dplyr

    68 |
  • 69 |
70 | 71 |
72 |

Citation

73 |

Source: DESCRIPTION

74 | 75 |

Fabri A (2022). 76 | powerjoin: Extensions of 'dplyr' and 'fuzzyjoin' Join Functions. 77 | R package version 0.1.0, https://github.com/moodymudskipper/powerjoin. 78 |

79 |
@Manual{,
 80 |   title = {powerjoin: Extensions of 'dplyr' and 'fuzzyjoin' Join Functions},
 81 |   author = {Antoine Fabri},
 82 |   year = {2022},
 83 |   note = {R package version 0.1.0},
 84 |   url = {https://github.com/moodymudskipper/powerjoin},
 85 | }
86 |
87 |
89 | 90 | 91 |
94 | 95 | 98 | 99 |
100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. 
"?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/favicon-16x16.png -------------------------------------------------------------------------------- /docs/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/favicon-32x32.png -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/favicon.ico -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/logo.png -------------------------------------------------------------------------------- /docs/pkgdown.css: 
-------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body { 21 | position: relative; 22 | } 23 | 24 | body > .container { 25 | display: flex; 26 | height: 100%; 27 | flex-direction: column; 28 | } 29 | 30 | body > .container .row { 31 | flex: 1 0 auto; 32 | } 33 | 34 | footer { 35 | margin-top: 45px; 36 | padding: 35px 0 36px; 37 | border-top: 1px solid #e5e5e5; 38 | color: #666; 39 | display: flex; 40 | flex-shrink: 0; 41 | } 42 | footer p { 43 | margin-bottom: 0; 44 | } 45 | footer div { 46 | flex: 1; 47 | } 48 | footer .pkgdown { 49 | text-align: right; 50 | } 51 | footer p { 52 | margin-bottom: 0; 53 | } 54 | 55 | img.icon { 56 | float: right; 57 | } 58 | 59 | /* Ensure in-page images don't run outside their container */ 60 | .contents img { 61 | max-width: 100%; 62 | height: auto; 63 | } 64 | 65 | /* Fix bug in bootstrap (only seen in firefox) */ 66 | summary { 67 | display: list-item; 68 | } 69 | 70 | /* Typographic tweaking ---------------------------------*/ 71 | 72 | .contents .page-header { 73 | margin-top: calc(-60px + 1em); 74 | } 75 | 76 | dd { 77 | margin-left: 3em; 78 | } 79 | 80 | /* Section anchors ---------------------------------*/ 81 | 82 | a.anchor { 83 | display: none; 84 | margin-left: 5px; 85 | width: 20px; 86 | height: 20px; 87 | 88 | background-image: url(./link.svg); 89 | background-repeat: no-repeat; 90 | background-size: 20px 20px; 91 | background-position: center center; 92 | } 93 | 94 | h1:hover .anchor, 95 | h2:hover .anchor, 96 | h3:hover .anchor, 97 | h4:hover .anchor, 98 | h5:hover .anchor, 99 | h6:hover .anchor { 100 | display: inline-block; 101 | } 102 | 103 | /* Fixes for fixed navbar --------------------------*/ 104 | 105 | .contents h1, .contents h2, .contents h3, .contents h4 { 106 | padding-top: 60px; 107 | margin-top: -40px; 108 | } 109 | 110 | /* Navbar submenu --------------------------*/ 111 | 112 | .dropdown-submenu { 113 | position: relative; 114 | } 115 | 116 | .dropdown-submenu>.dropdown-menu { 117 | top: 0; 118 | left: 100%; 119 | margin-top: -6px; 120 | margin-left: -1px; 121 | border-radius: 0 6px 6px 6px; 122 | } 123 | 124 | .dropdown-submenu:hover>.dropdown-menu { 125 | display: block; 126 | } 127 | 128 | .dropdown-submenu>a:after { 129 | display: block; 130 | content: " "; 131 | float: right; 132 | width: 0; 133 | height: 0; 134 | border-color: transparent; 135 | border-style: solid; 136 | border-width: 5px 0 5px 5px; 137 | border-left-color: #cccccc; 138 | margin-top: 5px; 139 | margin-right: -10px; 140 | } 141 | 142 | .dropdown-submenu:hover>a:after { 143 | border-left-color: #ffffff; 144 | } 145 | 146 | .dropdown-submenu.pull-left { 147 | float: none; 148 | } 149 | 150 | .dropdown-submenu.pull-left>.dropdown-menu { 151 | left: -100%; 152 | margin-left: 10px; 153 | border-radius: 6px 0 6px 6px; 154 | } 155 | 156 | /* Sidebar --------------------------*/ 157 | 158 | #pkgdown-sidebar { 159 | margin-top: 30px; 160 | position: -webkit-sticky; 161 | position: sticky; 162 | top: 70px; 163 | 
} 164 | 165 | #pkgdown-sidebar h2 { 166 | font-size: 1.5em; 167 | margin-top: 1em; 168 | } 169 | 170 | #pkgdown-sidebar h2:first-child { 171 | margin-top: 0; 172 | } 173 | 174 | #pkgdown-sidebar .list-unstyled li { 175 | margin-bottom: 0.5em; 176 | } 177 | 178 | /* bootstrap-toc tweaks ------------------------------------------------------*/ 179 | 180 | /* All levels of nav */ 181 | 182 | nav[data-toggle='toc'] .nav > li > a { 183 | padding: 4px 20px 4px 6px; 184 | font-size: 1.5rem; 185 | font-weight: 400; 186 | color: inherit; 187 | } 188 | 189 | nav[data-toggle='toc'] .nav > li > a:hover, 190 | nav[data-toggle='toc'] .nav > li > a:focus { 191 | padding-left: 5px; 192 | color: inherit; 193 | border-left: 1px solid #878787; 194 | } 195 | 196 | nav[data-toggle='toc'] .nav > .active > a, 197 | nav[data-toggle='toc'] .nav > .active:hover > a, 198 | nav[data-toggle='toc'] .nav > .active:focus > a { 199 | padding-left: 5px; 200 | font-size: 1.5rem; 201 | font-weight: 400; 202 | color: inherit; 203 | border-left: 2px solid #878787; 204 | } 205 | 206 | /* Nav: second level (shown on .active) */ 207 | 208 | nav[data-toggle='toc'] .nav .nav { 209 | display: none; /* Hide by default, but at >768px, show it */ 210 | padding-bottom: 10px; 211 | } 212 | 213 | nav[data-toggle='toc'] .nav .nav > li > a { 214 | padding-left: 16px; 215 | font-size: 1.35rem; 216 | } 217 | 218 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 219 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 220 | padding-left: 15px; 221 | } 222 | 223 | nav[data-toggle='toc'] .nav .nav > .active > a, 224 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 225 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 226 | padding-left: 15px; 227 | font-weight: 500; 228 | font-size: 1.35rem; 229 | } 230 | 231 | /* orcid ------------------------------------------------------------------- */ 232 | 233 | .orcid { 234 | font-size: 16px; 235 | color: #A6CE39; 236 | /* margins are required by official ORCID trademark and display guidelines */ 237 | margin-left:4px; 238 | margin-right:4px; 239 | vertical-align: middle; 240 | } 241 | 242 | /* Reference index & topics ----------------------------------------------- */ 243 | 244 | .ref-index th {font-weight: normal;} 245 | 246 | .ref-index td {vertical-align: top; min-width: 100px} 247 | .ref-index .icon {width: 40px;} 248 | .ref-index .alias {width: 40%;} 249 | .ref-index-icons .alias {width: calc(40% - 40px);} 250 | .ref-index .title {width: 60%;} 251 | 252 | .ref-arguments th {text-align: right; padding-right: 10px;} 253 | .ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} 254 | .ref-arguments .name {width: 20%;} 255 | .ref-arguments .desc {width: 80%;} 256 | 257 | /* Nice scrolling for wide elements --------------------------------------- */ 258 | 259 | table { 260 | display: block; 261 | overflow: auto; 262 | } 263 | 264 | /* Syntax highlighting ---------------------------------------------------- */ 265 | 266 | pre, code, pre code { 267 | background-color: #f8f8f8; 268 | color: #333; 269 | } 270 | pre, pre code { 271 | white-space: pre-wrap; 272 | word-break: break-all; 273 | overflow-wrap: break-word; 274 | } 275 | 276 | pre { 277 | border: 1px solid #eee; 278 | } 279 | 280 | pre .img, pre .r-plt { 281 | margin: 5px 0; 282 | } 283 | 284 | pre .img img, pre .r-plt img { 285 | background-color: #fff; 286 | } 287 | 288 | code a, pre a { 289 | color: #375f84; 290 | } 291 | 292 | a.sourceLine:hover { 293 | text-decoration: none; 294 | } 295 | 296 | .fl {color: 
#1514b5;} 297 | .fu {color: #000000;} /* function */ 298 | .ch,.st {color: #036a07;} /* string */ 299 | .kw {color: #264D66;} /* keyword */ 300 | .co {color: #888888;} /* comment */ 301 | 302 | .error {font-weight: bolder;} 303 | .warning {font-weight: bolder;} 304 | 305 | /* Clipboard --------------------------*/ 306 | 307 | .hasCopyButton { 308 | position: relative; 309 | } 310 | 311 | .btn-copy-ex { 312 | position: absolute; 313 | right: 0; 314 | top: 0; 315 | visibility: hidden; 316 | } 317 | 318 | .hasCopyButton:hover button.btn-copy-ex { 319 | visibility: visible; 320 | } 321 | 322 | /* headroom.js ------------------------ */ 323 | 324 | .headroom { 325 | will-change: transform; 326 | transition: transform 200ms linear; 327 | } 328 | .headroom--pinned { 329 | transform: translateY(0%); 330 | } 331 | .headroom--unpinned { 332 | transform: translateY(-100%); 333 | } 334 | 335 | /* mark.js ----------------------------*/ 336 | 337 | mark { 338 | background-color: rgba(255, 255, 51, 0.5); 339 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 340 | padding: 1px; 341 | } 342 | 343 | /* vertical spacing after htmlwidgets */ 344 | .html-widget { 345 | margin-bottom: 10px; 346 | } 347 | 348 | /* fontawesome ------------------------ */ 349 | 350 | .fab { 351 | font-family: "Font Awesome 5 Brands" !important; 352 | } 353 | 354 | /* don't display links in code chunks when printing */ 355 | /* source: https://stackoverflow.com/a/10781533 */ 356 | @media print { 357 | code a:link:after, code a:visited:after { 358 | content: ""; 359 | } 360 | } 361 | 362 | /* Section anchors --------------------------------- 363 | Added in pandoc 2.11: https://github.com/jgm/pandoc-templates/commit/9904bf71 364 | */ 365 | 366 | div.csl-bib-body { } 367 | div.csl-entry { 368 | clear: both; 369 | } 370 | .hanging-indent div.csl-entry { 371 | margin-left:2em; 372 | text-indent:-2em; 373 | } 374 | div.csl-left-margin { 375 | min-width:2em; 376 | float:left; 377 | } 378 | div.csl-right-inline { 379 | margin-left:2em; 380 | padding-left:1em; 381 | } 382 | div.csl-indent { 383 | margin-left: 2em; 384 | } 385 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 34 | element.setAttribute('data-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize 
tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function() { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("main#main"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if (s.title == s.what) { 141 | return `${s.dir} >
    ${s.title}
    `; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
    ${s.title}
    > ${s.what}`; 144 | } else { 145 | return `${s.dir} >
    ${s.title}
    > ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.17.1.1 2 | pkgdown: 2.0.6 3 | pkgdown_sha: ~ 4 | articles: {} 5 | last_built: 2022-11-03T05:56Z 6 | 7 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/docs/reference/figures/logo.png -------------------------------------------------------------------------------- /docs/reference/full_diagnostic.html: -------------------------------------------------------------------------------- 1 | 2 | Inform on all potential issues — full_diagnostic • powerjoin 8 | Skip to contents 9 | 10 | 11 |
This is the output of check_specs() with all arguments set to "inform", it's useful for a complete join diagnostic.

Usage

full_diagnostic

Format

An object of class powerjoin_check of length 12.
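A usage note (added for context, not part of the generated page; the sketch and the toy data frames below are illustrative only): full_diagnostic is meant to be passed to the check argument of the power_*_join() functions, where it reports every issue it can detect without stopping the join.

# Illustrative sketch; df1 and df2 are made-up toy tables.
library(powerjoin)
df1 <- data.frame(id = 1:2, val1 = 1:2)
df2 <- data.frame(id = 2:3, val2 = 2:3)
# informs about whichever of the 12 checked issues are present
# (unmatched keys, column conflicts, NA keys, ...) and returns the join as usual
power_left_join(df1, df2, by = "id", check = full_diagnostic)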
-------------------------------------------------------------------------------- /docs/reference/index.html: --------------------------------------------------------------------------------

Function reference • powerjoin

All functions

check_specs()
    Build a checklist for power joins

coalesce_xy() coalesce_yx()
    Coalesce helpers

full_diagnostic
    Inform on all potential issues

paste_xy() paste_yx()
    Paste helpers

power_left_join() power_right_join() power_inner_join() power_full_join()
    Power joins

select_keys_and() summarize_by_keys() nest_by_keys() pack_along_keys() complete_keys()
    Preprocess powerjoin inputs

-------------------------------------------------------------------------------- /docs/reference/powerjoin-package.html: --------------------------------------------------------------------------------

powerjoin: Extensions of 'dplyr' and 'fuzzyjoin' Join Functions — powerjoin-package • powerjoin
We extend 'dplyr' and 'fuzzyjoin' join functions with features to preprocess the data, apply various data checks, and deal with conflicting columns.
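As a quick illustration of that description (a sketch added for context, not content from the rendered page; the data frames are hypothetical), the three features can appear in a single call: select_keys_and() preprocesses an input, check_specs() requests a data check, and conflict = coalesce_xy resolves the duplicated column. All three helpers are documented in the pages and man/ files that follow.

# Sketch only; df1 and df2 are hypothetical.
library(powerjoin)
df1 <- data.frame(id = 1:3, val = c(1, NA, 3), note = "x")
df2 <- data.frame(id = 2:4, val = c(22, 32, 42))
power_left_join(
  select_keys_and(df1, val),                            # preprocess: keep keys and val
  df2,
  by = "id",
  check = check_specs(unmatched_keys_left = "inform"),  # request a data check
  conflict = coalesce_xy                                # resolve the conflicting val columns
)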

See also

Useful links:
• https://github.com/moodymudskipper/powerjoin
• Report bugs at https://github.com/moodymudskipper/powerjoin/issues

Author

Maintainer: Antoine Fabri antoine.fabri@gmail.com

Other contributors:
• Hadley Wickham hadley@rstudio.com (ORCID) (aut/cre of dplyr) [contributor]
• Romain François (ORCID) (aut of dplyr) [contributor]
• David Robinson admiral.david@gmail.com (aut of fuzzyjoin) [contributor]
• RStudio (cph/fnd dplyr) [copyright holder, funder]
    84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /LICENSE-text.html 8 | 9 | 10 | /LICENSE.html 11 | 12 | 13 | /authors.html 14 | 15 | 16 | /index.html 17 | 18 | 19 | /news/index.html 20 | 21 | 22 | /reference/check_specs.html 23 | 24 | 25 | /reference/coalesce_xy.html 26 | 27 | 28 | /reference/full_diagnostic.html 29 | 30 | 31 | /reference/index.html 32 | 33 | 34 | /reference/paste_xy.html 35 | 36 | 37 | /reference/power_left_join.html 38 | 39 | 40 | /reference/powerjoin-package.html 41 | 42 | 43 | /reference/preprocess_inputs.html 44 | 45 | 46 | -------------------------------------------------------------------------------- /man/check_specs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check.R 3 | \name{check_specs} 4 | \alias{check_specs} 5 | \title{Build a checklist for power joins} 6 | \usage{ 7 | check_specs( 8 | implicit_keys = c("inform", "ignore", "warn", "abort"), 9 | column_conflict = c("ignore", "inform", "warn", "abort"), 10 | duplicate_keys_left = c("ignore", "inform", "warn", "abort"), 11 | duplicate_keys_right = c("ignore", "inform", "warn", "abort"), 12 | unmatched_keys_left = c("ignore", "inform", "warn", "abort"), 13 | unmatched_keys_right = c("ignore", "inform", "warn", "abort"), 14 | missing_key_combination_left = c("ignore", "inform", "warn", "abort"), 15 | missing_key_combination_right = c("ignore", "inform", "warn", "abort"), 16 | inconsistent_factor_levels = c("ignore", "inform", "warn", "abort"), 17 | inconsistent_type = c("ignore", "inform", "warn", "abort"), 18 | grouped_input = c("ignore", "inform", "warn", "abort"), 19 | na_keys = c("ignore", "inform", "warn", "abort") 20 | ) 21 | } 22 | \arguments{ 23 | \item{implicit_keys}{What to do if keys are not given explicitly through the 24 | \code{by} argument} 25 | 26 | \item{column_conflict}{What to do if the join creates a column conflict which 27 | is not handled by the \code{conflict} argument} 28 | 29 | \item{duplicate_keys_left}{What to do if we find duplicate sets of keys in the 30 | left table} 31 | 32 | \item{duplicate_keys_right}{What to do if we find duplicate sets of keys in the 33 | right table} 34 | 35 | \item{unmatched_keys_left}{What to do if we find unmatched sets of keys in the 36 | left table} 37 | 38 | \item{unmatched_keys_right}{What to do if we find unmatched sets of keys in the 39 | right table} 40 | 41 | \item{missing_key_combination_left}{What to do if the left table doesn't contain 42 | all key combinations} 43 | 44 | \item{missing_key_combination_right}{What to do if the right table doesn't contain 45 | all key combinations} 46 | 47 | \item{inconsistent_factor_levels}{What to do if the key columns from both sides 48 | have inconsistent factor levels} 49 | 50 | \item{inconsistent_type}{What to do if the joined keys have a different type} 51 | 52 | \item{grouped_input}{What to do if one or both of the tables are grouped} 53 | 54 | \item{na_keys}{What to do if keys contain missing values} 55 | } 56 | \value{ 57 | A character vector of class \code{"powerjoin_check"} 58 | } 59 | \description{ 60 | Build a checklist for power joins 61 | } 62 | \examples{ 63 | check_specs( 64 | implicit_keys = "ignore", 65 | grouped_input = "inform", 66 | column_conflict = 
"abort", 67 | na_keys ="warn") 68 | } 69 | -------------------------------------------------------------------------------- /man/coalesce_xy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coalesce.R 3 | \name{coalesce_xy} 4 | \alias{coalesce_xy} 5 | \alias{coalesce_yx} 6 | \title{Coalesce helpers} 7 | \usage{ 8 | coalesce_xy(x, y) 9 | 10 | coalesce_yx(x, y) 11 | } 12 | \arguments{ 13 | \item{x}{A vector} 14 | 15 | \item{y}{A vector} 16 | } 17 | \value{ 18 | A vector 19 | } 20 | \description{ 21 | These are wrappers around \code{dplyr::coalesce}, designed for convenient use in 22 | the \code{conflict} argument of \pkg{powerjoin}'s join functions. \code{coalesce_xy()} is 23 | just like \code{dplyr::coalesce} (except it takes only 2 arguments), \code{coalesce_yx()} 24 | looks first in \code{y} and then in \code{x} if \code{y} is missing. 25 | } 26 | \examples{ 27 | coalesce_xy(c(NA, 2, 3), c(11, 12, NA)) 28 | coalesce_yx(c(NA, 2, 3), c(11, 12, NA)) 29 | } 30 | -------------------------------------------------------------------------------- /man/extended-equality.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/extended-equality.R 3 | \name{extended-equality} 4 | \alias{extended-equality} 5 | \alias{\%==\%} 6 | \alias{\%in.\%} 7 | \title{Extended equality operators} 8 | \usage{ 9 | x \%==\% y 10 | 11 | x \%in.\% y 12 | } 13 | \arguments{ 14 | \item{x}{A vector} 15 | 16 | \item{y}{A vector for \verb{\%==\%}, a list of vectors for \verb{\%in.\%}} 17 | } 18 | \description{ 19 | \verb{\%==\%} is the bone operator, it works like \code{==} but \code{NA \%==\% 1} is \code{FALSE} and 20 | \code{NA \%==\% NA} is \code{TRUE}. \verb{\%in.\%} is the a vectorized \code{\%in\%}, that 21 | can be seen as a rowwise \code{\%in\%} when applied to data frame columns. These are 22 | convenient helpers for fuzzy joins. 23 | } 24 | \examples{ 25 | df1 <- data.frame(key = c("b", "z")) 26 | df2 <- data.frame(key1 = c("a", "b", "c"), key2 = c("x", "y", "z"), val = 1:3) 27 | power_left_join(df1, df2, ~ .x$key \%in.\% list(.y$key1, .y$key2)) 28 | 29 | df3 <- data.frame(key1 = c("a", NA)) 30 | df4 <- data.frame(key2 = c("a", "b", NA), val = 1:3) 31 | 32 | # note the difference 33 | power_inner_join(df3, df4, by = ~ .x$key1 == .y$key2) 34 | power_inner_join(df3, df4, by = ~ .x$key1 \%==\% .y$key2) 35 | 36 | # typically we would only use the conditions above as part of more complex conditions. 
37 | # In this precise case they are equivalent to these equi joins 38 | power_inner_join(df3, df4, by = c(key1 = "key2")) 39 | power_inner_join(df3, df4, by = c(key1 = "key2"), na_matches = "never") 40 | } 41 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/man/figures/logo.png -------------------------------------------------------------------------------- /man/full_diagnostic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/check.R 3 | \docType{data} 4 | \name{full_diagnostic} 5 | \alias{full_diagnostic} 6 | \title{Inform on all potential issues} 7 | \format{ 8 | An object of class \code{powerjoin_check} of length 12. 9 | } 10 | \usage{ 11 | full_diagnostic 12 | } 13 | \description{ 14 | This is the output of \code{check_specs()} with all arguments set to \code{"inform"}, 15 | it's useful for a complete join diagnostic. 16 | } 17 | \keyword{datasets} 18 | -------------------------------------------------------------------------------- /man/paste_xy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/coalesce.R 3 | \name{paste_xy} 4 | \alias{paste_xy} 5 | \alias{paste_yx} 6 | \title{Paste helpers} 7 | \usage{ 8 | paste_xy(x, y, sep = " ", na = NULL, ignore_empty = TRUE) 9 | 10 | paste_yx(x, y, sep = " ", na = NULL, ignore_empty = TRUE) 11 | } 12 | \arguments{ 13 | \item{x}{A vector} 14 | 15 | \item{y}{A vector} 16 | 17 | \item{sep}{separator} 18 | 19 | \item{na}{How to treat \code{NAs}, they are ignored by default, if \code{NA} the result 20 | will be \code{NA}, just as with \code{stringr::str_c}, if \code{"NA"} NAs will be coerced 21 | to character just as with \code{paste()}. Any other string can be used} 22 | 23 | \item{ignore_empty}{Whether to ignore empty strings, to avoid trailing and leading separators} 24 | } 25 | \value{ 26 | A character vector 27 | } 28 | \description{ 29 | These are similar to \code{paste()} but by default ignore \code{NA} and empty strings 30 | (\code{""}). If they are found in a conflicting column we return the value from 31 | the other column without using the separator. If both columns have such values 32 | we return an empty string. 
33 | } 34 | \examples{ 35 | paste_xy(letters[1:3], c("d", NA, "")) 36 | paste_yx(letters[1:3], c("d", NA, "")) 37 | paste_xy(letters[1:3], c("d", NA, ""), na = NA, ignore_empty = FALSE) 38 | paste_xy(letters[1:3], c("d", NA, ""), na = "NA", ignore_empty = FALSE) 39 | } 40 | -------------------------------------------------------------------------------- /man/power_left_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/generics_and_methods.R 3 | \name{power_left_join} 4 | \alias{power_left_join} 5 | \alias{power_right_join} 6 | \alias{power_inner_join} 7 | \alias{power_full_join} 8 | \title{Power joins} 9 | \usage{ 10 | power_left_join( 11 | x, 12 | y = NULL, 13 | by = NULL, 14 | copy = FALSE, 15 | suffix = c(".x", ".y"), 16 | keep = NULL, 17 | na_matches = c("na", "never"), 18 | check = check_specs(), 19 | conflict = NULL, 20 | fill = NULL 21 | ) 22 | 23 | power_right_join( 24 | x, 25 | y = NULL, 26 | by = NULL, 27 | copy = FALSE, 28 | suffix = c(".x", ".y"), 29 | keep = NULL, 30 | na_matches = c("na", "never"), 31 | check = check_specs(), 32 | conflict = NULL, 33 | fill = NULL 34 | ) 35 | 36 | power_inner_join( 37 | x, 38 | y = NULL, 39 | by = NULL, 40 | copy = FALSE, 41 | suffix = c(".x", ".y"), 42 | keep = NULL, 43 | na_matches = c("na", "never"), 44 | check = check_specs(), 45 | conflict = NULL, 46 | fill = NULL 47 | ) 48 | 49 | power_full_join( 50 | x, 51 | y = NULL, 52 | by = NULL, 53 | copy = FALSE, 54 | suffix = c(".x", ".y"), 55 | keep = NULL, 56 | na_matches = c("na", "never"), 57 | check = check_specs(), 58 | conflict = NULL, 59 | fill = NULL 60 | ) 61 | } 62 | \arguments{ 63 | \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or 64 | lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for 65 | more details.} 66 | 67 | \item{by}{As in \pkg{dplyr}, but extended so user can supply a formula or a list 68 | of character and formulas. Formulas are used for fuzzy joins, see dedicated 69 | section below.} 70 | 71 | \item{copy}{Ignored at the moment because \pkg{powerjoin} doesn't support databases} 72 | 73 | \item{suffix}{If there are non-joined duplicate variables in \code{x} and 74 | \code{y}, these suffixes will be added to the output to disambiguate them. 75 | Should be a character vector of length 2.} 76 | 77 | \item{keep}{A boolean for compatibility with \pkg{dplyr}, or a value among "left", 78 | "right", "both", "none" or "default". See dedicated section below.} 79 | 80 | \item{na_matches}{Should two \code{NA} or two \code{NaN} values match? 81 | \itemize{ 82 | \item \code{"na"}, the default, treats two \code{NA} or two \code{NaN} values as equal, like 83 | \code{\%in\%}, \code{\link[=match]{match()}}, and \code{\link[=merge]{merge()}}. 84 | \item \code{"never"} treats two \code{NA} or two \code{NaN} values as different, and will 85 | never match them together or to any other values. This is similar to joins 86 | for database sources and to \code{base::merge(incomparables = NA)}. 87 | }} 88 | 89 | \item{check}{A list created with \code{check_specs()}} 90 | 91 | \item{conflict}{A function, formula, the special value amongst \code{"patch"}, 92 | or a named list of such items. If the LHS of the formula is \code{rw} the rhs will 93 | be applied rowwise. 
Note that the columns will be subset with \code{[} so for 94 | list columns \code{.x} or \code{.y} will refer to length 1 lists and you might sometimes 95 | need \code{.x[[1]]} or \code{.y[[1]]}.} 96 | 97 | \item{fill}{Values used to replace missing values originating in unmatched keys, 98 | or a named list of such items.} 99 | } 100 | \value{ 101 | A data frame 102 | } 103 | \description{ 104 | Power joins 105 | } 106 | \section{\code{keep} argument values}{ 107 | 108 | \itemize{ 109 | \item \code{NULL} (default) : merge keys and name them as the left table's keys, and 110 | keep columns used for fuzzy joins from both tables 111 | \item \code{left} : keep only key columns for left table 112 | \item \code{right}: keep only key columns for right table 113 | \item \code{both} or \code{TRUE}: keep key columns from both tables, adding suffix if relevant 114 | \item \code{none} : drop all key columns from the output 115 | \item \code{FALSE} : merge keys and name them as the left table's keys, maps to \code{none} for fuzzy joins 116 | } 117 | } 118 | 119 | \section{fuzzy joins}{ 120 | 121 | 122 | To specify fuzzy matching conditions we use formulas in which we refer to 123 | the columns from the left side data frame using \code{.x} and the right side data frame 124 | using \code{.y}, for instance \code{by = ~ .x$col1 > .y$col2}. 125 | 126 | We can specify several conditions and even mix equi condition with fuzzy condition, 127 | for instance \code{by = c(col1 = "col2", ~ .x$col3 > .y$col4)} 128 | 129 | To fuzzy match strings we can leverage the functions from the \pkg{stringr} 130 | package since they are vectorized on all main arguments, 131 | for instance to match observations where \code{col1} contains \code{col1} we can attach 132 | \pkg{stringr} and do \code{by = ~ str_detect(.x$col1, fixed(.y$col2))}. 133 | 134 | Another useful function is \code{stringdist} from the \pkg{stringdist} package to match 135 | strings that are close enough, for instance \code{by = ~ stringdist::stringdist(.x$a,.y$a) < 2} 136 | 137 | We can also define a new column computed during the fuzzy matching, using the 138 | arrow assignment operator, for instance : \code{by = ~ .x$col1 > (mysum <- .y$col2 + .y$col3)} 139 | 140 | When the \code{by} condition evaluates to \code{NA} the observation is dismissed. This makes 141 | \code{by = c(a = "b")} slightly different from \code{by = ~ .x$a == .y$b} when \code{na_matches} 142 | is \code{"na"} (the default). To be able to match \code{NA} with \code{NA} in fuzzy matching condition 143 | we can use the \verb{\%==\%} operator (bone operator), defined in this package. 
144 | } 145 | 146 | \examples{ 147 | # See README for a more verbose version 148 | library(tibble) 149 | male_penguins <- tribble( 150 | ~name, ~species, ~island, ~flipper_length_mm, ~body_mass_g, 151 | "Giordan", "Gentoo", "Biscoe", 222L, 5250L, 152 | "Lynden", "Adelie", "Torgersen", 190L, 3900L, 153 | "Reiner", "Adelie", "Dream", 185L, 3650L 154 | ) 155 | 156 | female_penguins <- tribble( 157 | ~name, ~species, ~island, ~flipper_length_mm, ~body_mass_g, 158 | "Alonda", "Gentoo", "Biscoe", 211, 4500L, 159 | "Ola", "Adelie", "Dream", 190, 3600L, 160 | "Mishayla", "Gentoo", "Biscoe", 215, 4750L, 161 | ) 162 | 163 | # apply different checks 164 | power_inner_join( 165 | male_penguins[c("species", "island")], 166 | female_penguins[c("species", "island")], 167 | check = check_specs(implicit_keys = "ignore", duplicate_keys_right = "inform") 168 | ) 169 | 170 | df1 <- tibble(id = 1:3, value = c(10, NA, 30)) 171 | df2 <- tibble(id = 2:4, value = c(22, 32, 42)) 172 | 173 | # handle conflicted columns when joining 174 | power_left_join(df1, df2, by = "id", conflict = `+`) 175 | 176 | # the most frequent use case is to coalesce 177 | power_left_join(df1, df2, by = "id", conflict = coalesce_xy) 178 | power_left_join(df1, df2, by = "id", conflict = coalesce_yx) 179 | 180 | # the conflict function is applied colwise by default! 181 | power_left_join(df1, df2, by = "id", conflict = ~ sum(.x, .y, na.rm = TRUE)) 182 | 183 | # apply conflict function rowwise 184 | power_left_join(df1, df2, by = "id", conflict = rw ~ sum(.x, .y, na.rm = TRUE)) 185 | 186 | # subset columns without repeating keys 187 | power_inner_join( 188 | male_penguins \%>\% select_keys_and(name), 189 | female_penguins \%>\% select_keys_and(female_name = name), 190 | by = c("species", "island") 191 | ) 192 | 193 | # semi join 194 | power_inner_join( 195 | male_penguins, 196 | female_penguins \%>\% select_keys_and(), 197 | by = c("species", "island") 198 | ) 199 | 200 | # agregate without repeating keys 201 | power_left_join( 202 | male_penguins \%>\% summarize_by_keys(male_weight = mean(body_mass_g)), 203 | female_penguins \%>\% summarize_by_keys(female_weight = mean(body_mass_g)), 204 | by = c("species", "island") 205 | ) 206 | 207 | # pack auxiliary colums without repeating keys 208 | power_left_join( 209 | male_penguins \%>\% pack_along_keys(name = "m"), 210 | female_penguins \%>\% pack_along_keys(name = "f"), 211 | by = c("species", "island") 212 | ) 213 | 214 | # fuzzy join 215 | power_inner_join( 216 | male_penguins \%>\% select_keys_and(male_name = name), 217 | female_penguins \%>\% select_keys_and(female_name = name), 218 | by = c(~.x$flipper_length_mm < .y$flipper_length_mm, ~.x$body_mass_g > .y$body_mass_g) 219 | ) 220 | 221 | # fuzzy + equi join 222 | power_inner_join( 223 | male_penguins \%>\% select_keys_and(male_name = name), 224 | female_penguins \%>\% select_keys_and(female_name = name), 225 | by = c("island", ~.x$flipper_length_mm > .y$flipper_length_mm) 226 | ) 227 | 228 | # define new column without repeating computation 229 | power_inner_join( 230 | male_penguins \%>\% select_keys_and(male_name = name), 231 | female_penguins \%>\% select_keys_and(female_name = name), 232 | by = ~ (mass_ratio <- .y$body_mass_g / .x$body_mass_g) > 1.2 233 | ) 234 | power_inner_join( 235 | male_penguins \%>\% select_keys_and(male_name = name), 236 | female_penguins \%>\% select_keys_and(female_name = name), 237 | by = ~ (mass_ratio <- .y$body_mass_g / .x$body_mass_g) > 1.2, 238 | keep = "none" 239 | ) 240 | 241 | # fill unmatched values 
242 | df1 <- tibble(id = 1:3) 243 | df2 <- tibble(id = 1:2, value2 = c(2, NA), value3 = c(NA, 3)) 244 | power_left_join(df1, df2, by = "id", fill = 0) 245 | power_left_join(df1, df2, by = "id", fill = list(value2 = 0)) 246 | 247 | # join recursively 248 | df1 <- tibble(id = 1, a = "foo") 249 | df2 <- tibble(id = 1, b = "bar") 250 | df3 <- tibble(id = 1, c = "baz") 251 | power_left_join(list(df1, df2, df3), by = "id") 252 | power_left_join(df1, list(df2, df3), by = "id") 253 | 254 | } 255 | -------------------------------------------------------------------------------- /man/powerjoin-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/powerjoin-package.R 3 | \docType{package} 4 | \name{powerjoin-package} 5 | \alias{powerjoin} 6 | \alias{powerjoin-package} 7 | \title{powerjoin: Extensions of 'dplyr' and 'fuzzyjoin' Join Functions} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | We extend 'dplyr' and 'fuzzyjoin' join functions with features to preprocess the data, apply various data checks, and deal with conflicting columns. 12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://github.com/moodymudskipper/powerjoin} 17 | \item Report bugs at \url{https://github.com/moodymudskipper/powerjoin/issues} 18 | } 19 | 20 | } 21 | \author{ 22 | \strong{Maintainer}: Antoine Fabri \email{antoine.fabri@gmail.com} 23 | 24 | Other contributors: 25 | \itemize{ 26 | \item Hadley Wickham \email{hadley@rstudio.com} (\href{https://orcid.org/0000-0003-4757-117X}{ORCID}) (aut/cre of dplyr) [contributor] 27 | \item Romain François (\href{https://orcid.org/0000-0002-2444-4226}{ORCID}) (aut of dplyr) [contributor] 28 | \item David Robinson \email{admiral.david@gmail.com} (aut of fuzzyjoin) [contributor] 29 | \item RStudio (cph/fnd dplyr) [copyright holder, funder] 30 | } 31 | 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/preprocess_inputs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/preprocess.R 3 | \name{preprocess_inputs} 4 | \alias{preprocess_inputs} 5 | \alias{select_keys_and} 6 | \alias{summarize_by_keys} 7 | \alias{nest_by_keys} 8 | \alias{pack_along_keys} 9 | \alias{complete_keys} 10 | \title{Preprocess powerjoin inputs} 11 | \usage{ 12 | select_keys_and(.data, ...) 13 | 14 | summarize_by_keys(.data, ...) 15 | 16 | nest_by_keys(.data, ..., name = NULL) 17 | 18 | pack_along_keys(.data, ..., name) 19 | 20 | complete_keys(.data) 21 | } 22 | \arguments{ 23 | \item{.data}{A data frame to pivot.} 24 | 25 | \item{...}{Additional arguments passed on to methods.} 26 | 27 | \item{name}{Name of created column} 28 | } 29 | \value{ 30 | A data frame identical to the \code{.data} but with a \code{"powerjoin_preprocess"} 31 | attribute to be handled by the join functions 32 | } 33 | \description{ 34 | These functions are named after the tidyverse (dplyr and tidyr) functions \code{select}, \code{summarize}, 35 | \code{nest}, \code{pack}, \code{pivot_wider} and \code{pivot_longer} and are designed to avoid 36 | repetition of key columns when preprocessing the data for a join. They should 37 | only be used in the \code{x} and \code{y} arguments of \pkg{powerjoin} join functions. 
No 38 | further transformation should be applied on top of them. 39 | } 40 | \details{ 41 | Unlike their tidyverse counterparts these just add an attribute to the input and 42 | don't reshape it. The join function then preprocesses the inputs using these 43 | attributes and the keys. 44 | } 45 | \examples{ 46 | # in practice you'll mostly use those in join function calls directly 47 | x <- select_keys_and(head(iris, 2), Sepal.Width) 48 | # all it does is add an attribute that will be processed by the join function 49 | attr(x, "powerjoin_preprocess") 50 | # see `?power_left_join` or README for practical examples 51 | } 52 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/moodymudskipper/powerjoin/4df5007075e10f3707388ac3428851488e8b7041/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /powerjoin.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(powerjoin) 3 | 4 | test_check("powerjoin") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-check.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_that("implicit_keys", { 4 | df1 <- data.frame(id1 = 1:2, val1 = 1:2) 5 | df2 <- data.frame(id2 = 2:3, val2 = 2:3) 6 | expect_error( 7 | power_inner_join(df1, df2, check = check_specs(implicit_keys = "abort")), 8 | "should be explicit" 9 | ) 10 | }) 11 | 12 | test_that("column conflict", { 13 | df1 <- data.frame(id = 1:2, val = 1:2) 14 | df2 <- data.frame(id = 2:3, val = 3:4) 15 | 16 | expect_error( 17 | power_inner_join(df1, df2, by = "id", check = check_specs(column_conflict = "abort")), 18 | "are conflicted" 19 | ) 20 | 21 | expect_message( 22 | power_inner_join(df1, df2, by = "id", check = check_specs(column_conflict = "inform")), 23 | "are conflicted" 24 | ) 25 | 26 | expect_error( 27 | power_inner_join(df1, df2, by = ~ .x$id < .y$id, check = check_specs(column_conflict = "abort")), 28 | "are conflicted" 29 | ) 30 | 31 | expect_message( 32 | power_inner_join(df1, df2, by = ~ .x$id < .y$id, check = check_specs(column_conflict = "inform")), 33 | "are conflicted" 34 | ) 35 | }) 36 | 37 | 38 | test_that("grouped_input", { 39 | df1 <- dplyr::group_by(data.frame(id = 1:2, val = 1:2), id) 40 | df2 <- dplyr::group_by(data.frame(id = 2:3, val = 3:4), id) 41 | 42 | expect_error( 43 | power_inner_join(df1, df2, by = "id", check = check_specs(grouped_input = "abort")), 44 | "grouped" 45 | ) 46 | 47 | expect_message( 48 | power_inner_join(df1, df2, by = "id", check = check_specs(grouped_input = "inform")), 49 | "grouped" 50 | ) 51 | 52 | expect_error( 53 | power_inner_join(df1, df2, by = ~ .x$id < .y$id, check = check_specs(grouped_input = "abort")), 54 | "grouped" 55 | ) 56 | 57 | expect_message( 58 | power_inner_join(df1, df2, by = ~ .x$id < .y$id, check = check_specs(grouped_input = "inform")), 59 | "grouped" 60 | ) 61 | }) 62 | 63 | 64 | test_that("na_keys", { 65 | x <- data.frame(key = c(1, NA), x = 1:2) 66 | y <- data.frame(key = 1, y = 1) 67 | 68 | expect_error( 69 | power_inner_join(x, y, by = "key", check = check_specs(na_keys = "abort")), 70 | "left" 71 | ) 72 | expect_error( 73 | power_inner_join(y, x, by = "key", check = check_specs(na_keys = "abort")), 74 | "right" 75 | ) 76 | }) 77 | 78 | 79 | 
-------------------------------------------------------------------------------- /tests/testthat/test-check_specs.R: -------------------------------------------------------------------------------- 1 | test_that("check_specs methods work", { 2 | expect_output( 3 | print(check_specs()), 4 | "check specifications" 5 | ) 6 | 7 | expect_equal( 8 | c(check_specs(column_conflict = "a", duplicate_keys_left = "a"), 9 | check_specs(duplicate_keys_left = "w", duplicate_keys_right = "w")), 10 | check_specs(column_conflict = "a", duplicate_keys_left = "w", duplicate_keys_right = "w") 11 | ) 12 | }) 13 | 14 | 15 | df1 <- data.frame( 16 | id1 = factor(c("a", "a", "b", "d")), 17 | id2 = c(1, 1, 3, NA), 18 | val = 1:4) 19 | df2 <- data.frame(id1 = factor(c("a", "b", "c")), id2 = 1:3) 20 | 21 | test_that("checks work", { 22 | 23 | expect_error( 24 | power_inner_join(df1, df2, 25 | check = check_specs(implicit_keys = "a")), 26 | "explicit" 27 | ) 28 | 29 | expect_error( 30 | power_inner_join(df1, df2, by = c("id1", "id2"), 31 | check = check_specs(duplicate_keys_left = "a")), 32 | "duplicates" 33 | ) 34 | 35 | expect_error( 36 | power_inner_join(df2, df1, by = c("id1", "id2"), 37 | check = check_specs(duplicate_keys_right = "a")), 38 | "duplicates" 39 | ) 40 | 41 | expect_error( 42 | power_inner_join(df1, df2, by = c("id1", "id2"), 43 | check = check_specs(unmatched_keys_left = "a")), 44 | "unmatched" 45 | ) 46 | 47 | expect_error( 48 | power_inner_join(df1, df2, by = c("id1", "id2"), 49 | check = check_specs(unmatched_keys_right = "a")), 50 | "unmatched" 51 | ) 52 | 53 | expect_error( 54 | power_inner_join(df1, df2, by = c("id1", "id2"), 55 | check = check_specs(missing_key_combination_left = "a")), 56 | "combinations" 57 | ) 58 | 59 | expect_error( 60 | power_inner_join(df1, df2, by = c("id1", "id2"), 61 | check = check_specs(missing_key_combination_right = "a")), 62 | "combinations" 63 | ) 64 | 65 | expect_error( 66 | power_inner_join(df1, df2, by = c("id1", "id2"), 67 | check = check_specs(inconsistent_factor_levels = "a")), 68 | "different factor levels" 69 | ) 70 | 71 | expect_message( 72 | power_inner_join(df1, rename(df2, ID2 = id2), by = c("id1", "id2" = "ID2"), 73 | check = check_specs(inconsistent_factor_levels = "inform")), 74 | "different factor levels" 75 | ) 76 | 77 | expect_error( 78 | power_inner_join(rename(df1, ID1 = id1), df2, by = c("ID1" = "id1", "id2"), 79 | check = check_specs(inconsistent_factor_levels = "a")), 80 | "different factor levels" 81 | ) 82 | 83 | expect_message( 84 | power_inner_join(rename(df1, ID1 = id1), df2, by = c("ID1" = "id1", "id2"), 85 | check = check_specs(inconsistent_factor_levels = "inform")), 86 | "different factor levels" 87 | ) 88 | 89 | expect_error( 90 | power_inner_join(df1, df2, by = c("id1", "id2"), 91 | check = check_specs(inconsistent_type = "a")), 92 | "different type" 93 | ) 94 | 95 | 96 | expect_error( 97 | power_inner_join(df1, rename(df2, ID2 = id2), by = c("id1", "id2" = "ID2"), 98 | check = check_specs(inconsistent_type = "a")), 99 | "different type" 100 | ) 101 | }) 102 | 103 | -------------------------------------------------------------------------------- /tests/testthat/test-conflict.R: -------------------------------------------------------------------------------- 1 | df1 <- data.frame(id = 1:2, val = 1:2) 2 | df2 <- data.frame(id = 2:3, val = 3:4) 3 | 4 | test_that("conflict works with functions", { 5 | expect_equal( 6 | power_inner_join(df1, df2, by = "id", conflict = `+`), 7 | data.frame(id = 2, val = 5) 8 | ) 9 | 10 | expect_equal( 
11 | power_inner_join(df1, df2, by = "id", conflict = ~ .x + .y), 12 | data.frame(id = 2, val = 5) 13 | ) 14 | 15 | # row wise 16 | expect_equal( 17 | power_inner_join(df1, df2, by = "id", conflict = rw ~ sum(.x, .y)), 18 | data.frame(id = 2, val = 5) 19 | ) 20 | 21 | expect_equal( 22 | power_inner_join(df1, df2, by = "id", conflict = c(val = `+`)), 23 | data.frame(id = 2, val = 5) 24 | ) 25 | 26 | expect_equal( 27 | suppressWarnings(power_inner_join(df1, df2, by = "id", conflict = c(val = `+`, extra = `+`))), 28 | data.frame(id = 2, val = 5) 29 | ) 30 | 31 | expect_warning( 32 | power_inner_join(df1, df2, by = "id", conflict = c(val = `+`, extra = `+`)), 33 | "conflict conditions are not used" 34 | ) 35 | 36 | expect_error( 37 | power_inner_join(df1, df2, by = "id", conflict = "foo"), 38 | "wrong `conflict` argument" 39 | ) 40 | 41 | }) 42 | 43 | 44 | test_that("conflict works special values", { 45 | df1 <- data.frame(id = 1:3, val = 1:3) 46 | df2 <- data.frame(id = 1:2, val = c(2, NA)) 47 | 48 | expect_equal( 49 | power_left_join(df1, df2, by = "id", conflict = coalesce_xy), 50 | data.frame(id = 1:3, val = 1:3) 51 | ) 52 | 53 | expect_equal( 54 | power_left_join(df1, df2, by = "id", conflict = coalesce_yx), 55 | data.frame(id = 1:3, val = c(2, 2, 3)) 56 | ) 57 | 58 | expect_equal( 59 | power_left_join(df1, df2, by = "id", conflict = "patch"), 60 | data.frame(id = 1:3, val = c(2, NA, 3)) 61 | ) 62 | 63 | }) 64 | 65 | test_that("conflict works with lists", { 66 | df1 <- data.frame(id = 1:3, val = 1:3) 67 | df2 <- data.frame(id = 1:2, val = c(2, NA)) 68 | 69 | expect_equal( 70 | power_left_join(df1, df2, by = "id", conflict = list(val = coalesce_xy)), 71 | data.frame(id = 1:3, val = 1:3) 72 | ) 73 | 74 | expect_equal( 75 | power_left_join(df1, df2, by = "id", conflict = list(val =coalesce_yx)), 76 | data.frame(id = 1:3, val = c(2, 2, 3)) 77 | ) 78 | 79 | expect_equal( 80 | power_left_join(df1, df2, by = "id", conflict = list(val="patch")), 81 | data.frame(id = 1:3, val = c(2, NA, 3)) 82 | ) 83 | 84 | }) 85 | -------------------------------------------------------------------------------- /tests/testthat/test-corner_cases.R: -------------------------------------------------------------------------------- 1 | test_that("corner cases work", { 2 | # conflicts between key in x and col in y, key col is not suffixed, as with dplyr 3 | expect_equal( 4 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), c(a = "key")), 5 | dplyr::left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), c(a = "key")) 6 | ) 7 | 8 | # with keep = TRUE key col is renamed too 9 | expect_equal( 10 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), c(a = "key"), keep = TRUE), 11 | dplyr::left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), c(a = "key"), keep = TRUE) 12 | ) 13 | 14 | # the col is not renamed if we don't keep the key col 15 | expect_equal( 16 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), c(a = "key"), keep = "none"), 17 | data.frame(a = c("FOO", NA)) 18 | ) 19 | expect_equal( 20 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), c(a = "key"), keep = "right"), 21 | data.frame(key = c("foo", NA), a = c("FOO", NA)) 22 | ) 23 | 24 | ## same examples with fuzzy join 25 | 26 | # fuzzy join keep both cols and add suffixes 27 | expect_equal( 28 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), ~ .x$a 
== .y$key), 29 | data.frame(a.x = c("foo", "bar"), key = c("foo", NA), a.y = c("FOO", NA)) 30 | ) 31 | 32 | # we can handle these conflicts 33 | expect_equal( 34 | power_left_join( 35 | data.frame(a=c("foo", "bar")), 36 | data.frame(key = "foo", a = "FOO"), 37 | ~ .x$a == .y$key, 38 | conflict = coalesce), 39 | data.frame(key = c("foo", NA), a = c("foo", "bar")) 40 | ) 41 | 42 | # the col is not renamed if we don't keep the key col 43 | expect_equal( 44 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), 45 | ~ .x$a == .y$key, keep = "none"), 46 | data.frame(a = c("FOO", NA)) 47 | ) 48 | 49 | expect_equal( 50 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo", a = "FOO"), 51 | ~ .x$a == .y$key, keep = "right"), 52 | data.frame(key = c("foo", NA), a = c("FOO", NA)) 53 | ) 54 | 55 | 56 | }) 57 | 58 | 59 | # to do : tests that mix equi and fuzzy 60 | -------------------------------------------------------------------------------- /tests/testthat/test-fill.R: -------------------------------------------------------------------------------- 1 | df1 <- data.frame(id = 1:2, val1 = 1:2) 2 | df2 <- data.frame(id = 2:3, val2 = 2:3) 3 | test_that("fill works", { 4 | expect_equal( 5 | power_full_join(df1, df2, by = "id", fill = 0), 6 | data.frame(id = 1:3, val1 = c(1:2, 0), val2 = c(0, 2:3)) 7 | ) 8 | 9 | expect_equal( 10 | power_full_join(df1, df2, by = "id", fill = list(val1 = 10, val2 = 20)), 11 | data.frame(id = 1:3, val1 = c(1:2, 10), val2 = c(20, 2:3)) 12 | ) 13 | }) 14 | 15 | -------------------------------------------------------------------------------- /tests/testthat/test-fuzzy_joins.R: -------------------------------------------------------------------------------- 1 | df1 <- data.frame(id = 1:2, val1 = 1:2) 2 | df2 <- data.frame(id = 2:3, val2 = 2:3) 3 | 4 | test_that("fuzzy joins work with `by` formulas", { 5 | 6 | df3 <- data.frame(id.x = c(1, 1:2), val1 = c(1, 1:2), id.y = c(2:3, 3), val2 = c(2:3, 3)) 7 | 8 | # left 9 | suppressWarnings( # suppress mysterious warning that only appears when running full test batch 10 | expect_equal( 11 | power_left_join(df1, df2, by = ~ .x$id < .y$id), 12 | df3 13 | ) 14 | ) 15 | 16 | # right (same thing here) 17 | suppressWarnings( # suppress mysterious warning that only appears when running full test batch 18 | expect_equal( 19 | power_right_join(df1, df2, by = ~ .x$id < .y$id), 20 | df3 21 | ) 22 | ) 23 | 24 | # full (same thing again) 25 | expect_equal( 26 | power_full_join(df1, df2, by = ~ .x$id < .y$id), 27 | df3 28 | ) 29 | 30 | # no match, this should not warn! 
31 | expect_equal( 32 | power_inner_join(df1, df2, by = ~ .x$id > .y$id), 33 | df3[0,] 34 | ) 35 | 36 | # zero match 37 | # fuzzyjoin::fuzzy_left_join(df1, df2, by = "id", match_fun = `>`) 38 | # fuzzyjoin::fuzzy_left_join(df1, df2, multi_by = list(x = "id", y = "id"), match_fun = NULL, multi_match_fun = ~ .x > .y) 39 | expect_equal( 40 | power_left_join(df1, df2, by = ~ .x$id > .y$id), 41 | data.frame(id.x = 1:2, val1 = 1:2, id.y = NA_integer_, val2 = NA_integer_) 42 | ) 43 | 44 | # zero match + column creation 45 | # {fuzzyjoin} doesn't have the right behavior here, the column should be created 46 | # fuzzyjoin::fuzzy_left_join(df1, df2, multi_by = list(x = "id", y = "id"), match_fun = NULL, multi_match_fun = ~ .x > .y) 47 | expect_equal( 48 | power_left_join(df1, df2, by = ~ (foo <- .x$id > .y$id)), 49 | data.frame(id.x = 1:2, val1 = 1:2, id.y = NA_integer_, val2 = NA_integer_, foo = NA) 50 | ) 51 | }) 52 | 53 | test_that("fuzzy joins work with `by` lists()", { 54 | suppressWarnings( # suppress mysterious warning that only appears when running full test batch 55 | expect_equal( 56 | power_left_join(df1, df2, by = list(~ .x$id < .y$id)), 57 | data.frame(id.x = c(1, 1:2), val1 = c(1, 1:2), id.y = c(2:3, 3), val2 = c(2:3, 3)) 58 | ) 59 | ) 60 | }) 61 | 62 | test_that("fuzzy joins can create columns", { 63 | suppressWarnings( # suppress mysterious warning that only appears when running full test batch 64 | expect_equal( 65 | power_left_join(df1, df2, by = list(~ (foo <- .x$id < .y$id))), 66 | data.frame(id.x = c(1, 1:2), val1 = c(1, 1:2), id.y = c(2:3, 3), val2 = c(2:3, 3), foo = TRUE) 67 | ) 68 | ) 69 | }) 70 | 71 | 72 | 73 | test_that("when creating columns through fuzzy joins, conflicts are handled", { 74 | # if we create a conflicting col it should be suffixed or handled 75 | expect_equal( 76 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo"), 77 | ~ .x$a == (a <- .y$key)), 78 | data.frame(a...1 = c("foo", "bar"), key = c("foo", NA), a...3 = c("foo", NA)) 79 | ) 80 | 81 | expect_error( 82 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo"), 83 | ~ .x$a == (a <- .y$key), check = check_specs(column_conflict = "abort")), 84 | "Conflicting columns" 85 | ) 86 | 87 | expect_message( 88 | power_left_join(data.frame(a=c("foo", "bar")), data.frame(key = "foo"), 89 | ~ .x$a == (a <- .y$key), check = check_specs(column_conflict = "inform")), 90 | "Conflicting columns" 91 | ) 92 | }) 93 | 94 | test_that("mixing equi joins and fuzzy joins works", { 95 | df1 <- data.frame(id1 = c(1:2, 2), val = 1:3) 96 | df2 <- data.frame(id2 = c(2,2:3), val = 1:3, foo = 1) 97 | 98 | expect_equal( 99 | power_full_join(df1, df2, by = c("id1" = "id2", ~ .x$val > .y$val)), 100 | data.frame(id1 = c(2, 2, 2, 1, 3), val.x = c(2, 3, 3, 1, NA), 101 | val.y = c(1, 1, 2, NA, 3), foo = c(1, 1, 1, NA, 1)) 102 | ) 103 | 104 | expect_equal( 105 | power_full_join(df1, df2, by = c("id1" = "id2", ~ .x$val > .y$val), fill = 0), 106 | data.frame(id1 = c(2, 2, 2, 1, 3), val.x = c(2, 3, 3, 1, 0), 107 | val.y = c(1, 1, 2, 0, 3), foo = c(1, 1, 1, 0, 1)) 108 | ) 109 | 110 | expect_equal( 111 | power_full_join(df1, df2, by = c("id1" = "id2", ~ .x$val > .y$val), keep = "left"), 112 | data.frame(id1 = c(2, 2, 2, 1, NA), val = c(2, 3, 3, 1, NA), foo = c(1, 1, 1, NA, 1)) 113 | ) 114 | 115 | expect_equal( 116 | power_full_join(df1, df2, by = c("id1" = "id2", ~ .x$val > .y$val), keep = "right"), 117 | data.frame(id2 = c(2, 2, 2, NA, 3), 118 | val = c(1, 1, 2, NA, 3), foo = c(1, 1, 1, NA, 1)) 119 | ) 120 | 
121 | expect_equal( 122 | power_full_join(df1, df2, by = c("id1" = "id2", ~ .x$val > .y$val), keep = "both"), 123 | data.frame(id1 = c(2, 2, 2, 1, NA), val.x = c(2, 3, 3, 1, NA), 124 | id2 = c(2, 2, 2, NA, 3), val.y = c(1, 1, 2, NA, 3), foo = c(1, 1, 1, NA, 1)) 125 | ) 126 | 127 | expect_equal( 128 | power_full_join(df1, df2, by = c("id1" = "id2", ~ .x$val > .y$val), keep = "none"), 129 | data.frame(foo = c(1, 1, 1, NA, 1)) 130 | ) 131 | }) 132 | 133 | test_that("NAs are handled properly in fuzzy joins", { 134 | expect_equal( 135 | power_inner_join(data.frame(a = c(1, NA)), data.frame(b = c(1, NA)), by = c(a = "b")), 136 | data.frame(a = c(1, NA)) 137 | ) 138 | 139 | expect_equal( 140 | power_inner_join(data.frame(a = c(1, NA)), data.frame(b = c(1, NA)), by = ~ .x$a == .y$b), 141 | data.frame(a = 1, b = 1) 142 | ) 143 | 144 | expect_equal( 145 | power_inner_join(data.frame(a = c(1, NA)), data.frame(b = c(1, NA)), by = ~ .x$a %==% .y$b), 146 | data.frame(a = c(1, NA), b = c(1, NA)) 147 | ) 148 | }) 149 | 150 | -------------------------------------------------------------------------------- /tests/testthat/test-keep.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("The `keep` argument works with equi joins", { 3 | 4 | # equi join with keep = TRUE keep both id cols with suffixes 5 | expect_equal( 6 | power_full_join( 7 | data.frame(id = 1:2, val1 = 1:2), 8 | data.frame(id = 2:3, val2 = 2:3), 9 | by = "id", keep = TRUE), 10 | data.frame( 11 | id.x = c(1, 2, NA), 12 | val1 = c(1, 2, NA), 13 | id.y = c(NA, 2, 3), 14 | val2 = c(NA, 2, 3)) 15 | ) 16 | # keep = "both" is the same as `keep = TRUE` 17 | expect_equal( 18 | power_full_join( 19 | data.frame(id = 1:2, val1 = 1:2), 20 | data.frame(id = 2:3, val2 = 2:3), 21 | by = "id", keep = "both"), 22 | data.frame( 23 | id.x = c(1, 2, NA), 24 | val1 = c(1, 2, NA), 25 | id.y = c(NA, 2, 3), 26 | val2 = c(NA, 2, 3)) 27 | ) 28 | 29 | # if we use conflict functions key cols are merged 30 | expect_equal( 31 | power_full_join( 32 | data.frame(id = 1:2, val1 = 1:2), 33 | data.frame(id = 2:3, val2 = 2:3), 34 | by = "id", keep = TRUE, conflict = coalesce), 35 | data.frame( 36 | val1 = c(1, 2, NA), 37 | val2 = c(NA, 2, 3), 38 | id = c(1, 2, 3)) 39 | ) 40 | 41 | # equi join with keep = "default" merges key cols 42 | expect_equal( 43 | power_full_join( 44 | data.frame(id1=1, val1= "a"), 45 | data.frame(id2=2, val2= "b"), 46 | by = c(id1 = "id2"), 47 | keep = "default"), 48 | data.frame(id1 = 1:2, val1 = c("a", NA), val2 = c(NA, "b")) 49 | ) 50 | 51 | # keep = FALSE is like keep = "default" for equi joins 52 | expect_equal( 53 | power_full_join( 54 | data.frame(id1=1, val1= "a"), 55 | data.frame(id2=2, val2= "b"), 56 | by = c(id1 = "id2"), 57 | keep = FALSE), 58 | data.frame(id1 = 1:2, val1 = c("a", NA), val2 = c(NA, "b")) 59 | ) 60 | 61 | # keep = "left" keeps only the unmerged left keys 62 | expect_equal( 63 | power_full_join( 64 | data.frame(id1=1, val1= "a"), 65 | data.frame(id2=2, val2= "b"), 66 | by = c(id1 = "id2"), 67 | keep = "left"), 68 | data.frame(id1 = c(1, NA), val1 = c("a", NA), val2 = c(NA, "b")) 69 | ) 70 | 71 | # keep = "right" keeps only the unmerged left keys 72 | expect_equal( 73 | power_full_join( 74 | data.frame(id1=1, val1= "a"), 75 | data.frame(id2=2, val2= "b"), 76 | by = c(id1 = "id2"), 77 | keep = "right"), 78 | data.frame(val1 = c("a", NA), id2 = c(NA, 2), val2 = c(NA, "b")) 79 | ) 80 | 81 | # keep = "none" removes key columns 82 | expect_equal( 83 | power_full_join( 84 | 
data.frame(id1=1, val1= "a"), 85 | data.frame(id2=2, val2= "b"), 86 | by = c(id1 = "id2"), 87 | keep = "none"), 88 | data.frame(val1 = c("a", NA), val2 = c(NA, "b")) 89 | ) 90 | }) 91 | 92 | test_that("The `keep` argument works with fuzzy joins", { 93 | 94 | # left 95 | expect_equal( 96 | power_inner_join( 97 | data.frame(id1=1, val1= "a"), 98 | data.frame(id2=2, val2= "b"), 99 | by = ~ .x$id1 < .y$id2, 100 | keep = "left"), 101 | data.frame(id1 = 1, val1 = "a", val2 = "b") 102 | ) 103 | 104 | # none 105 | expect_equal( 106 | power_inner_join( 107 | data.frame(id1=1, val1= "a"), 108 | data.frame(id2=2, val2= "b"), 109 | by = ~ .x$id1 < .y$id2, 110 | keep = "none"), 111 | data.frame(val1 = "a", val2 = "b") 112 | ) 113 | 114 | # FALSE is like keep = "none" for fuzzy joins 115 | expect_equal( 116 | power_inner_join( 117 | data.frame(id1=1, val1= "a"), 118 | data.frame(id2=2, val2= "b"), 119 | by = ~ .x$id1 < .y$id2, 120 | keep = FALSE), 121 | data.frame(val1 = "a", val2 = "b") 122 | ) 123 | 124 | # right 125 | expect_equal( 126 | power_inner_join( 127 | data.frame(id1=1, val1= "a"), 128 | data.frame(id2=2, val2= "b"), 129 | by = ~ .x$id1 < .y$id2, 130 | keep = "right"), 131 | data.frame(val1 = "a", id2 = 2, val2 = "b") 132 | ) 133 | }) 134 | 135 | -------------------------------------------------------------------------------- /tests/testthat/test-preprocess.R: -------------------------------------------------------------------------------- 1 | df1 <- data.frame(id = 1:2, val1 = 1:2) 2 | df2 <- data.frame(id = 2:3, val2 = 2:3) 3 | 4 | test_that("`select_keys_and` works", { 5 | expect_equal( 6 | power_inner_join(df1 %>% select_keys_and(val1), df2, by = "id"), 7 | data.frame(id = 2, val1 = 2, val2 = 2) 8 | ) 9 | 10 | expect_equal( 11 | power_inner_join(df1 %>% select_keys_and(-val1), df2, by = "id"), 12 | data.frame(id = 2, val2 = 2) 13 | ) 14 | }) 15 | 16 | test_that("`summarize_by_keys` works", { 17 | expect_equal( 18 | power_inner_join(df1 %>% summarize_by_keys(val1 = max(val1)), df2, by = "id"), 19 | tibble(id = 2, val1 = 2, val2 = 2) 20 | ) 21 | }) 22 | 23 | test_that("`nest_by_keys` works", { 24 | expect_equal( 25 | power_inner_join(df1 %>% nest_by_keys(val1), df2, by = "id"), 26 | tibble(id = 2, val1 = list(2), val2 = 2) 27 | ) 28 | 29 | expect_equal( 30 | power_inner_join(df1 %>% nest_by_keys(name = "foo"), df2, by = "id"), 31 | tibble(id = 2, foo = list(tibble(val1 = 2)), val2 = 2) 32 | ) 33 | 34 | expect_equal( 35 | power_inner_join(df1 %>% nest_by_keys(val1, name = "foo"), df2, by = "id"), 36 | tibble(id = 2, foo = list(tibble(val1 = 2)), val2 = 2) 37 | ) 38 | }) 39 | 40 | test_that("`pack_along_keys` works", { 41 | expect_equal( 42 | power_inner_join(df1 %>% pack_along_keys(val1, name = "foo"), df2, by = "id"), 43 | tibble(id = 2, foo = tibble(val1 = 2), val2 = 2) 44 | ) 45 | }) 46 | 47 | 48 | test_that("`complete_keys` works", { 49 | x1 <- data.frame(key = factor("a", levels = c("a", "b")), x = 1:2) 50 | y1 <- data.frame(key = c("b", "c"), y = 1:2, stringsAsFactors = FALSE) 51 | 52 | expect_equal( 53 | power_left_join(x1 %>% complete_keys(), y1, by = "key"), 54 | data.frame(key = c("a", "a", "b"), x = c(1, 2, NA), y = c(NA, NA, 1)) 55 | ) 56 | 57 | x2 <- data.frame(key1 = 1:2, key2 = 1:2, x = 1:2) 58 | y2 <- data.frame(key1 = 1, key2 = 2, y = 3) 59 | 60 | expect_equal( 61 | power_left_join(x2 %>% complete_keys(), y2, by = c("key1", "key2")), 62 | data.frame(key1 = c(1, 1, 2, 2), key2 = c(1:2, 1:2), x = c(1, NA, NA, 2), y = c(NA, 3, NA, NA)) 63 | ) 64 | }) 65 | 66 | # 
67 | #   df_long <- tibble(id = c(1, 1, 2, 2), nm = c("a", "b", "a", "b"), val = 1:4)
68 | #   df_wide <- tibble(id = 1:2, a = c(1, 3), b = c(2, 4))
69 | #   df2 <- tibble(id = 1:2)
70 | #   expect_equal(
71 | #     power_inner_join(df_long %>% pivot_wider_by_keys(names_from = nm, values_from = val), df2, by = "id"),
72 | #     df_wide
73 | #   )
74 | #   expect_equal(
75 | #     power_inner_join(df_wide %>% pivot_longer_by_keys(names_to = "nm", values_to = "val"), df2, by = "id"),
76 | #     df_long
77 | #   )
78 | # })
79 | 
80 | 
81 | 
--------------------------------------------------------------------------------
/tests/testthat/test-recursive.R:
--------------------------------------------------------------------------------
1 | df1 <- data.frame(id = 1:2, val1 = 1:2)
2 | df2 <- data.frame(id = 1:2, val2 = 3:4)
3 | df3 <- data.frame(id = 1:2, val3 = 5:6)
4 | out <- data.frame(id = 1:2, val1 = 1:2, val2 = 3:4, val3 = 5:6)
5 | 
6 | test_that("recursive joins work", {
7 | 
8 |   expect_equal(
9 |     power_left_join(df1, list(df2, df3), by = "id"),
10 |     out
11 |   )
12 | 
13 |   expect_equal(
14 |     power_left_join(list(df1, df2, df3), by = "id"),
15 |     out
16 |   )
17 | 
18 |   expect_equal(
19 |     power_left_join(list(df1, df2), df3, by = "id"),
20 |     out
21 |   )
22 | 
23 |   expect_equal(
24 |     power_right_join(df1, list(df2, df3), by = "id"),
25 |     out
26 |   )
27 | 
28 |   expect_equal(
29 |     power_right_join(list(df1, df2, df3), by = "id"),
30 |     out
31 |   )
32 | 
33 |   expect_equal(
34 |     power_right_join(list(df1, df2), df3, by = "id"),
35 |     out
36 |   )
37 | 
38 |   expect_equal(
39 |     power_inner_join(df1, list(df2, df3), by = "id"),
40 |     out
41 |   )
42 | 
43 |   expect_equal(
44 |     power_inner_join(list(df1, df2, df3), by = "id"),
45 |     out
46 |   )
47 | 
48 |   expect_equal(
49 |     power_inner_join(list(df1, df2), df3, by = "id"),
50 |     out
51 |   )
52 | 
53 |   expect_equal(
54 |     power_full_join(df1, list(df2, df3), by = "id"),
55 |     out
56 |   )
57 | 
58 |   expect_equal(
59 |     power_full_join(list(df1, df2, df3), by = "id"),
60 |     out
61 |   )
62 | 
63 |   expect_equal(
64 |     power_full_join(list(df1, df2), df3, by = "id"),
65 |     out
66 |   )
67 | 
68 | })
69 | 
--------------------------------------------------------------------------------
/tests/testthat/test-standard_joins.R:
--------------------------------------------------------------------------------
1 | df1 <- data.frame(id = 1:2, val1 = 1:2)
2 | df2 <- data.frame(id = 2:3, val2 = 2:3)
3 | 
4 | test_that("power_left_join works", {
5 | 
6 |   expect_equal(
7 |     suppressMessages(power_left_join(df1, df2)),
8 |     data.frame(id = 1:2, val1 = 1:2, val2 = c(NA, 2))
9 |   )
10 | 
11 |   expect_message(
12 |     power_left_join(df1, df2),
13 |     "Joining")
14 | 
15 |   expect_equal(
16 |     power_left_join(df1, df2, by = "id"),
17 |     data.frame(id = 1:2, val1 = 1:2, val2 = c(NA, 2))
18 |   )
19 | 
20 |   expect_error(
21 |     power_left_join(df1, df2, by = "id", check = "B"),
22 |     "check_specs"
23 |   )
24 | })
25 | 
26 | test_that("power_right_join works", {
27 |   expect_equal(
28 |     power_right_join(df1, df2, by = "id"),
29 |     data.frame(id = 2:3, val1 = c(2, NA), val2 = 2:3)
30 |   )
31 |   expect_error(
32 |     power_right_join(df1, df2, by = "id", check = "B"),
33 |     "check_specs"
34 |   )
35 | })
36 | 
37 | test_that("power_inner_join works", {
38 |   expect_equal(
39 |     power_inner_join(df1, df2, by = "id"),
40 |     data.frame(id = 2, val1 = 2, val2 = 2)
41 |   )
42 |   expect_error(
43 |     power_inner_join(df1, df2, by = "id", check = "B"),
44 |     "check_specs"
45 |   )
46 | })
47 | 
48 | test_that("power_full_join works", {
49 |   expect_equal(
50 |     power_full_join(df1, df2, by = "id"),
51 |     data.frame(id = 1:3, val1 = c(1:2, NA), val2 = c(NA, 2:3))
52 |   )
53 |   expect_error(
54 |     power_full_join(df1, df2, by = "id", check = "b"),
55 |     "check_specs"
56 |   )
57 | })
58 | 
59 | test_that("dplyr code is still covered", {
60 |   df1 <- data.frame(id = 1:2, val1 = 1:2)
61 |   df2 <- data.frame(id = 2:3, val2 = 2:3)
62 | 
63 |   expect_equal(
64 |     power_left_join(df1, df2, by = list(x = "id", y = "id")),
65 |     data.frame(id = 1:2, val1 = 1:2, val2 = c(NA, 2))
66 |   )
67 | 
68 |   expect_equal(
69 |     power_right_join(df1, df2, by = "id", na_matches = "never"),
70 |     data.frame(id = 2:3, val1 = c(2, NA), val2 = 2:3)
71 |   )
72 | 
73 |   df1 <- data.frame(id1 = 1:2, val1 = 1:2)
74 |   df2 <- data.frame(id2 = 2:3, val2 = 2:3)
75 | 
76 |   expect_error(
77 |     power_inner_join(df1, df2),
78 |     "no common var"
79 |   )
80 | 
81 |   expect_error(
82 |     power_inner_join(df1, df2, by = mean)
83 |   )
84 | 
85 |   df1 <- data.frame(id = 1:2, val = 1:2)
86 |   df2 <- data.frame(id = 2:3, val = 2:3)
87 | 
88 |   expect_message(
89 |     power_inner_join(df1, df2),
90 |     "Joining"
91 |   )
92 | 
93 |   df1 <- data.frame(id = 1:2, val = 1:2)
94 |   df2 <- data.frame(id = c("2", "3"), val = 2:3)
95 | 
96 |   expect_error(
97 |     power_inner_join(df1, df2, by = "id"),
98 |     "incompatible types"
99 |   )
100 | })
101 | 
102 | 
--------------------------------------------------------------------------------
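
Taken together, these test files double as a compact reference for the powerjoin join interface. The sketch below is only an illustrative summary of what the tests above assert, not package code: it assumes the powerjoin and dplyr packages are installed, and the df1/df2/df3 objects are toy inputs mirroring the test fixtures.

library(powerjoin)

df1 <- data.frame(id = 1:2, val1 = 1:2)
df2 <- data.frame(id = 2:3, val2 = 2:3)
df3 <- data.frame(id = 1:3, val3 = 11:13)

# equi join; keep = "none" drops the key columns from the output
power_full_join(df1, df2, by = "id", keep = "none")

# fuzzy join: the `by` formula is evaluated with .x (left table) and .y (right table)
power_inner_join(data.frame(id1 = 1, val1 = "a"), data.frame(id2 = 2, val2 = "b"),
                 by = ~ .x$id1 < .y$id2)

# a conflict function combines same-named columns (here the kept id keys) instead of suffixing them
power_full_join(df1, df2, by = "id", keep = TRUE, conflict = dplyr::coalesce)

# recursive join: a list of data frames is joined from left to right
power_left_join(list(df1, df2, df3), by = "id")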