├── .Rbuildignore ├── .gitattributes ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ ├── pr-commands.yaml │ ├── rhub.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── MAINTENANCE.md ├── NAMESPACE ├── NEWS.md ├── R ├── archive.R ├── archive_extract.R ├── archive_read.R ├── archive_write.R ├── archive_write_dir.R ├── archive_write_files.R ├── cpp11.R ├── file_read.R ├── file_write.R ├── utils.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── archive.Rproj ├── cleanup ├── codecov.yml ├── configure ├── configure.win ├── cran-comments.md ├── inst ├── extdata │ └── data.zip └── lib │ └── README.md ├── man-roxygen └── archive.R ├── man ├── archive.Rd ├── archive_extract.Rd ├── archive_read.Rd ├── archive_write.Rd ├── archive_write_files.Rd ├── file_connections.Rd └── macros │ └── eval.Rd ├── src ├── .clang-format ├── .gitignore ├── Makevars.in ├── Makevars.ucrt ├── Makevars.win ├── archive.cpp ├── archive_extract.cpp ├── archive_read.cpp ├── archive_write.cpp ├── archive_write_direct.cpp ├── archive_write_files.cpp ├── connection │ ├── connection.c │ └── connection.h ├── cpp11.cpp ├── cpp11 │ └── include │ │ ├── cpp11.hpp │ │ ├── cpp11 │ │ ├── R.hpp │ │ ├── altrep.hpp │ │ ├── as.hpp │ │ ├── attribute_proxy.hpp │ │ ├── data_frame.hpp │ │ ├── declarations.hpp │ │ ├── doubles.hpp │ │ ├── environment.hpp │ │ ├── external_pointer.hpp │ │ ├── function.hpp │ │ ├── integers.hpp │ │ ├── list.hpp │ │ ├── list_of.hpp │ │ ├── logicals.hpp │ │ ├── matrix.hpp │ │ ├── named_arg.hpp │ │ ├── protect.hpp │ │ ├── r_bool.hpp │ │ ├── r_string.hpp │ │ ├── r_vector.hpp │ │ ├── raws.hpp │ │ ├── sexp.hpp │ │ └── strings.hpp │ │ └── fmt │ │ ├── core.h │ │ ├── format-inl.h │ │ └── format.h ├── r_archive.cpp └── r_archive.h ├── tests ├── testthat.R └── testthat │ ├── cp866.tar.Z.uu │ ├── helper-mock.R │ ├── helpers.R │ ├── mtcars.tar.gz │ ├── test-archive.R │ ├── test-archive_extract.R │ ├── test-archive_read.R │ ├── test-archive_write.R │ ├── test-archive_write_dir.R │ ├── test-archive_write_files.R │ ├── test-file_read.R │ └── test-file_write.R └── tools ├── dynamic-help.R └── winlibs.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^src/lib$ 4 | ^src/include$ 5 | ^script\.R$ 6 | ^\.travis\.yml$ 7 | ^README\.Rmd$ 8 | ^README-.*\.png$ 9 | ^appveyor\.yml$ 10 | ^src/Makevars$ 11 | ^src/\.clang-format$ 12 | ^src/\.ycm_extra_conf\.py$ 13 | ^codecov\.yml$ 14 | ^docs$ 15 | ^_pkgdown\.yml$ 16 | ^man-roxygen$ 17 | ^script.R$ 18 | ^cran-comments\.md$ 19 | ^src-.*/$ 20 | ^windows$ 21 | ^\.github$ 22 | ^.cache$ 23 | ^compile_commands\.json$ 24 | ^LICENSE\.md$ 25 | ^CRAN-RELEASE$ 26 | ^MAINTENANCE\.md$ 27 | ^dev-lib$ 28 | ^[.]deps$ 29 | ^autobrew$ 30 | ^man/macros/eval2.Rd$ 31 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *R text eol=lf 2 | *Rd text eol=lf 3 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | # 4 | # NOTE: This workflow is overkill for most R packages and 5 | # check-standard.yaml is likely a better choice. 6 | # usethis::use_github_action("check-standard") will install it. 7 | on: 8 | push: 9 | branches: [main, master] 10 | pull_request: 11 | branches: [main, master] 12 | workflow_dispatch: 13 | 14 | name: R-CMD-check 15 | 16 | jobs: 17 | R-CMD-check: 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) ${{ matrix.config.locale }} 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | - {os: macOS-latest, r: 'release'} 27 | 28 | - {os: windows-2022, r: 'devel' } 29 | - {os: windows-2022, r: 'next' } 30 | - {os: windows-latest, r: 'release' } 31 | # Use 4.1 for encoding bugs 32 | - {os: windows-latest, r: '4.1'} 33 | 34 | - {os: ubuntu-20.04, r: 'next'} 35 | - {os: ubuntu-22.04, r: 'next'} 36 | # Use older ubuntu to maximise backward compatibility 37 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 38 | - {os: ubuntu-latest, r: 'release'} 39 | - {os: ubuntu-latest, r: 'oldrel-1'} 40 | - {os: ubuntu-latest, r: 'oldrel-2'} 41 | - {os: ubuntu-latest, r: 'oldrel-3'} 42 | - {os: ubuntu-latest, r: 'oldrel-4'} 43 | - {os: ubuntu-latest, r: 'release', locale: 'en_US.ISO-8859-15'} 44 | env: 45 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 46 | R_KEEP_PKG_SOURCE: yes 47 | 48 | steps: 49 | - name: Set locale 50 | if: matrix.config.locale != '' 51 | run: | 52 | sudo locale-gen ${{ matrix.config.locale }} 53 | echo "LC_ALL=${{ matrix.config.locale }}" >> $GITHUB_ENV 54 | 55 | - uses: actions/checkout@v3 56 | 57 | - uses: r-lib/actions/setup-pandoc@v2-branch 58 | 59 | - uses: r-lib/actions/setup-r@v2-branch 60 | with: 61 | r-version: ${{ matrix.config.r }} 62 | rtools-version: ${{ matrix.config.rtools-version }} 63 | http-user-agent: ${{ matrix.config.http-user-agent }} 64 | use-public-rspm: true 65 | 66 | - uses: r-lib/actions/setup-r-dependencies@v2-branch 67 | with: 68 | extra-packages: any::rcmdcheck 69 | needs: check 70 | 71 | - name: Test compilation 72 | if: runner.os == 'Windows' 73 | run: | 74 | install.packages("filelock", type = "source", repos = "https://cloud.r-project.org") 75 | pak::pkg_install("filelock?source") 76 | pak::pkg_install("tiff?source") 77 | library(filelock) 78 | library(tiff) 79 | shell: Rscript {0} 80 | 81 | - name: Test installation with brew 82 | if: runner.os == 'macOS' 83 | env: 84 | DISABLE_AUTOBREW: true 85 | run: | 86 | brew install libarchive zstd 87 | R CMD INSTALL . 88 | 89 | - uses: r-hub/actions/setup-r-sysreqs@v1 90 | 91 | - uses: r-lib/actions/check-r-package@v2-branch 92 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | env: 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | steps: 20 | - uses: actions/checkout@v2 21 | 22 | - uses: r-lib/actions/setup-pandoc@v2 23 | 24 | - uses: r-lib/actions/setup-r@v2 25 | with: 26 | use-public-rspm: true 27 | 28 | - uses: r-lib/actions/setup-r-dependencies@v2 29 | with: 30 | extra-packages: any::pkgdown, local::. 31 | needs: website 32 | 33 | - name: Deploy package 34 | if: github.event_name != 'pull_request' 35 | run: | 36 | git config --local user.name "$GITHUB_ACTOR" 37 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 38 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 39 | 40 | - name: Build site without deploying 41 | if: github.event_name == 'pull_request' 42 | run: | 43 | Rscript -e 'pkgdown::build_site(preview = FALSE, install = FALSE)' 44 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | issue_comment: 5 | types: [created] 6 | 7 | name: Commands 8 | 9 | jobs: 10 | document: 11 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} 12 | name: document 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: r-lib/actions/pr-fetch@v2 20 | with: 21 | repo-token: ${{ secrets.GITHUB_TOKEN }} 22 | 23 | - uses: r-lib/actions/setup-r@v2 24 | with: 25 | use-public-rspm: true 26 | 27 | - uses: r-lib/actions/setup-r-dependencies@v2 28 | with: 29 | extra-packages: any::roxygen2 30 | needs: pr-document 31 | 32 | - name: Document 33 | run: Rscript -e 'roxygen2::roxygenise()' 34 | 35 | - name: commit 36 | run: | 37 | git config --local user.name "$GITHUB_ACTOR" 38 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 39 | git add man/\* NAMESPACE 40 | git commit -m 'Document' 41 | 42 | - uses: r-lib/actions/pr-push@v2 43 | with: 44 | repo-token: ${{ secrets.GITHUB_TOKEN }} 45 | 46 | style: 47 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} 48 | name: style 49 | runs-on: ubuntu-latest 50 | env: 51 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 52 | steps: 53 | - uses: actions/checkout@v2 54 | 55 | - uses: r-lib/actions/pr-fetch@v2 56 | with: 57 | repo-token: ${{ secrets.GITHUB_TOKEN }} 58 | 59 | - uses: r-lib/actions/setup-r@v2 60 | 61 | - name: Install dependencies 62 | run: Rscript -e 'install.packages("styler")' 63 | 64 | - name: Style 65 | run: Rscript -e 'styler::style_pkg()' 66 | 67 | - name: commit 68 | run: | 69 | git config --local user.name "$GITHUB_ACTOR" 70 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 71 | git add \*.R 72 | git commit -m 'Style' 73 | 74 | - uses: r-lib/actions/pr-push@v2 75 | with: 76 | repo-token: ${{ secrets.GITHUB_TOKEN }} 77 | -------------------------------------------------------------------------------- /.github/workflows/rhub.yaml: -------------------------------------------------------------------------------- 1 | # R-hub's generic GitHub Actions workflow file. It's canonical location is at 2 | # https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml 3 | # You can update this file to a newer version using the rhub2 package: 4 | # 5 | # rhub::rhub_setup() 6 | # 7 | # It is unlikely that you need to modify this file manually. 8 | 9 | name: R-hub 10 | run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" 11 | 12 | on: 13 | workflow_dispatch: 14 | inputs: 15 | config: 16 | description: 'A comma separated list of R-hub platforms to use.' 17 | type: string 18 | default: 'linux,windows,macos' 19 | name: 20 | description: 'Run name. You can leave this empty now.' 21 | type: string 22 | id: 23 | description: 'Unique ID. You can leave this empty now.' 24 | type: string 25 | 26 | jobs: 27 | 28 | setup: 29 | runs-on: ubuntu-latest 30 | outputs: 31 | containers: ${{ steps.rhub-setup.outputs.containers }} 32 | platforms: ${{ steps.rhub-setup.outputs.platforms }} 33 | 34 | steps: 35 | # NO NEED TO CHECKOUT HERE 36 | - uses: r-hub/actions/setup@main 37 | with: 38 | config: ${{ github.event.inputs.config }} 39 | id: rhub-setup 40 | 41 | linux-containers: 42 | needs: setup 43 | if: ${{ needs.setup.outputs.containers != '[]' }} 44 | runs-on: ubuntu-latest 45 | name: ${{ matrix.config.label }} 46 | strategy: 47 | fail-fast: false 48 | matrix: 49 | config: ${{ fromJson(needs.setup.outputs.containers) }} 50 | container: 51 | image: ${{ matrix.config.container }} 52 | 53 | steps: 54 | - uses: r-hub/actions/checkout@main 55 | - uses: r-hub/actions/platform-info@main 56 | with: 57 | token: ${{ secrets.RHUB_TOKEN }} 58 | job-config: ${{ matrix.config.job-config }} 59 | - uses: r-hub/actions/setup-deps@main 60 | with: 61 | token: ${{ secrets.RHUB_TOKEN }} 62 | job-config: ${{ matrix.config.job-config }} 63 | - uses: r-hub/actions/run-check@main 64 | with: 65 | token: ${{ secrets.RHUB_TOKEN }} 66 | job-config: ${{ matrix.config.job-config }} 67 | 68 | other-platforms: 69 | needs: setup 70 | if: ${{ needs.setup.outputs.platforms != '[]' }} 71 | runs-on: ${{ matrix.config.os }} 72 | name: ${{ matrix.config.label }} 73 | strategy: 74 | fail-fast: false 75 | matrix: 76 | config: ${{ fromJson(needs.setup.outputs.platforms) }} 77 | 78 | steps: 79 | - uses: r-hub/actions/checkout@main 80 | - uses: r-hub/actions/setup-r@main 81 | with: 82 | job-config: ${{ matrix.config.job-config }} 83 | token: ${{ secrets.RHUB_TOKEN }} 84 | - uses: r-hub/actions/platform-info@main 85 | with: 86 | token: ${{ secrets.RHUB_TOKEN }} 87 | job-config: ${{ matrix.config.job-config }} 88 | - uses: r-hub/actions/setup-deps@main 89 | with: 90 | job-config: ${{ matrix.config.job-config }} 91 | token: ${{ secrets.RHUB_TOKEN }} 92 | - uses: r-hub/actions/run-check@main 93 | with: 94 | job-config: ${{ matrix.config.job-config }} 95 | token: ${{ secrets.RHUB_TOKEN }} 96 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | workflow_dispatch: 9 | 10 | name: test-coverage 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | covr::to_cobertura(cov) 38 | shell: Rscript {0} 39 | 40 | - uses: codecov/codecov-action@v4 41 | with: 42 | fail_ci_if_error: true 43 | file: ./cobertura.xml 44 | plugin: noop 45 | disable_search: true 46 | token: ${{ secrets.CODECOV_TOKEN }} 47 | 48 | - name: Show testthat output 49 | if: always() 50 | run: | 51 | ## -------------------------------------------------------------------- 52 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 53 | shell: bash 54 | 55 | - name: Upload test results 56 | if: failure() 57 | uses: actions/upload-artifact@v4 58 | with: 59 | name: coverage-test-failures 60 | path: ${{ runner.temp }}/package 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | src/Makevars 5 | script.R 6 | src-*/ 7 | windows/ 8 | compile_commands.json 9 | .cache 10 | /dev-lib 11 | /inst/lib/libconnection.so 12 | .deps 13 | autobrew 14 | /man/macros/eval2.Rd 15 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: archive 2 | Title: Multi-Format Archive and Compression Support 3 | Version: 1.1.12.9000 4 | Authors@R: c( 5 | person("Jim", "Hester", role = "aut", 6 | comment = c(ORCID = "0000-0002-2739-7082")), 7 | person("Gábor", "Csárdi", , "csardi.gabor@gmail.com", role = c("aut", "cre")), 8 | person("Ondrej", "Holy", role = "cph", 9 | comment = "archive_write_add_filter implementation"), 10 | person("RStudio", role = c("cph", "fnd")) 11 | ) 12 | Description: Bindings to 'libarchive' the 13 | Multi-format archive and compression library. Offers R connections and 14 | direct extraction for many archive formats including 'tar', 'ZIP', 15 | '7-zip', 'RAR', 'CAB' and compression formats including 'gzip', 16 | 'bzip2', 'compress', 'lzma' and 'xz'. 17 | License: MIT + file LICENSE 18 | URL: https://archive.r-lib.org/, https://github.com/r-lib/archive 19 | BugReports: https://github.com/r-lib/archive/issues 20 | Depends: 21 | R (>= 3.6.0) 22 | Imports: 23 | cli, 24 | glue, 25 | rlang, 26 | tibble 27 | Suggests: 28 | covr, 29 | testthat 30 | LinkingTo: 31 | cli 32 | ByteCompile: true 33 | Encoding: UTF-8 34 | Roxygen: list(markdown = TRUE) 35 | RoxygenNote: 7.2.1.9000 36 | SystemRequirements: libarchive: libarchive-dev (deb), 37 | libarchive-devel (rpm), libarchive (homebrew), libarchive_dev (csw) 38 | Biarch: true 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021 2 | COPYRIGHT HOLDER: archive authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021 archive authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MAINTENANCE.md: -------------------------------------------------------------------------------- 1 | ## Current state 2 | 3 | archive is pretty stable, I think it wraps enough of libarchive for our typical use cases. 4 | It could probably be used a bit more automatically in readr / vroom, right now archive is used only by vroom if archive happens to be installed by the user, but most users are not going to know about it and have it installed. 5 | 6 | ## Known outstanding issues 7 | 8 | https://github.com/r-lib/archive/issues/59 mentioned an issue trying to set a C.UTF-8 locale, it seems at least some systems may not have this locale installed, but do have a en_US.UTF-8 locale. 9 | archive needs to set the UTF-8 locale to properly read filenames encoded in UTF-8. 10 | I don't know if it is worth adding code to try multiple locales, or just have the current warning is enough. 11 | 12 | ## Future directions 13 | 14 | In general I don't think there is a lot that needs to be done. Perhaps we could add nicer support for password protected archives, but I think this is not that common in practice. 15 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(archive) 4 | export(archive_extract) 5 | export(archive_read) 6 | export(archive_write) 7 | export(archive_write_dir) 8 | export(archive_write_files) 9 | export(file_read) 10 | export(file_write) 11 | importFrom(cli,cli_progress_bar) 12 | importFrom(glue,glue) 13 | importFrom(glue,glue_collapse) 14 | importFrom(glue,single_quote) 15 | importFrom(rlang,is_character) 16 | importFrom(rlang,is_named) 17 | importFrom(tibble,as_tibble) 18 | useDynLib(archive, .registration = TRUE) 19 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # archive (development version) 2 | 3 | # archive 1.1.12 4 | 5 | * No changes. 6 | 7 | # archive 1.1.11 8 | 9 | * No changes. 10 | 11 | # archive 1.1.10 12 | 13 | * No changes. 14 | 15 | # archive 1.1.9 16 | 17 | * No changes. 18 | 19 | # archive 1.1.8 20 | 21 | * `archive_extract()` now stop reading the input file once the desired 22 | files are extracted, instead of always reading to the end of the file 23 | (#85, @allenluce). 24 | 25 | # archive 1.1.7 26 | 27 | * Fixed an unsafe `printf()` format string (#95). 28 | 29 | # archive 1.1.6 30 | 31 | * Fixed compilation issues on Big-endian platforms (#84, @barracuda156). 32 | 33 | # archive 1.1.5 34 | 35 | * archive now does not produce broken archives on Windows (#72, @cielavenir). 36 | 37 | # archive 1.1.4 38 | 39 | * `archive_write_dir()` now works with relative paths (#69). 40 | 41 | * archive now works properly on UCRT Windows R. 42 | 43 | # archive 1.1.3 44 | 45 | * Gábor Csárdi is now the maintainer. 46 | 47 | # archive 1.1.2 48 | 49 | * Fix UBSAN error related to the progress bar initialization (#55) 50 | 51 | * Fix unterminated progress bars in `archive_write()` and friends (#60, @salim-b) 52 | 53 | # archive 1.1.1 54 | 55 | * `archive_extract()` now returns the extracted files (invisibly) (#50) 56 | 57 | * `archive_extract()`, `archive_write_files()` and `archive_write_dir()` gain progress bars using the cli package (#49) 58 | 59 | * Remove uses of deprecated function `glue::collapse()`. 60 | 61 | # archive 1.1.0 62 | 63 | * `archive_extract()` gains a `strip_components` argument to strip leading pathname components if desired (#27) 64 | 65 | * `archive()`, `archive_read()`, `archive_extract()` and `file_read()` now all accept R connections in addition to file paths. 66 | This allows you to do things like read remote archives with a `url()` or `curl::curl()` connection like you would a file on disk (#7) 67 | 68 | # archive 1.0.2 69 | 70 | * skip tests failing on non-UTF-8 systems 71 | 72 | # archive 1.0.1 73 | 74 | * Fix encoding issues with non-UTF-8 linux systems. 75 | 76 | # archive 1.0.0 77 | 78 | * Initial release 79 | -------------------------------------------------------------------------------- /R/archive.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @useDynLib archive, .registration = TRUE 3 | #' @importFrom tibble as_tibble 4 | #' @importFrom cli cli_progress_bar 5 | ## usethis namespace: end 6 | NULL 7 | 8 | #' Construct a new archive 9 | #' 10 | #' This function retrieves metadata about files in an archive, it can be passed 11 | #' to [archive_read()] or [archive_write] to create a connection to read or 12 | #' write a specific file from the archive. 13 | #' 14 | #' @param file File path to the archive. 15 | #' @inheritParams archive_read 16 | #' @seealso [archive_read()], [archive_write()] to read and write archive files 17 | #' using R connections, [archive_extract()], [archive_write_files()], 18 | #' [archive_write_dir()] to add or extract files from an archive. 19 | #' @return A [tibble][tibble::tibble-package] with details about files in the archive. 20 | #' @examples 21 | #' a <- archive(system.file(package = "archive", "extdata", "data.zip")) 22 | #' a 23 | #' @export 24 | archive <- function(file, options = character(), password = NA_character_) { 25 | if (!inherits(file, "connection")) { 26 | file <- file(file, "rb") 27 | } 28 | 29 | if (!isOpen(file)) { 30 | open(file, "rb") 31 | } 32 | 33 | options <- validate_options(options) 34 | 35 | res <- archive_(file, options, c(password)) 36 | 37 | res 38 | } 39 | 40 | filter_by_extension <- function(file) { 41 | 42 | extension_to_filter <- function(ext) { 43 | switch(ext, 44 | Z = "compress", 45 | 46 | # There is currently no base64enc constant in libarchive 47 | # https://github.com/libarchive/libarchive/pull/907 48 | # base64enc = "base64enc" 49 | 50 | bz2 = "bzip2", 51 | gz = "gzip", 52 | lz = "lzip", 53 | lz4 = "lz4", 54 | lzo = "lzop", 55 | lzma = "lzma", 56 | uu = "uuencode", 57 | xz = "xz", 58 | zst = "zstd", 59 | 60 | NULL) 61 | } 62 | 63 | extensions <- sub("^[^.][.]", "", basename(file)) 64 | 65 | Reduce(`c`, Map(extension_to_filter, strsplit(extensions, "[.]")[[1]])) 66 | } 67 | 68 | format_and_filter_by_extension <- function(file) { 69 | ext <- sub("^[^.]*[.]", "", basename(file)) 70 | switch(ext, 71 | "7z" = list("7zip", "none"), 72 | 73 | "cpio" = list("cpio", "none"), 74 | 75 | "iso" = list("iso9660", "none"), 76 | 77 | "mtree" = list("mtree", "none"), 78 | 79 | "tar" = list("tar", "none"), 80 | 81 | "tgz" = list("tar", "gzip"), 82 | "taz" = list("tar", "gzip"), 83 | "tar.gz" = list("tar", "gzip"), 84 | 85 | "tbz" = list("tar", "bzip2"), 86 | "tbz2" = list("tar", "bzip2"), 87 | "tz2" = list("tar", "bzip2"), 88 | "tar.bz2" = list("tar", "bzip2"), 89 | 90 | "tlz" = list("tar", "lzma"), 91 | "tar.lzma" = list("tar", "lzma"), 92 | 93 | "txz" = list("tar", "xz"), 94 | "tar.xz" = list("tar", "xz"), 95 | 96 | "tzo" = list("tar", "lzop"), 97 | 98 | "taZ" = list("tar", "compress"), 99 | "tZ" = list("tar", "compress"), 100 | 101 | "tar.zst" = list("tar", "zstd"), 102 | 103 | "warc" = list("warc", "none"), 104 | 105 | "jar" = list("zip", "none"), 106 | "zip" = list("zip", "none"), 107 | 108 | NULL) 109 | } 110 | 111 | libarchive_version <- function() { 112 | package_version(libarchive_version_()) 113 | } 114 | 115 | libarchive_zlib_version <- function() { 116 | version <- libarchive_zlib_version_() 117 | # remove suffix from 1.3.1.zlib-ng and similar 118 | version <- sub("[.]?[^0-9.-].*$", "", version) 119 | version <- package_version(version, strict = FALSE) 120 | if (is.na(version)) { 121 | package_version("0.0.0") 122 | } else { 123 | version 124 | } 125 | } 126 | 127 | libarchive_liblzma_version <- function() { 128 | version <- libarchive_liblzma_version_() 129 | if (nzchar(version)) { 130 | return(package_version(version)) 131 | } 132 | package_version("0.0.0") 133 | } 134 | 135 | libarchive_bzlib_version <- function() { 136 | version <- libarchive_bzlib_version_() 137 | 138 | # bzlib versions are of the form 139 | # 1.0.6, 6-Sept-2010 140 | # So remove everything after the comma 141 | version <- sub(",.+", "", version) 142 | 143 | if (nzchar(version)) { 144 | return(package_version(version)) 145 | } 146 | package_version("0.0.0") 147 | } 148 | 149 | libarchive_liblz4_version <- function() { 150 | version <- libarchive_liblz4_version_() 151 | if (nzchar(version)) { 152 | return(package_version(version)) 153 | } 154 | package_version("0.0.0") 155 | } 156 | 157 | libarchive_libzstd_version <- function() { 158 | version <- libarchive_libzstd_version_() 159 | if (nzchar(version)) { 160 | return(package_version(version)) 161 | } 162 | package_version("0.0.0") 163 | } 164 | 165 | print_versions <- function(){ 166 | cat("Linked to:\n") 167 | cat("libarchive", as.character(libarchive_version()), '\n') 168 | cat("zlib", as.character(libarchive_zlib_version()), '\n') 169 | cat("lzma", as.character(libarchive_liblzma_version()), '\n') 170 | cat("bzlib", as.character(libarchive_bzlib_version()), '\n') 171 | cat("zsstd", as.character(libarchive_libzstd_version()), '\n') 172 | } 173 | -------------------------------------------------------------------------------- /R/archive_extract.R: -------------------------------------------------------------------------------- 1 | #' Extract contents of an archive to a directory 2 | #' 3 | #' @inheritParams archive_read 4 | #' @param files `character() || integer() || NULL` One or more files within the archive, 5 | #' specified either by filename or by position. 6 | #' @param dir `character(1)` Directory location to extract archive contents, will be created 7 | #' if it does not exist. 8 | #' @param strip_components Remove the specified number of leading path 9 | #' elements. Pathnames with fewer elements will be silently skipped. 10 | #' @details 11 | #' If `files` is `NULL` (the default) all files will be extracted. 12 | #' @returns The filenames extracted (invisibly). 13 | #' @examples 14 | #' a <- system.file(package = "archive", "extdata", "data.zip") 15 | #' d <- tempfile() 16 | #' 17 | #' # When called with default arguments extracts all files in the archive. 18 | #' archive_extract(a, d) 19 | #' list.files(d) 20 | #' unlink(d) 21 | #' 22 | #' # Can also specify one or more files to extract 23 | #' d <- tempfile() 24 | #' archive_extract(a, d, c("iris.csv", "airquality.csv")) 25 | #' list.files(d) 26 | #' unlink(d) 27 | #' @export 28 | archive_extract <- function(archive, dir = ".", files = NULL, options = character(), strip_components = 0L, password = NA_character_) { 29 | assert("`files` must be a character or numeric vector or `NULL`", 30 | is.null(files) || is.numeric(files) || is.character(files)) 31 | 32 | if (!inherits(archive, "connection")) { 33 | archive <- file(archive, "rb") 34 | } 35 | 36 | if (!isOpen(archive)) { 37 | open(archive, "rb") 38 | } 39 | 40 | if (!identical(dir, ".")) { 41 | if (!dir.exists(dir)) { 42 | dir.create(dir) 43 | } 44 | old <- setwd(dir) 45 | on.exit(setwd(old)) 46 | } 47 | options <- validate_options(options) 48 | 49 | files <- archive_extract_(archive, files, as.integer(strip_components), options, c(password), sz = 2^14) 50 | 51 | invisible(files) 52 | } 53 | -------------------------------------------------------------------------------- /R/archive_read.R: -------------------------------------------------------------------------------- 1 | #' Create a readable connection to a file in an archive. 2 | #' 3 | #' @inheritParams archive_write 4 | #' @returns An 'archive_read' connection to the file within the archive to be read. 5 | #' @examples 6 | #' a <- system.file(package = "archive", "extdata", "data.zip") 7 | #' # Show files in archive 8 | #' a 9 | #' 10 | #' # By default reads the first file in the archive. 11 | #' read.csv(archive_read(a), nrows = 3) 12 | #' 13 | #' # Can also specify a filename directly 14 | #' read.csv(archive_read(a, "mtcars.csv"), nrows = 3) 15 | #' 16 | #' # Or by position 17 | #' read.csv(archive_read(a, 3), nrows = 3) 18 | #' 19 | #' # Explicitly specify the format and filter if automatic detection fails. 20 | #' read.csv(archive_read(a, format = "zip"), nrows = 3) 21 | #' @export 22 | archive_read <- function(archive, file = 1L, mode = "r", format = NULL, filter = NULL, options = character(), password = NA_character_) { 23 | assert("`file` must be a length one character vector or numeric", 24 | length(file) == 1 && (is.character(file) || is.numeric(file))) 25 | 26 | options <- validate_options(options) 27 | 28 | if (!inherits(archive, "connection")) { 29 | archive <- file(archive, "rb") 30 | } 31 | 32 | description <- glue::glue("archive_read({desc})[{file}]", desc = summary(archive)$description) 33 | 34 | archive_read_(archive, file, description, mode, archive_formats()[format], archive_filters()[filter], options, c(password), sz = 2^14) 35 | } 36 | -------------------------------------------------------------------------------- /R/archive_write.R: -------------------------------------------------------------------------------- 1 | #' Create a writable connection to a file in an archive. 2 | #' 3 | #' @param archive `character(1)` The archive filename or an `archive` object. 4 | #' @param file `character(1) || integer(1)` The filename within the archive, 5 | #' specified either by filename or by position. 6 | #' @param mode `character(1)` A description of how to open the 7 | #' connection (if it should be opened initially). See section 8 | #' ‘Modes’ in [base::connections()] for possible values. 9 | #' @template archive 10 | #' @importFrom rlang is_character is_named 11 | #' @details 12 | #' For traditional zip archives [archive_write()] creates a connection which 13 | #' writes the data to the specified file directly. For other archive formats 14 | #' the file size must be known when the archive is created, so the data is 15 | #' first written to a scratch file on disk and then added to the archive. This 16 | #' scratch file is automatically removed when writing is complete. 17 | #' @returns An 'archive_write' connection to the file within the archive to be written. 18 | #' @examples 19 | #' # Archive format and filters can be set automatically from the file extensions. 20 | #' f1 <- tempfile(fileext = ".tar.gz") 21 | #' 22 | #' write.csv(mtcars, archive_write(f1, "mtcars.csv")) 23 | #' archive(f1) 24 | #' unlink(f1) 25 | #' 26 | #' # They can also be specified explicitly 27 | #' f2 <- tempfile() 28 | #' write.csv(mtcars, archive_write(f2, "mtcars.csv", format = "tar", filter = "bzip2")) 29 | #' archive(f2) 30 | #' unlink(f2) 31 | #' 32 | #' # You can also pass additional options to control things like compression level 33 | #' f3 <- tempfile(fileext = ".tar.gz") 34 | #' write.csv(mtcars, archive_write(f3, "mtcars.csv", options = "compression-level=2")) 35 | #' archive(f3) 36 | #' unlink(f3) 37 | #' @export 38 | archive_write <- function(archive, file, mode = "w", format = NULL, filter = NULL, options = character(), password = NA_character_) { 39 | if (is.null(format) && is.null(filter)) { 40 | res <- format_and_filter_by_extension(archive) 41 | 42 | assert("Could not automatically determine the `filter` and `format` from `archive` {archive}", 43 | !is.null(res)) 44 | 45 | format <- res[[1]] 46 | filter <- res[[2]] 47 | } 48 | 49 | assert("`archive` {archive} must be a writable file path", 50 | is_writable(dirname(archive))) 51 | 52 | archive <- normalizePath(archive, mustWork = FALSE) 53 | 54 | assert("`file` must be a length one character vector", 55 | is_string(file)) 56 | 57 | options <- validate_options(options) 58 | 59 | if (identical(format, "zip") || identical(format, "raw")) { 60 | return(archive_write_direct_(archive, file, mode, archive_formats()[format], archive_filters()[filter], options, c(password), 2^14)) 61 | } 62 | 63 | archive_write_(archive, file, mode, archive_formats()[format], archive_filters()[filter], options, c(password), 2^14) 64 | } 65 | -------------------------------------------------------------------------------- /R/archive_write_dir.R: -------------------------------------------------------------------------------- 1 | #' @rdname archive_write_files 2 | #' @param ... additional parameters passed to `base::dir`. 3 | #' @param dir `character(1)` The directory of files to add. 4 | #' @inheritParams base::list.files 5 | #' @returns An 'archive' object representing the new archive (invisibly). 6 | #' @export 7 | archive_write_dir <- function(archive, dir, format = NULL, filter = NULL, options = character(), password = NA_character_, ..., recursive = TRUE, full.names = FALSE) { 8 | assert("`dir` {dir} is not readable", 9 | is_readable(dir)) 10 | 11 | archive <- file.path(normalizePath(dirname(archive)), basename(archive)) 12 | 13 | options <- validate_options(options) 14 | 15 | old <- setwd(dir) 16 | on.exit(setwd(old)) 17 | files <- dir(".", ..., recursive = recursive, full.names = full.names) 18 | 19 | archive_write_files(archive, files, format = format, filter = filter, options = options, password = password) 20 | 21 | invisible(archive(archive, options = character())) 22 | } 23 | -------------------------------------------------------------------------------- /R/archive_write_files.R: -------------------------------------------------------------------------------- 1 | #' Add files to a new archive 2 | #' 3 | #' `archive_write_files()` adds one or more files to a new archive. 4 | #' `archive_write_dir()` adds all the file(s) in a directory to a new archive. 5 | #' @param files `character()` One or more files to add to the archive. 6 | #' @inheritParams archive_write 7 | #' @returns An 'archive' object representing the new archive (invisibly). 8 | #' @examples 9 | #' if (archive:::libarchive_version() > "3.2.0") { 10 | #' # write some files to a directory 11 | #' d <- tempfile() 12 | #' dir.create(d) 13 | #' old <- setwd(d) 14 | #' 15 | #' write.csv(iris, file.path(d, "iris.csv")) 16 | #' write.csv(mtcars, file.path(d, "mtcars.csv")) 17 | #' write.csv(airquality, file.path(d, "airquality.csv")) 18 | #' 19 | #' # Add some to a new archive 20 | #' a <- archive_write_files("data.tar.gz", c("iris.csv", "mtcars.csv")) 21 | #' setwd(old) 22 | #' a 23 | #' 24 | #' # Add all files in a directory 25 | #' a <- archive_write_dir("data.zip", d) 26 | #' a 27 | #' 28 | #' unlink("data.zip") 29 | #' } 30 | #' @export 31 | archive_write_files <- function(archive, files, format = NULL, filter = NULL, options = character(), password = NA_character_) { 32 | assert("`archive` {archive} must be a writable file path", 33 | is_writable(dirname(archive))) 34 | 35 | archive <- normalizePath(archive, mustWork = FALSE) 36 | 37 | assert("`files` must be one or more readable file paths", 38 | lapply(files, is_readable)) 39 | 40 | if (is.null(format) && is.null(filter)) { 41 | res <- format_and_filter_by_extension(archive) 42 | assert("Could not automatically determine the `filter` and `format` from `archive` {archive}", 43 | non_null(res)) 44 | format <- res[[1]] 45 | filter <- res[[2]] 46 | } 47 | options <- validate_options(options) 48 | 49 | archive_write_files_(archive, files, archive_formats()[format], archive_filters()[filter], options, c(password), sz = 2^14) 50 | 51 | invisible(archive(archive, options = character())) 52 | } 53 | -------------------------------------------------------------------------------- /R/cpp11.R: -------------------------------------------------------------------------------- 1 | # Generated by cpp11: do not edit by hand 2 | 3 | archive_extract_ <- function(connection, file, num_strip_components, options, password, sz) { 4 | .Call(`_archive_archive_extract_`, connection, file, num_strip_components, options, password, sz) 5 | } 6 | 7 | archive_read_ <- function(connection, file, description, mode, format, filters, options, password, sz) { 8 | .Call(`_archive_archive_read_`, connection, file, description, mode, format, filters, options, password, sz) 9 | } 10 | 11 | archive_write_direct_ <- function(archive_filename, filename, mode, format, filters, options, password, sz) { 12 | .Call(`_archive_archive_write_direct_`, archive_filename, filename, mode, format, filters, options, password, sz) 13 | } 14 | 15 | archive_write_files_ <- function(archive_filename, files, format, filters, options, password, sz) { 16 | .Call(`_archive_archive_write_files_`, archive_filename, files, format, filters, options, password, sz) 17 | } 18 | 19 | archive_write_ <- function(archive_filename, filename, mode, format, filters, options, password, sz) { 20 | .Call(`_archive_archive_write_`, archive_filename, filename, mode, format, filters, options, password, sz) 21 | } 22 | 23 | archive_ <- function(connection, options, password) { 24 | .Call(`_archive_archive_`, connection, options, password) 25 | } 26 | 27 | archive_filters <- function() { 28 | .Call(`_archive_archive_filters`) 29 | } 30 | 31 | archive_formats <- function() { 32 | .Call(`_archive_archive_formats`) 33 | } 34 | 35 | libarchive_version_ <- function() { 36 | .Call(`_archive_libarchive_version_`) 37 | } 38 | 39 | libarchive_zlib_version_ <- function() { 40 | .Call(`_archive_libarchive_zlib_version_`) 41 | } 42 | 43 | libarchive_liblzma_version_ <- function() { 44 | .Call(`_archive_libarchive_liblzma_version_`) 45 | } 46 | 47 | libarchive_bzlib_version_ <- function() { 48 | .Call(`_archive_libarchive_bzlib_version_`) 49 | } 50 | 51 | libarchive_liblz4_version_ <- function() { 52 | .Call(`_archive_libarchive_liblz4_version_`) 53 | } 54 | 55 | libarchive_libzstd_version <- function() { 56 | .Call(`_archive_libarchive_libzstd_version`) 57 | } 58 | 59 | rchive_init <- function(nc_xptr, rc_xptr) { 60 | invisible(.Call(`_archive_rchive_init`, nc_xptr, rc_xptr)) 61 | } 62 | -------------------------------------------------------------------------------- /R/file_read.R: -------------------------------------------------------------------------------- 1 | #' @rdname file_connections 2 | #' @export 3 | file_read <- function(file, mode = "r", filter = NULL, options = character(), password = NA_character_) { 4 | options <- validate_options(options) 5 | 6 | if (!inherits(archive, "connection")) { 7 | file <- file(file) 8 | } 9 | 10 | description <- glue::glue("file_read({desc})", desc = summary(file)$description) 11 | 12 | archive_read_(file, 1L, description, mode, archive_formats()["raw"], archive_filters()[filter], options, c(password), sz = 2^14) 13 | } 14 | -------------------------------------------------------------------------------- /R/file_write.R: -------------------------------------------------------------------------------- 1 | #' Construct a connections for (possibly compressed) files. 2 | #' 3 | #' They are functionally equivalent to calling [archive_read] or 4 | #' [archive_write] using `format = "raw", archive = file`. 5 | #' 6 | #' `file_write()` returns an writable output connection, 7 | #' `file_read()` returns a readable input connection. 8 | #' @inheritParams archive_read 9 | #' @inheritParams archive_write 10 | #' @name file_connections 11 | #' @returns An 'archive_read' connection (for `file_read()`) or an 'archive_write' connection (for `file_write()`) to the file. 12 | #' @examples 13 | #' if (archive:::libarchive_version() > "3.2.0") { 14 | #' # Write bzip2, base 64 encoded data and use high compression 15 | #' write.csv(mtcars, 16 | #' file_write("mtcars.bz2", 17 | #' filter = c("uuencode", "bzip2"), 18 | #' options = "compression-level=9" 19 | #' ) 20 | #' ) 21 | #' 22 | #' # Read it back 23 | #' read.csv(file_read("mtcars.bz2"), row.names = 1, nrows = 3) 24 | #' unlink("mtcars.bz2") 25 | #' } 26 | #' @export 27 | file_write <- function(file, mode = "w", filter = NULL, options = character(), password = NA_character_) { 28 | 29 | if (is.null(filter)) { 30 | res <- filter_by_extension(file) 31 | assert("Could not automatically determine the `filter` for {file}", 32 | non_null(res)) 33 | filter <- res 34 | } 35 | 36 | archive_write(archive = file, file = file, mode = mode, format = "raw", filter = filter, options = options, password = password) 37 | } 38 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | choices_rd <- function(x) { 2 | paste0(collapse = ", ", paste0("\\sQuote{", x, "}")) # nocov 3 | } 4 | 5 | #' @importFrom glue glue_collapse single_quote 6 | collapse_quote_transformer <- function(code, envir) { 7 | collapse_re <- "[*]$" 8 | quote_re <- "^[|]" 9 | should_collapse <- grepl(collapse_re, code) 10 | should_quote <- !grepl(quote_re, code) 11 | code <- sub(collapse_re, "", sub(quote_re, "", code)) 12 | res <- eval(parse(text = code, keep.source = FALSE), envir) 13 | if (should_quote) { 14 | res <- single_quote(res) 15 | } 16 | if (should_collapse) { 17 | res <- glue_collapse(res, sep = ", ", last = " and ") 18 | } 19 | res 20 | } 21 | 22 | #' @importFrom glue glue 23 | assert <- function(msg, ...) { 24 | tests <- unlist(list(...)) 25 | 26 | if (!all(tests)) { 27 | stop(structure(list( 28 | message = glue(msg, .envir = parent.frame(), .transformer = collapse_quote_transformer), 29 | .call = sys.call(-1)), class = c("error", "condition"))) 30 | } 31 | } 32 | 33 | # TODO check the options match the correct formats here? 34 | validate_options <- function(options) { 35 | assert("`options` must be an unnamed character vector", 36 | length(options) == 0 || is_character(options) && !is_named(options) 37 | ) 38 | 39 | if (length(options) > 1) { 40 | options <- glue_collapse(options, ",") 41 | } 42 | 43 | options 44 | } 45 | 46 | is_string <- function(x) { 47 | is.character(x) && length(x) == 1 48 | } 49 | 50 | is_readable <- function(path) { 51 | is_string(path) && 52 | file.exists(path) 53 | # file.access fails on some NFS, such as shared folders on virtualbox 54 | # https://stat.ethz.ch/pipermail/r-devel/2008-December/051461.html 55 | # file.access(path, mode = 4)[[1]] == 0 56 | } 57 | 58 | is_writable <- function(path) { 59 | is_string(path) && 60 | file.exists(path) 61 | # file.access fails on some NFS, such as shared folders on virtualbox 62 | # https://stat.ethz.ch/pipermail/r-devel/2008-December/051461.html 63 | # file.access(path, mode = 2)[[1]] == 0 64 | } 65 | 66 | non_null <- function(x) { 67 | !is.null(x) 68 | } 69 | 70 | is_number <- function(x) { 71 | is.numeric(x) && length(x) == 1 72 | } 73 | 74 | dir.exists <- function(paths) { 75 | x <- base::file.info(paths)$isdir 76 | !is.na(x) & x 77 | } 78 | 79 | file.size <- function(...) { 80 | base::file.info(...)$size 81 | } 82 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | lib_path <- system.file("lib", .Platform$r_arch, paste0("libconnection", .Platform$dynlib.ext), package = "archive") 3 | res <- dyn.load(lib_path) 4 | 5 | rchive_init(res$new_connection$address, res$read_connection$address) 6 | } 7 | 8 | .onUnload <- function(libname) { 9 | lib_path <- system.file("lib", .Platform$r_arch, paste0("libconnection", .Platform$dynlib.ext), package = "archive") 10 | dyn.unload(lib_path) 11 | } 12 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "README-" 12 | ) 13 | dir <- tempfile() 14 | dir.create(dir) 15 | knitr::opts_knit$set(root.dir = dir) 16 | options(tibble.print_min = 4L) 17 | library(archive) 18 | ``` 19 | # archive 20 | 21 | 22 | [![R-CMD-check](https://github.com/r-lib/archive/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/archive/actions) 23 | [![Coverage Status](https://img.shields.io/codecov/c/github/r-lib/archive/main)](https://app.codecov.io/github/r-lib/archive?branch=main) 24 | [![CRAN status](https://www.r-pkg.org/badges/version/archive)](https://CRAN.R-project.org/package=archive) 25 | 26 | 27 | R bindings to libarchive . 28 | Supports many archives formats, including tar, ZIP, 7-zip, RAR, CAB. 29 | Also supports many filters such as gzip, bzip2, compress, lzma, xz and uuencoded files, among others. 30 | 31 | archive provides interfaces to read and write connections into archives, as 32 | well as efficiently reading and writing archives directly to disk. 33 | 34 | ## Installation 35 | 36 | You can install archive from CRAN with: 37 | 38 | ``` r 39 | # install.packages("archive") 40 | ``` 41 | 42 | ## Example 43 | 44 | ### Single file archives 45 | 46 | Use `archive_read()` and `archive_write()` to read and write single files to an archive. 47 | These files return connections, which can be passed to any R interface which can take a connection. 48 | Most base R file system functions use connections, as well as some packages like [readr](https://readr.tidyverse.org/). 49 | 50 | ```{r} 51 | library(readr) # read_csv(), write_csv(), cols() 52 | 53 | # Write a single dataset to zip 54 | write_csv(mtcars, archive_write("mtcars.zip", "mtcars.csv")) 55 | 56 | # Read the data back, by default the first file is read from the archive. 57 | read_csv(archive_read("mtcars.zip"), col_types = cols()) 58 | 59 | # Also supports things like archiving and compression together 60 | # Write a single dataset to (gzip compressed) tar 61 | write_csv(mtcars, archive_write("mtcars.tar.gz", "mtcars.csv", options = "compression-level=9")) 62 | 63 | # Read the data back 64 | read_csv(archive_read("mtcars.tar.gz"), col_types = cols()) 65 | 66 | # Archive file sizes 67 | file.size(c("mtcars.zip", "mtcars.tar.gz")) 68 | ``` 69 | 70 | ### Multi file archives 71 | 72 | `archive_write_files()` is used to create a new archive from multiple files on disk. 73 | 74 | ```{r} 75 | # Write a few files to the temp directory 76 | write_csv(iris, "iris.csv") 77 | write_csv(mtcars, "mtcars.csv") 78 | write_csv(airquality, "airquality.csv") 79 | 80 | # Add them to a new archive 81 | archive_write_files("data.tar.xz", c("iris.csv", "mtcars.csv", "airquality.csv")) 82 | 83 | # View archive contents 84 | a <- archive("data.tar.xz") 85 | a 86 | 87 | # By default `archive_read()` will read the first file from a multi-file archive. 88 | read_csv(archive_read("data.tar.xz"), col_types = cols()) 89 | 90 | # Use a number to read a different file 91 | read_csv(archive_read("data.tar.xz", file = 2), col_types = cols()) 92 | 93 | # Or a filename to read a specific file 94 | read_csv(archive_read("data.tar.xz", file = "mtcars.csv"), col_types = cols()) 95 | ``` 96 | 97 | ### Regular files (with compression) 98 | `file_write()` returns a connection to filtered by one or more compressions or 99 | encodings. `file_read()` reads a compressed file, automatically detecting the 100 | compression used. 101 | 102 | ```{r} 103 | # Write bzip2, uuencoded data 104 | write_csv(mtcars, file_write("mtcars.bz2", filter = c("uuencode", "bzip2"))) 105 | 106 | # Read it back, the formats are automatically detected 107 | read_csv(file_read("mtcars.bz2"), col_types = cols()) 108 | ``` 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # archive 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/r-lib/archive/workflows/R-CMD-check/badge.svg)](https://github.com/r-lib/archive/actions) 9 | [![Coverage 10 | Status](https://img.shields.io/codecov/c/github/r-lib/archive/main)](https://app.codecov.io/github/r-lib/archive?branch=main) 11 | [![CRAN 12 | status](https://www.r-pkg.org/badges/version/archive)](https://CRAN.R-project.org/package=archive) 13 | 14 | 15 | R bindings to libarchive . Supports many 16 | archives formats, including tar, ZIP, 7-zip, RAR, CAB. Also supports 17 | many filters such as gzip, bzip2, compress, lzma, xz and uuencoded 18 | files, among others. 19 | 20 | archive provides interfaces to read and write connections into archives, 21 | as well as efficiently reading and writing archives directly to disk. 22 | 23 | ## Installation 24 | 25 | You can install archive from CRAN with: 26 | 27 | ``` r 28 | # install.packages("archive") 29 | ``` 30 | 31 | ## Example 32 | 33 | ### Single file archives 34 | 35 | Use `archive_read()` and `archive_write()` to read and write single 36 | files to an archive. These files return connections, which can be passed 37 | to any R interface which can take a connection. Most base R file system 38 | functions use connections, as well as some packages like 39 | [readr](https://readr.tidyverse.org/). 40 | 41 | ``` r 42 | library(readr) # read_csv(), write_csv(), cols() 43 | 44 | # Write a single dataset to zip 45 | write_csv(mtcars, archive_write("mtcars.zip", "mtcars.csv")) 46 | 47 | # Read the data back, by default the first file is read from the archive. 48 | read_csv(archive_read("mtcars.zip"), col_types = cols()) 49 | #> # A tibble: 32 × 11 50 | #> mpg cyl disp hp drat wt qsec vs am gear carb 51 | #> 52 | #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 53 | #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 54 | #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 55 | #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 56 | #> # ℹ 28 more rows 57 | 58 | # Also supports things like archiving and compression together 59 | # Write a single dataset to (gzip compressed) tar 60 | write_csv(mtcars, archive_write("mtcars.tar.gz", "mtcars.csv", options = "compression-level=9")) 61 | 62 | # Read the data back 63 | read_csv(archive_read("mtcars.tar.gz"), col_types = cols()) 64 | #> # A tibble: 32 × 11 65 | #> mpg cyl disp hp drat wt qsec vs am gear carb 66 | #> 67 | #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 68 | #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 69 | #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 70 | #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 71 | #> # ℹ 28 more rows 72 | 73 | # Archive file sizes 74 | file.size(c("mtcars.zip", "mtcars.tar.gz")) 75 | #> [1] 742 808 76 | ``` 77 | 78 | ### Multi file archives 79 | 80 | `archive_write_files()` is used to create a new archive from multiple 81 | files on disk. 82 | 83 | ``` r 84 | # Write a few files to the temp directory 85 | write_csv(iris, "iris.csv") 86 | write_csv(mtcars, "mtcars.csv") 87 | write_csv(airquality, "airquality.csv") 88 | 89 | # Add them to a new archive 90 | archive_write_files("data.tar.xz", c("iris.csv", "mtcars.csv", "airquality.csv")) 91 | 92 | # View archive contents 93 | a <- archive("data.tar.xz") 94 | a 95 | #> # A tibble: 3 × 3 96 | #> path size date 97 | #> 98 | #> 1 iris.csv 3716 2023-12-11 12:18:04 99 | #> 2 mtcars.csv 1281 2023-12-11 12:18:04 100 | #> 3 airquality.csv 2890 2023-12-11 12:18:04 101 | 102 | # By default `archive_read()` will read the first file from a multi-file archive. 103 | read_csv(archive_read("data.tar.xz"), col_types = cols()) 104 | #> # A tibble: 150 × 5 105 | #> Sepal.Length Sepal.Width Petal.Length Petal.Width Species 106 | #> 107 | #> 1 5.1 3.5 1.4 0.2 setosa 108 | #> 2 4.9 3 1.4 0.2 setosa 109 | #> 3 4.7 3.2 1.3 0.2 setosa 110 | #> 4 4.6 3.1 1.5 0.2 setosa 111 | #> # ℹ 146 more rows 112 | 113 | # Use a number to read a different file 114 | read_csv(archive_read("data.tar.xz", file = 2), col_types = cols()) 115 | #> # A tibble: 32 × 11 116 | #> mpg cyl disp hp drat wt qsec vs am gear carb 117 | #> 118 | #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 119 | #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 120 | #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 121 | #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 122 | #> # ℹ 28 more rows 123 | 124 | # Or a filename to read a specific file 125 | read_csv(archive_read("data.tar.xz", file = "mtcars.csv"), col_types = cols()) 126 | #> # A tibble: 32 × 11 127 | #> mpg cyl disp hp drat wt qsec vs am gear carb 128 | #> 129 | #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 130 | #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 131 | #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 132 | #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 133 | #> # ℹ 28 more rows 134 | ``` 135 | 136 | ### Regular files (with compression) 137 | 138 | `file_write()` returns a connection to filtered by one or more 139 | compressions or encodings. `file_read()` reads a compressed file, 140 | automatically detecting the compression used. 141 | 142 | ``` r 143 | # Write bzip2, uuencoded data 144 | write_csv(mtcars, file_write("mtcars.bz2", filter = c("uuencode", "bzip2"))) 145 | 146 | # Read it back, the formats are automatically detected 147 | read_csv(file_read("mtcars.bz2"), col_types = cols()) 148 | #> # A tibble: 32 × 11 149 | #> mpg cyl disp hp drat wt qsec vs am gear carb 150 | #> 151 | #> 1 21 6 160 110 3.9 2.62 16.5 0 1 4 4 152 | #> 2 21 6 160 110 3.9 2.88 17.0 0 1 4 4 153 | #> 3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1 154 | #> 4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1 155 | #> # ℹ 28 more rows 156 | ``` 157 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://archive.r-lib.org 2 | 3 | authors: 4 | "Jim Hester": 5 | href: http://jimhester.com 6 | 7 | development: 8 | mode: auto 9 | 10 | reference: 11 | - title: Read and write archives using R connections. 12 | desc: These functions deal with archive formats such as zip, 7zip, rar and 13 | tar and return connection objects which can be used by many R input / 14 | output functions. 15 | contents: 16 | - archive 17 | - archive_read 18 | - archive_write 19 | 20 | - title: Extract files from archives and write existing files to archives. 21 | desc: These functions create archives from a set of existing files or 22 | extract some or all files from an archive to disk. 23 | contents: 24 | - archive_extract 25 | - archive_write_files 26 | - archive_write_dir 27 | 28 | - title: Read and Write files using R connections. 29 | desc: These functions write or read a file filtered by one or more 30 | compression algorithms or encoding filters supported by libarchive, such 31 | as gzip, bzip2 and xz and return an R connection to that file. 32 | contents: 33 | - file_read 34 | - file_write 35 | -------------------------------------------------------------------------------- /archive.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -f src/Makevars man/macros/eval2.Rd src/*.o src/*.so \ 4 | src/connection/*.o src/connection/*.so inst/lib/*.so 5 | rm -Rf .deps 6 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | # Anticonf (tm) script by Jeroen Ooms, Jim Hester (2017) 2 | 3 | echo "** Running ./configure" 4 | 5 | # Dynamic manual pages 6 | export RBIN="${R_HOME}/bin${R_ARCH_BIN}/R" 7 | "$RBIN" --vanilla --slave -f tools/dynamic-help.R 8 | 9 | # This script will query 'pkg-config' for the required cflags and ldflags. 10 | # If pkg-config is unavailable or does not find the library, try setting 11 | # INCLUDE_DIR and LIB_DIR manually via e.g: 12 | # R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib' 13 | 14 | if [ `uname` = "Darwin" ]; then 15 | if [ `arch` = "arm64" ]; then 16 | if ${R_HOME}/bin/R --version | grep Platform | grep -q darwin20; then 17 | PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/opt/R/arm64/lib/pkgconfig 18 | fi 19 | PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/opt/homebrew/opt/libarchive/lib/pkgconfig 20 | else 21 | if ${R_HOME}/bin/R --version | grep Platform | grep -q darwin20; then 22 | PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/opt/R/x86_64/lib/pkgconfig 23 | fi 24 | PKG_CONFIG_PATH=$PKG_CONFIG_PATH:/usr/local/opt/libarchive/lib/pkgconfig 25 | fi 26 | fi 27 | 28 | export PKG_CONFIG_PATH 29 | 30 | # library settings 31 | PKG_CONFIG_NAME="libarchive" 32 | PKG_CONFIG_NAME_BREW="libarchive zstd" 33 | PKG_DEB_NAME="libarchive-dev" 34 | PKG_RPM_NAME="libarchive-devel" 35 | PKG_CSW_NAME="libarchive_dev" 36 | PKG_BREW_NAME="libarchive" 37 | PKG_TEST_HEADER="" 38 | PKG_LIBS="-larchive" 39 | 40 | # Use pkg-config if available 41 | pkg-config ${PKG_CONFIG_NAME} --atleast-version=1.0 2>/dev/null 42 | if [ $? -eq 0 ]; then 43 | PKGCONFIG_CFLAGS=`pkg-config --cflags ${PKG_CONFIG_NAME}` 44 | if [ `uname` = "Darwin" ]; then 45 | if pkg-config --path libarchive | grep -q "^/opt/R"; then 46 | PKGCONFIG_LIBS=`pkg-config --libs --static ${PKG_CONFIG_NAME}` 47 | else 48 | PKGCONFIG_LIBS=`pkg-config --libs ${PKG_CONFIG_NAME}` 49 | fi 50 | else 51 | PKGCONFIG_LIBS=`pkg-config --libs ${PKG_CONFIG_NAME}` 52 | fi 53 | fi 54 | 55 | # Note that cflags may be empty in case of success 56 | if [ "$INCLUDE_DIR" ] || [ "$LIB_DIR" ]; then 57 | echo "Found INCLUDE_DIR and/or LIB_DIR!" 58 | PKG_CFLAGS="-I$INCLUDE_DIR $PKG_CFLAGS" 59 | PKG_LIBS="-L$LIB_DIR $PKG_LIBS" 60 | elif [ "$PKGCONFIG_CFLAGS" ] || [ "$PKGCONFIG_LIBS" ]; then 61 | echo "Found pkg-config cflags and libs!" 62 | PKG_CFLAGS=${PKGCONFIG_CFLAGS} 63 | PKG_LIBS=${PKGCONFIG_LIBS} 64 | elif [ `uname` = "Darwin" ]; then 65 | test ! "$CI" && brew --version 2>/dev/null 66 | if [ $? -eq 0 ]; then 67 | BREWDIR=`brew --prefix` 68 | PKG_CFLAGS="-I$BREWDIR/opt/libarchive/include" 69 | PKG_LIBS="-L$BREWDIR/opt/libarchive/lib $PKG_LIBS" 70 | else 71 | curl -sfL "https://autobrew.github.io/scripts/$PKG_BREW_NAME" > autobrew 72 | . ./autobrew 73 | fi 74 | fi 75 | 76 | # Find compiler 77 | CXX11=`${R_HOME}/bin/R CMD config CXX11` 78 | CXX11FLAGS=`${R_HOME}/bin/R CMD config CXX11FLAGS` 79 | CPPFLAGS=`${R_HOME}/bin/R CMD config CPPFLAGS` 80 | 81 | if [ "`${R_HOME}/bin/R --slave -e 'cat(if (getRversion() < \"3.6.3\") 0 else 1)'`" = "0" ]; then 82 | PKG_CFLAGS="${PKG_CFLAGS} `${R_HOME}/bin/R CMD config CXX11STD`" 83 | fi 84 | 85 | # For debugging 86 | echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH" 87 | echo "PKG_CFLAGS=$PKG_CFLAGS" 88 | echo "PKG_LIBS=$PKG_LIBS" 89 | 90 | # Test configuration 91 | echo "#include $PKG_TEST_HEADER" | ${CXX11} ${CPPFLAGS} ${PKG_CFLAGS} ${CXX11FLAGS} -E -xc++ - > /dev/null 92 | 93 | # Customize the error 94 | if [ $? -ne 0 ]; then 95 | echo "--------------------------- [ANTICONF] --------------------------------" 96 | echo "Configuration failed because $PKG_CONFIG_NAME was not found. Try installing:" 97 | echo " * deb: $PKG_DEB_NAME (Debian, Ubuntu, etc)" 98 | echo " * rpm: $PKG_RPM_NAME (Fedora, CentOS, RHEL)" 99 | echo " * csw: $PKG_CSW_NAME (Solaris)" 100 | echo " * brew: $PKG_BREW_NAME (Mac OSX)" 101 | echo "If $PKG_CONFIG_NAME is already installed, check that 'pkg-config' is in your" 102 | echo "PATH and PKG_CONFIG_PATH contains a $PKG_CONFIG_NAME.pc file. If pkg-config" 103 | echo "is unavailable you can set INCLUDE_DIR and LIB_DIR manually via:" 104 | echo "R CMD INSTALL --configure-vars='INCLUDE_DIR=... LIB_DIR=...'" 105 | if [ -f configure.log ]; then 106 | echo "-------------------------- [ERROR MESSAGE] ---------------------------" 107 | cat configure.log; 108 | fi 109 | echo "--------------------------------------------------------------------" 110 | exit 1 111 | fi 112 | 113 | # Write to Makevars 114 | sed -e "s|@PKG_CXXFLAGS@|$PKG_CFLAGS|" -e "s|@PKG_LIBS@|$PKG_LIBS|" src/Makevars.in > src/Makevars 115 | 116 | # Success 117 | exit 0 118 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/archive/5e04bfa3a587af62c2e795716eed15d941e3bbdf/configure.win -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /inst/extdata/data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/archive/5e04bfa3a587af62c2e795716eed15d941e3bbdf/inst/extdata/data.zip -------------------------------------------------------------------------------- /inst/lib/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/archive/5e04bfa3a587af62c2e795716eed15d941e3bbdf/inst/lib/README.md -------------------------------------------------------------------------------- /man-roxygen/archive.R: -------------------------------------------------------------------------------- 1 | #' @param format \code{character(1)} default: \code{NULL} The archive format, one of \eval{choices_rd(names(archive:::archive_formats()))}. 2 | #' Supported formats differ depending on the libarchive version and build. 3 | #' @param filter \code{character(1)} default: \code{NULL} The archive filter, one of \eval{choices_rd(names(archive:::archive_filters()))}. 4 | #' Supported filters differ depending on the libarchive version and build. 5 | #' @param options \code{character()} default: \code{character(0)} Options to pass to the filter or format. 6 | #' The list of available options are documented in 7 | #' options can have one of the following forms: 8 | #' - `option=value` 9 | #' The option/value pair will be provided to every module. 10 | #' Modules that do not accept an option with this name will 11 | #' ignore it. 12 | #' - `option` 13 | #' The option will be provided to every module with a value 14 | #' of "1". 15 | #' - `!option` 16 | #' The option will be provided to every module with a NULL 17 | #' value. 18 | #' - `module:option=value`, `module:option`, `module:!option` 19 | #' As above, but the corresponding option and value will be 20 | #' provided only to modules whose name matches module. 21 | #' See [read options](https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html) for available read options 22 | #' See [write options](https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html) for available write options 23 | #' @param password `character(1)` The password to process the archive. 24 | #' @details 25 | #' If `format` and `filter` are `NULL`, they will be set automatically based on 26 | #' the file extension given in `file` when writing and automatically detected 27 | #' using 28 | #' [Robust automatic format detection](https://github.com/libarchive/libarchive/wiki/FormatDetection) 29 | #' when reading. 30 | -------------------------------------------------------------------------------- /man/archive.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/archive.R 3 | \name{archive} 4 | \alias{archive} 5 | \title{Construct a new archive} 6 | \usage{ 7 | archive(file, options = character(), password = NA_character_) 8 | } 9 | \arguments{ 10 | \item{file}{File path to the archive.} 11 | 12 | \item{options}{\code{character()} default: \code{character(0)} Options to pass to the filter or format. 13 | The list of available options are documented in 14 | options can have one of the following forms: 15 | \itemize{ 16 | \item \code{option=value} 17 | The option/value pair will be provided to every module. 18 | Modules that do not accept an option with this name will 19 | ignore it. 20 | \item \code{option} 21 | The option will be provided to every module with a value 22 | of "1". 23 | \item \code{!option} 24 | The option will be provided to every module with a NULL 25 | value. 26 | \item \code{module:option=value}, \code{module:option}, \code{module:!option} 27 | As above, but the corresponding option and value will be 28 | provided only to modules whose name matches module. 29 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html}{read options} for available read options 30 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html}{write options} for available write options 31 | }} 32 | 33 | \item{password}{\code{character(1)} The password to process the archive.} 34 | } 35 | \value{ 36 | A \link[tibble:tibble-package]{tibble} with details about files in the archive. 37 | } 38 | \description{ 39 | This function retrieves metadata about files in an archive, it can be passed 40 | to \code{\link[=archive_read]{archive_read()}} or \link{archive_write} to create a connection to read or 41 | write a specific file from the archive. 42 | } 43 | \examples{ 44 | a <- archive(system.file(package = "archive", "extdata", "data.zip")) 45 | a 46 | } 47 | \seealso{ 48 | \code{\link[=archive_read]{archive_read()}}, \code{\link[=archive_write]{archive_write()}} to read and write archive files 49 | using R connections, \code{\link[=archive_extract]{archive_extract()}}, \code{\link[=archive_write_files]{archive_write_files()}}, 50 | \code{\link[=archive_write_dir]{archive_write_dir()}} to add or extract files from an archive. 51 | } 52 | -------------------------------------------------------------------------------- /man/archive_extract.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/archive_extract.R 3 | \name{archive_extract} 4 | \alias{archive_extract} 5 | \title{Extract contents of an archive to a directory} 6 | \usage{ 7 | archive_extract( 8 | archive, 9 | dir = ".", 10 | files = NULL, 11 | options = character(), 12 | strip_components = 0L, 13 | password = NA_character_ 14 | ) 15 | } 16 | \arguments{ 17 | \item{archive}{\code{character(1)} The archive filename or an \code{archive} object.} 18 | 19 | \item{dir}{\code{character(1)} Directory location to extract archive contents, will be created 20 | if it does not exist.} 21 | 22 | \item{files}{\code{character() || integer() || NULL} One or more files within the archive, 23 | specified either by filename or by position.} 24 | 25 | \item{options}{\code{character()} default: \code{character(0)} Options to pass to the filter or format. 26 | The list of available options are documented in 27 | options can have one of the following forms: 28 | \itemize{ 29 | \item \code{option=value} 30 | The option/value pair will be provided to every module. 31 | Modules that do not accept an option with this name will 32 | ignore it. 33 | \item \code{option} 34 | The option will be provided to every module with a value 35 | of "1". 36 | \item \code{!option} 37 | The option will be provided to every module with a NULL 38 | value. 39 | \item \code{module:option=value}, \code{module:option}, \code{module:!option} 40 | As above, but the corresponding option and value will be 41 | provided only to modules whose name matches module. 42 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html}{read options} for available read options 43 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html}{write options} for available write options 44 | }} 45 | 46 | \item{strip_components}{Remove the specified number of leading path 47 | elements. Pathnames with fewer elements will be silently skipped.} 48 | 49 | \item{password}{\code{character(1)} The password to process the archive.} 50 | } 51 | \value{ 52 | The filenames extracted (invisibly). 53 | } 54 | \description{ 55 | Extract contents of an archive to a directory 56 | } 57 | \details{ 58 | If \code{files} is \code{NULL} (the default) all files will be extracted. 59 | } 60 | \examples{ 61 | a <- system.file(package = "archive", "extdata", "data.zip") 62 | d <- tempfile() 63 | 64 | # When called with default arguments extracts all files in the archive. 65 | archive_extract(a, d) 66 | list.files(d) 67 | unlink(d) 68 | 69 | # Can also specify one or more files to extract 70 | d <- tempfile() 71 | archive_extract(a, d, c("iris.csv", "airquality.csv")) 72 | list.files(d) 73 | unlink(d) 74 | } 75 | -------------------------------------------------------------------------------- /man/archive_read.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/archive_read.R 3 | \name{archive_read} 4 | \alias{archive_read} 5 | \title{Create a readable connection to a file in an archive.} 6 | \usage{ 7 | archive_read( 8 | archive, 9 | file = 1L, 10 | mode = "r", 11 | format = NULL, 12 | filter = NULL, 13 | options = character(), 14 | password = NA_character_ 15 | ) 16 | } 17 | \arguments{ 18 | \item{archive}{\code{character(1)} The archive filename or an \code{archive} object.} 19 | 20 | \item{file}{\code{character(1) || integer(1)} The filename within the archive, 21 | specified either by filename or by position.} 22 | 23 | \item{mode}{\code{character(1)} A description of how to open the 24 | connection (if it should be opened initially). See section 25 | ‘Modes’ in \code{\link[base:connections]{base::connections()}} for possible values.} 26 | 27 | \item{format}{\code{character(1)} default: \code{NULL} The archive format, one of \eval{choices_rd(names(archive:::archive_formats()))}. 28 | Supported formats differ depending on the libarchive version and build.} 29 | 30 | \item{filter}{\code{character(1)} default: \code{NULL} The archive filter, one of \eval{choices_rd(names(archive:::archive_filters()))}. 31 | Supported filters differ depending on the libarchive version and build.} 32 | 33 | \item{options}{\code{character()} default: \code{character(0)} Options to pass to the filter or format. 34 | The list of available options are documented in 35 | options can have one of the following forms: 36 | \itemize{ 37 | \item \code{option=value} 38 | The option/value pair will be provided to every module. 39 | Modules that do not accept an option with this name will 40 | ignore it. 41 | \item \code{option} 42 | The option will be provided to every module with a value 43 | of "1". 44 | \item \code{!option} 45 | The option will be provided to every module with a NULL 46 | value. 47 | \item \code{module:option=value}, \code{module:option}, \code{module:!option} 48 | As above, but the corresponding option and value will be 49 | provided only to modules whose name matches module. 50 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html}{read options} for available read options 51 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html}{write options} for available write options 52 | }} 53 | 54 | \item{password}{\code{character(1)} The password to process the archive.} 55 | } 56 | \value{ 57 | An 'archive_read' connection to the file within the archive to be read. 58 | } 59 | \description{ 60 | Create a readable connection to a file in an archive. 61 | } 62 | \examples{ 63 | a <- system.file(package = "archive", "extdata", "data.zip") 64 | # Show files in archive 65 | a 66 | 67 | # By default reads the first file in the archive. 68 | read.csv(archive_read(a), nrows = 3) 69 | 70 | # Can also specify a filename directly 71 | read.csv(archive_read(a, "mtcars.csv"), nrows = 3) 72 | 73 | # Or by position 74 | read.csv(archive_read(a, 3), nrows = 3) 75 | 76 | # Explicitly specify the format and filter if automatic detection fails. 77 | read.csv(archive_read(a, format = "zip"), nrows = 3) 78 | } 79 | -------------------------------------------------------------------------------- /man/archive_write.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/archive_write.R 3 | \name{archive_write} 4 | \alias{archive_write} 5 | \title{Create a writable connection to a file in an archive.} 6 | \usage{ 7 | archive_write( 8 | archive, 9 | file, 10 | mode = "w", 11 | format = NULL, 12 | filter = NULL, 13 | options = character(), 14 | password = NA_character_ 15 | ) 16 | } 17 | \arguments{ 18 | \item{archive}{\code{character(1)} The archive filename or an \code{archive} object.} 19 | 20 | \item{file}{\code{character(1) || integer(1)} The filename within the archive, 21 | specified either by filename or by position.} 22 | 23 | \item{mode}{\code{character(1)} A description of how to open the 24 | connection (if it should be opened initially). See section 25 | ‘Modes’ in \code{\link[base:connections]{base::connections()}} for possible values.} 26 | 27 | \item{format}{\code{character(1)} default: \code{NULL} The archive format, one of \eval{choices_rd(names(archive:::archive_formats()))}. 28 | Supported formats differ depending on the libarchive version and build.} 29 | 30 | \item{filter}{\code{character(1)} default: \code{NULL} The archive filter, one of \eval{choices_rd(names(archive:::archive_filters()))}. 31 | Supported filters differ depending on the libarchive version and build.} 32 | 33 | \item{options}{\code{character()} default: \code{character(0)} Options to pass to the filter or format. 34 | The list of available options are documented in 35 | options can have one of the following forms: 36 | \itemize{ 37 | \item \code{option=value} 38 | The option/value pair will be provided to every module. 39 | Modules that do not accept an option with this name will 40 | ignore it. 41 | \item \code{option} 42 | The option will be provided to every module with a value 43 | of "1". 44 | \item \code{!option} 45 | The option will be provided to every module with a NULL 46 | value. 47 | \item \code{module:option=value}, \code{module:option}, \code{module:!option} 48 | As above, but the corresponding option and value will be 49 | provided only to modules whose name matches module. 50 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html}{read options} for available read options 51 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html}{write options} for available write options 52 | }} 53 | 54 | \item{password}{\code{character(1)} The password to process the archive.} 55 | } 56 | \value{ 57 | An 'archive_write' connection to the file within the archive to be written. 58 | } 59 | \description{ 60 | Create a writable connection to a file in an archive. 61 | } 62 | \details{ 63 | If \code{format} and \code{filter} are \code{NULL}, they will be set automatically based on 64 | the file extension given in \code{file} when writing and automatically detected 65 | using 66 | \href{https://github.com/libarchive/libarchive/wiki/FormatDetection}{Robust automatic format detection} 67 | when reading. 68 | 69 | For traditional zip archives \code{\link[=archive_write]{archive_write()}} creates a connection which 70 | writes the data to the specified file directly. For other archive formats 71 | the file size must be known when the archive is created, so the data is 72 | first written to a scratch file on disk and then added to the archive. This 73 | scratch file is automatically removed when writing is complete. 74 | } 75 | \examples{ 76 | # Archive format and filters can be set automatically from the file extensions. 77 | f1 <- tempfile(fileext = ".tar.gz") 78 | 79 | write.csv(mtcars, archive_write(f1, "mtcars.csv")) 80 | archive(f1) 81 | unlink(f1) 82 | 83 | # They can also be specified explicitly 84 | f2 <- tempfile() 85 | write.csv(mtcars, archive_write(f2, "mtcars.csv", format = "tar", filter = "bzip2")) 86 | archive(f2) 87 | unlink(f2) 88 | 89 | # You can also pass additional options to control things like compression level 90 | f3 <- tempfile(fileext = ".tar.gz") 91 | write.csv(mtcars, archive_write(f3, "mtcars.csv", options = "compression-level=2")) 92 | archive(f3) 93 | unlink(f3) 94 | } 95 | -------------------------------------------------------------------------------- /man/archive_write_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/archive_write_dir.R, R/archive_write_files.R 3 | \name{archive_write_dir} 4 | \alias{archive_write_dir} 5 | \alias{archive_write_files} 6 | \title{Add files to a new archive} 7 | \usage{ 8 | archive_write_dir( 9 | archive, 10 | dir, 11 | format = NULL, 12 | filter = NULL, 13 | options = character(), 14 | password = NA_character_, 15 | ..., 16 | recursive = TRUE, 17 | full.names = FALSE 18 | ) 19 | 20 | archive_write_files( 21 | archive, 22 | files, 23 | format = NULL, 24 | filter = NULL, 25 | options = character(), 26 | password = NA_character_ 27 | ) 28 | } 29 | \arguments{ 30 | \item{archive}{\code{character(1)} The archive filename or an \code{archive} object.} 31 | 32 | \item{dir}{\code{character(1)} The directory of files to add.} 33 | 34 | \item{format}{\code{character(1)} default: \code{NULL} The archive format, one of \eval{choices_rd(names(archive:::archive_formats()))}. 35 | Supported formats differ depending on the libarchive version and build.} 36 | 37 | \item{filter}{\code{character(1)} default: \code{NULL} The archive filter, one of \eval{choices_rd(names(archive:::archive_filters()))}. 38 | Supported filters differ depending on the libarchive version and build.} 39 | 40 | \item{options}{\code{character()} default: \code{character(0)} Options to pass to the filter or format. 41 | The list of available options are documented in 42 | options can have one of the following forms: 43 | \itemize{ 44 | \item \code{option=value} 45 | The option/value pair will be provided to every module. 46 | Modules that do not accept an option with this name will 47 | ignore it. 48 | \item \code{option} 49 | The option will be provided to every module with a value 50 | of "1". 51 | \item \code{!option} 52 | The option will be provided to every module with a NULL 53 | value. 54 | \item \code{module:option=value}, \code{module:option}, \code{module:!option} 55 | As above, but the corresponding option and value will be 56 | provided only to modules whose name matches module. 57 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html}{read options} for available read options 58 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html}{write options} for available write options 59 | }} 60 | 61 | \item{password}{\code{character(1)} The password to process the archive.} 62 | 63 | \item{...}{additional parameters passed to \code{base::dir}.} 64 | 65 | \item{recursive}{logical. Should the listing recurse into directories?} 66 | 67 | \item{full.names}{a logical value. If \code{TRUE}, the directory 68 | path is prepended to the file names to give a relative file path. 69 | If \code{FALSE}, the file names (rather than paths) are returned.} 70 | 71 | \item{files}{\code{character()} One or more files to add to the archive.} 72 | } 73 | \value{ 74 | An 'archive' object representing the new archive (invisibly). 75 | 76 | An 'archive' object representing the new archive (invisibly). 77 | } 78 | \description{ 79 | \code{archive_write_files()} adds one or more files to a new archive. 80 | \code{archive_write_dir()} adds all the file(s) in a directory to a new archive. 81 | } 82 | \examples{ 83 | if (archive:::libarchive_version() > "3.2.0") { 84 | # write some files to a directory 85 | d <- tempfile() 86 | dir.create(d) 87 | old <- setwd(d) 88 | 89 | write.csv(iris, file.path(d, "iris.csv")) 90 | write.csv(mtcars, file.path(d, "mtcars.csv")) 91 | write.csv(airquality, file.path(d, "airquality.csv")) 92 | 93 | # Add some to a new archive 94 | a <- archive_write_files("data.tar.gz", c("iris.csv", "mtcars.csv")) 95 | setwd(old) 96 | a 97 | 98 | # Add all files in a directory 99 | a <- archive_write_dir("data.zip", d) 100 | a 101 | 102 | unlink("data.zip") 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /man/file_connections.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/file_read.R, R/file_write.R 3 | \name{file_read} 4 | \alias{file_read} 5 | \alias{file_connections} 6 | \alias{file_write} 7 | \title{Construct a connections for (possibly compressed) files.} 8 | \usage{ 9 | file_read( 10 | file, 11 | mode = "r", 12 | filter = NULL, 13 | options = character(), 14 | password = NA_character_ 15 | ) 16 | 17 | file_write( 18 | file, 19 | mode = "w", 20 | filter = NULL, 21 | options = character(), 22 | password = NA_character_ 23 | ) 24 | } 25 | \arguments{ 26 | \item{file}{\code{character(1) || integer(1)} The filename within the archive, 27 | specified either by filename or by position.} 28 | 29 | \item{mode}{\code{character(1)} A description of how to open the 30 | connection (if it should be opened initially). See section 31 | ‘Modes’ in \code{\link[base:connections]{base::connections()}} for possible values.} 32 | 33 | \item{filter}{\code{character(1)} default: \code{NULL} The archive filter, one of \eval{choices_rd(names(archive:::archive_filters()))}. 34 | Supported filters differ depending on the libarchive version and build.} 35 | 36 | \item{options}{\code{character()} default: \code{character(0)} Options to pass to the filter or format. 37 | The list of available options are documented in 38 | options can have one of the following forms: 39 | \itemize{ 40 | \item \code{option=value} 41 | The option/value pair will be provided to every module. 42 | Modules that do not accept an option with this name will 43 | ignore it. 44 | \item \code{option} 45 | The option will be provided to every module with a value 46 | of "1". 47 | \item \code{!option} 48 | The option will be provided to every module with a NULL 49 | value. 50 | \item \code{module:option=value}, \code{module:option}, \code{module:!option} 51 | As above, but the corresponding option and value will be 52 | provided only to modules whose name matches module. 53 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_read_set_options&sektion=3&format=html}{read options} for available read options 54 | See \href{https://man.freebsd.org/cgi/man.cgi?query=archive_write_set_options&sektion=3&format=html}{write options} for available write options 55 | }} 56 | 57 | \item{password}{\code{character(1)} The password to process the archive.} 58 | } 59 | \value{ 60 | An 'archive_read' connection (for \code{file_read()}) or an 'archive_write' connection (for \code{file_write()}) to the file. 61 | } 62 | \description{ 63 | They are functionally equivalent to calling \link{archive_read} or 64 | \link{archive_write} using \verb{format = "raw", archive = file}. 65 | } 66 | \details{ 67 | \code{file_write()} returns an writable output connection, 68 | \code{file_read()} returns a readable input connection. 69 | } 70 | \examples{ 71 | if (archive:::libarchive_version() > "3.2.0") { 72 | # Write bzip2, base 64 encoded data and use high compression 73 | write.csv(mtcars, 74 | file_write("mtcars.bz2", 75 | filter = c("uuencode", "bzip2"), 76 | options = "compression-level=9" 77 | ) 78 | ) 79 | 80 | # Read it back 81 | read.csv(file_read("mtcars.bz2"), row.names = 1, nrows = 3) 82 | unlink("mtcars.bz2") 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /man/macros/eval.Rd: -------------------------------------------------------------------------------- 1 | \newcommand{\eval}{\code{#1}} 2 | \newcommand{\evalatinstall}{\code{#1}} 3 | \newcommand{\evaltop}{\section{Section}{\code{#1}}} 4 | \newcommand{\evalatinstalltop}{\section{Section}{\code{#1}}} 5 | -------------------------------------------------------------------------------- /src/.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: LLVM 4 | Standard: Cpp11 5 | AlignAfterOpenBracket: AlwaysBreak 6 | AllowShortBlocksOnASingleLine: false 7 | BinPackArguments: false 8 | BinPackParameters: false 9 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 10 | PointerAlignment: Left 11 | --- 12 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | lib/ 5 | include/ 6 | bin/ 7 | share/ 8 | libarchive-3.3.1/ 9 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -Icpp11/include 2 | PKG_CXXFLAGS = @PKG_CXXFLAGS@ 3 | PKG_LIBS = @PKG_LIBS@ 4 | 5 | LIB_CON_DIR = ../inst/lib$(R_ARCH) 6 | 7 | LIB_CON = $(LIB_CON_DIR)/libconnection.so 8 | 9 | all: $(SHLIB) mrproper 10 | 11 | $(SHLIB): $(LIB_CON) 12 | 13 | mrproper: $(SHLIB) 14 | @rm -rf ../.deps 15 | 16 | $(LIB_CON): connection/connection.o 17 | @mkdir -p $(LIB_CON_DIR) 18 | @$(SHLIB_LINK) $(SHLIB_LIBADD) $(LIBR) -o $@ $^ 19 | 20 | .PHONY: clean mrproper 21 | 22 | clean: 23 | @rm -f $(OBJECTS) $(SHLIB) $(LIB_CON) connection/connection.o 24 | -------------------------------------------------------------------------------- /src/Makevars.ucrt: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS= -I. -Icpp11/include 2 | 3 | ifeq (,$(shell pkg-config --version 2>/dev/null)) 4 | PKG_LIBS = -larchive -lxml2 -lcrypto -lnettle -lregex -lexpat -llzo2 \ 5 | -llzma -llz4 -lbz2 -lz -lzstd -liconv -lws2_32 -lbcrypt 6 | else 7 | PKG_LIBS = $(shell pkg-config --libs libarchive) 8 | endif 9 | 10 | LIB_CON = ../inst/lib$(R_ARCH)/libconnection.dll 11 | 12 | all: $(OBJECTS) $(SHLIB) $(LIB_CON) 13 | 14 | $(LIB_CON): connection/connection.o 15 | mkdir -p $(dir $(LIB_CON)) 16 | $(SHLIB_LINK) $(LIBR) -o $@ $^ 17 | 18 | clean: 19 | rm -f $(OBJECTS) $(SHLIB) $(LIB_CON) connection/connection.o 20 | 21 | .PHONY: all clean 22 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | VERSION=3.6.1 2 | RWINLIB=../windows/libarchive-$(VERSION) 3 | TARGET = lib$(subst gcc,,$(COMPILED_BY))$(R_ARCH) 4 | PKG_CPPFLAGS= \ 5 | -I$(RWINLIB)/include -I. -Icpp11/include 6 | 7 | PKG_LIBS = \ 8 | -L$(RWINLIB)/$(TARGET) \ 9 | -L$(RWINLIB)/lib$(R_ARCH) \ 10 | -larchive -lcrypto -lnettle -lregex -lexpat -llzo2 -llzma -llz4 -lbz2 -lzstd -lz -liconv -lbcrypt 11 | 12 | 13 | # Originally from https://github.com/cran/curl/blob/3897ba5203dee940e2ce40ac23a0d1106da93df6/src/Makevars.win 14 | 15 | LIB_CON = ../inst/lib$(R_ARCH)/libconnection.dll 16 | 17 | all: clean winlibs $(OBJECTS) $(SHLIB) $(LIB_CON) 18 | 19 | $(LIB_CON): connection/connection.o 20 | mkdir -p $(dir $(LIB_CON)) 21 | $(SHLIB_LINK) $(LIBR) -o $@ $^ 22 | 23 | clean: 24 | rm -f $(OBJECTS) $(SHLIB) $(LIB_CON) connection/connection.o 25 | 26 | winlibs: 27 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" "../tools/winlibs.R" $(VERSION) 28 | 29 | .PHONY: all winlibs clean 30 | -------------------------------------------------------------------------------- /src/archive.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | 3 | #include 4 | 5 | using namespace cpp11::literals; 6 | 7 | [[cpp11::register]] cpp11::sexp 8 | archive_(cpp11::sexp connection, cpp11::strings options, cpp11::strings password) { 9 | 10 | local_utf8_locale ll; 11 | 12 | std::vector paths; 13 | std::vector<__LA_INT64_T> sizes; 14 | std::vector dates; 15 | 16 | struct archive* a; 17 | struct archive_entry* entry; 18 | 19 | a = archive_read_new(); 20 | call(archive_read_support_filter_all, a); 21 | call(archive_read_support_format_all, a); 22 | 23 | if (options.size() > 0) { 24 | call(archive_read_set_options, a, std::string(options[0]).c_str()); 25 | } 26 | if (!cpp11::is_na(password[0])) { 27 | call(archive_read_add_passphrase, a, std::string(password[0]).c_str()); 28 | } 29 | 30 | std::unique_ptr r(new input_data); 31 | r->buf.resize(16384); 32 | r->connection = connection; 33 | 34 | call(archive_read_set_read_callback, a, input_read); 35 | call(archive_read_set_close_callback, a, input_close); 36 | static auto isSeekable = cpp11::package("base")["isSeekable"]; 37 | if (isSeekable(connection)) { 38 | call(archive_read_set_seek_callback, a, input_seek); 39 | } 40 | call(archive_read_set_callback_data, a, r.get()); 41 | call(archive_read_open1, a); 42 | 43 | while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { 44 | paths.push_back(archive_entry_pathname(entry)); 45 | sizes.push_back(archive_entry_size(entry)); 46 | dates.push_back(archive_entry_mtime(entry)); 47 | call(archive_read_data_skip, a); 48 | } 49 | call(archive_read_free, a); 50 | 51 | static auto as_tibble = cpp11::package("tibble")["as_tibble"]; 52 | cpp11::writable::doubles d(dates); 53 | d.attr("class") = {"POSIXct", "POSIXt"}; 54 | 55 | cpp11::writable::list out( 56 | {"path"_nm = paths, "size"_nm = sizes, "date"_nm = d}); 57 | 58 | // out.attr("connection") = static_cast(connection); 59 | 60 | return as_tibble(out); 61 | } 62 | 63 | [[cpp11::register]] cpp11::integers archive_filters() { 64 | cpp11::writable::integers out({ 65 | "none"_nm = ARCHIVE_FILTER_NONE, "gzip"_nm = ARCHIVE_FILTER_GZIP, 66 | "bzip2"_nm = ARCHIVE_FILTER_BZIP2, "compress"_nm = ARCHIVE_FILTER_COMPRESS, 67 | "lzma"_nm = ARCHIVE_FILTER_LZMA, "xz"_nm = ARCHIVE_FILTER_XZ, 68 | "uuencode"_nm = ARCHIVE_FILTER_UU, "lzip"_nm = ARCHIVE_FILTER_LZIP 69 | #if ARCHIVE_VERSION_NUMBER >= 3001000 70 | , 71 | "lrzip"_nm = ARCHIVE_FILTER_LRZIP, "lzop"_nm = ARCHIVE_FILTER_LZOP, 72 | "grzip"_nm = ARCHIVE_FILTER_GRZIP 73 | #endif 74 | 75 | #if ARCHIVE_VERSION_NUMBER >= 3002000 76 | , 77 | "lz4"_nm = ARCHIVE_FILTER_LZ4 78 | #endif 79 | 80 | #if ARCHIVE_VERSION_NUMBER >= 3003003 81 | , 82 | "zstd"_nm = ARCHIVE_FILTER_ZSTD 83 | #endif 84 | }); 85 | return out; 86 | } 87 | 88 | [[cpp11::register]] cpp11::integers archive_formats() { 89 | cpp11::writable::integers out({ 90 | "7zip"_nm = ARCHIVE_FORMAT_7ZIP, "cab"_nm = ARCHIVE_FORMAT_CAB, 91 | "cpio"_nm = ARCHIVE_FORMAT_CPIO, "iso9660"_nm = ARCHIVE_FORMAT_ISO9660, 92 | "lha"_nm = ARCHIVE_FORMAT_LHA, "mtree"_nm = ARCHIVE_FORMAT_MTREE, 93 | "shar"_nm = ARCHIVE_FORMAT_SHAR, "rar"_nm = ARCHIVE_FORMAT_RAR, 94 | "raw"_nm = ARCHIVE_FORMAT_RAW, "tar"_nm = ARCHIVE_FORMAT_TAR, 95 | "xar"_nm = ARCHIVE_FORMAT_XAR, "zip"_nm = ARCHIVE_FORMAT_ZIP 96 | #if ARCHIVE_VERSION_NUMBER >= 3002000 97 | , 98 | "warc"_nm = ARCHIVE_FORMAT_WARC 99 | #endif 100 | }); 101 | return out; 102 | } 103 | 104 | [[cpp11::register]] std::string libarchive_version_() { 105 | std::string str = archive_version_string(); 106 | 107 | // Remove 'libarchive ' 108 | return str.substr(str.find_last_of(' ') + 1); 109 | } 110 | 111 | [[cpp11::register]] const char* libarchive_zlib_version_() { 112 | #if ARCHIVE_VERSION_NUMBER >= 3002000 113 | const char* version = archive_zlib_version(); 114 | return version ? version : ""; 115 | #else 116 | return ""; 117 | #endif 118 | } 119 | 120 | [[cpp11::register]] const char* libarchive_liblzma_version_() { 121 | #if ARCHIVE_VERSION_NUMBER >= 3002000 122 | const char* version = archive_liblzma_version(); 123 | return version ? version : ""; 124 | #else 125 | return ""; 126 | #endif 127 | } 128 | 129 | [[cpp11::register]] const char* libarchive_bzlib_version_() { 130 | #if ARCHIVE_VERSION_NUMBER >= 3002000 131 | const char* version = archive_bzlib_version(); 132 | return version ? version : ""; 133 | #else 134 | return ""; 135 | #endif 136 | } 137 | 138 | [[cpp11::register]] const char* libarchive_liblz4_version_() { 139 | #if ARCHIVE_VERSION_NUMBER >= 3002000 140 | const char* version = archive_liblz4_version(); 141 | return version ? version : ""; 142 | #else 143 | return ""; 144 | #endif 145 | } 146 | 147 | [[cpp11::register]] const char* libarchive_libzstd_version() { 148 | #if ARCHIVE_VERSION_NUMBER >= 3003003 149 | const char* version = archive_libzstd_version(); 150 | return version ? version : ""; 151 | #else 152 | return ""; 153 | #endif 154 | } 155 | -------------------------------------------------------------------------------- /src/archive_extract.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | #include 3 | 4 | const char* const pb_format = 5 | "{cli::pb_spin} %zu extracted | {cli::pb_current_bytes} " 6 | "({cli::pb_rate_bytes}) | " 7 | "{cli::pb_elapsed}"; 8 | 9 | static int copy_data( 10 | struct archive* ar, 11 | struct archive* aw, 12 | SEXP progress_bar, 13 | size_t& total_read, 14 | size_t num_extracted) { 15 | int r; 16 | const void* buff; 17 | size_t size; 18 | int64_t offset; 19 | 20 | for (;;) { 21 | r = call(archive_read_data_block, ar, &buff, &size, &offset); 22 | if (r == ARCHIVE_EOF) { 23 | return (ARCHIVE_OK); 24 | } 25 | total_read += size; 26 | 27 | if (CLI_SHOULD_TICK) { 28 | cli_progress_set_format(progress_bar, pb_format, num_extracted); 29 | 30 | cli_progress_set(progress_bar, total_read); 31 | } 32 | 33 | call(archive_write_data_block, aw, buff, size, offset); 34 | } 35 | } 36 | 37 | template std::vector as_file_index(const C& in) { 38 | std::vector out; 39 | out.reserve(in.size()); 40 | for (R_xlen_t value : in) { 41 | out.push_back(value); 42 | } 43 | return out; 44 | } 45 | 46 | template 47 | bool any_matches(const T& needle, const std::vector& haystack) { 48 | for (const C& n : haystack) { 49 | if (n == needle) { 50 | return true; 51 | } 52 | } 53 | return false; 54 | } 55 | 56 | /* From 57 | https://github.com/libarchive/libarchive/blob/0fd2ed25d78e9f4505de5dcb6208c6c0ff8d2edb/tar/util.c#L338-L375 58 | */ 59 | static const char* strip_components(const char* p, int elements) { 60 | /* Skip as many elements as necessary. */ 61 | while (elements > 0) { 62 | switch (*p++) { 63 | case '/': 64 | #if defined(_WIN32) 65 | case '\\': /* Support \ path sep on Windows ONLY. */ 66 | #endif 67 | elements--; 68 | break; 69 | case '\0': 70 | /* Path is too short, skip it. */ 71 | return (NULL); 72 | } 73 | } 74 | 75 | /* Skip any / characters. This handles short paths that have 76 | * additional / termination. This also handles the case where 77 | * the logic above stops in the middle of a duplicate // 78 | * sequence (which would otherwise get converted to an 79 | * absolute path). */ 80 | for (;;) { 81 | switch (*p) { 82 | case '/': 83 | #if defined(_WIN32) 84 | case '\\': /* Support \ path sep on Windows ONLY. */ 85 | #endif 86 | ++p; 87 | break; 88 | case '\0': 89 | return (NULL); 90 | default: 91 | return (p); 92 | } 93 | } 94 | } 95 | 96 | [[cpp11::register]] cpp11::strings archive_extract_( 97 | const cpp11::sexp& connection, 98 | cpp11::sexp file, 99 | int num_strip_components, 100 | cpp11::strings options, 101 | cpp11::strings password, 102 | size_t sz = 16384) { 103 | struct archive* a; 104 | struct archive* ext; 105 | struct archive_entry* entry; 106 | int flags; 107 | int res; 108 | 109 | local_utf8_locale ll; 110 | 111 | std::unique_ptr r(new input_data); 112 | r->buf.resize(16384); 113 | r->connection = connection; 114 | 115 | /* Select which attributes we want to restore. */ 116 | flags = ARCHIVE_EXTRACT_TIME; 117 | flags |= ARCHIVE_EXTRACT_PERM; 118 | flags |= ARCHIVE_EXTRACT_ACL; 119 | flags |= ARCHIVE_EXTRACT_FFLAGS; 120 | 121 | a = archive_read_new(); 122 | call(archive_read_support_format_all, a); 123 | call(archive_read_support_filter_all, a); 124 | 125 | if (options.size() > 0) { 126 | call(archive_read_set_options, a, std::string(options[0]).c_str()); 127 | } 128 | 129 | if (!cpp11::is_na(password[0])) { 130 | call(archive_read_add_passphrase, a, std::string(password[0]).c_str()); 131 | } 132 | 133 | call(archive_read_set_read_callback, a, input_read); 134 | call(archive_read_set_close_callback, a, input_close); 135 | static auto isSeekable = cpp11::package("base")["isSeekable"]; 136 | if (isSeekable(connection)) { 137 | call(archive_read_set_seek_callback, a, input_seek); 138 | } 139 | call(archive_read_set_callback_data, a, r.get()); 140 | call(archive_read_open1, a); 141 | 142 | ext = archive_write_disk_new(); 143 | call(archive_write_disk_set_options, ext, flags); 144 | #ifndef __MINGW32__ 145 | /* set_standard_lookup is not available on windows */ 146 | call(archive_write_disk_set_standard_lookup, ext); 147 | #endif 148 | 149 | std::vector file_indexes; 150 | std::vector file_names; 151 | 152 | if (TYPEOF(file) == INTSXP) { 153 | file_indexes = as_file_index(cpp11::integers(file)); 154 | } else if (TYPEOF(file) == REALSXP) { 155 | file_indexes = as_file_index(cpp11::doubles(file)); 156 | } else if (TYPEOF(file) == STRSXP) { 157 | file_names = cpp11::as_cpp>(file); 158 | } 159 | 160 | using namespace cpp11::literals; 161 | 162 | cpp11::sexp progress_bar(cli_progress_bar(NA_INTEGER, R_NilValue)); 163 | 164 | size_t total_read = 0; 165 | 166 | size_t num_extracted = 0; 167 | 168 | cpp11::writable::strings extracted_files; 169 | 170 | for (R_xlen_t index = 1;; ++index) { 171 | res = call(archive_read_next_header, a, &entry); 172 | if (res == ARCHIVE_EOF) { 173 | break; 174 | } 175 | const char* filename = archive_entry_pathname(entry); 176 | const char* original_filename = filename; 177 | if (num_strip_components > 0) { 178 | filename = strip_components(filename, num_strip_components); 179 | if (filename == nullptr) { 180 | continue; 181 | } 182 | } 183 | 184 | if (file == R_NilValue || 185 | (!file_indexes.empty() && any_matches(index, file_indexes)) || 186 | (!file_names.empty() && any_matches(filename, file_names))) { 187 | extracted_files.push_back(filename); 188 | 189 | if (filename != original_filename) { 190 | archive_entry_copy_pathname(entry, filename); 191 | } 192 | 193 | call(archive_write_header, ext, entry); 194 | copy_data(a, ext, progress_bar, total_read, num_extracted); 195 | call(archive_write_finish_entry, ext); 196 | 197 | num_extracted++; 198 | 199 | if (num_extracted == file_indexes.size() || 200 | num_extracted == file_names.size()) { 201 | break; 202 | } 203 | } 204 | } 205 | 206 | cli_progress_done(progress_bar); 207 | 208 | call(archive_read_close, a); 209 | call(archive_read_free, a); 210 | call(archive_write_close, ext); 211 | call(archive_write_free, ext); 212 | 213 | return extracted_files; 214 | } 215 | -------------------------------------------------------------------------------- /src/archive_read.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | 3 | /* Define BSWAP_32 on Big Endian systems */ 4 | #ifdef WORDS_BIGENDIAN 5 | #if (defined(__sun) && defined(__SVR4)) 6 | #include 7 | #elif (defined(__APPLE__) && defined(__ppc__) || defined(__ppc64__)) 8 | #include 9 | #define BSWAP_32 OSSwapInt32 10 | #elif (defined(__OpenBSD__)) 11 | #define BSWAP_32(x) swap32(x) 12 | #elif (defined(__NetBSD__)) 13 | #include 14 | #include 15 | #define BSWAP_32(x) bswap32(x) 16 | #elif (defined(__GLIBC__)) 17 | #include 18 | #define BSWAP_32(x) bswap_32(x) 19 | #elif (defined(_AIX)) 20 | #define BSWAP_32(x) __builtin_bswap32(x) 21 | #endif 22 | #endif 23 | 24 | /* Read archives 25 | * 26 | * The custom R connection code was adapted from curl package by Jeroen Ooms 27 | * Source: 28 | * https://github.com/cran/curl/blob/3897ba5203dee940e2ce40ac23a0d1106da93df6/src/curl.c 29 | * 30 | * The libarchive code from libarchive examples 31 | * Source: https://github.com/libarchive/libarchive/wiki/Examples 32 | */ 33 | 34 | ssize_t input_read(struct archive* a, void* client_data, const void** buff) { 35 | struct input_data* data = static_cast(client_data); 36 | *buff = data->buf.data(); 37 | return read_connection(data->connection, data->buf.data(), data->buf.size()); 38 | } 39 | 40 | int64_t 41 | input_seek(struct archive*, void* client_data, int64_t offset, int whence) { 42 | struct input_data* data = static_cast(client_data); 43 | static auto seek = cpp11::package("base")["seek"]; 44 | 45 | seek( 46 | data->connection, 47 | offset, 48 | whence == SEEK_END ? "end" : whence == SEEK_CUR ? "current" : "start"); 49 | /* need to call seek again to get the current position */ 50 | int64_t value = cpp11::as_cpp(seek(data->connection)); 51 | 52 | return value; 53 | } 54 | 55 | int input_close(struct archive* a, void* client_data) { 56 | struct input_data* data = static_cast(client_data); 57 | static auto close = cpp11::package("base")["close"]; 58 | 59 | close(data->connection); 60 | return (ARCHIVE_OK); 61 | } 62 | 63 | bool entry_matches(const std::string& str, archive_entry* entry) { 64 | if (str.empty()) { 65 | return false; 66 | } 67 | 68 | const char* pathname = archive_entry_pathname(entry); 69 | return str == pathname; 70 | } 71 | 72 | static Rboolean rchive_read_open(Rconnection con) { 73 | rchive* r = (rchive*)con->private_ptr; 74 | 75 | local_utf8_locale ll; 76 | 77 | r->ar = archive_read_new(); 78 | 79 | bool is_raw_format = r->format == ARCHIVE_FORMAT_RAW; 80 | 81 | con->text = strchr(con->mode, 'b') ? FALSE : TRUE; 82 | 83 | /* explicit setting of the format and filters is not available until 84 | * libarchive version 3.1.0 85 | */ 86 | #if ARCHIVE_VERSION_NUMBER >= 3001000 87 | if (r->filters[0] == -1) { 88 | call(archive_read_support_filter_all, con); 89 | } else { 90 | for (int i = 0; i < FILTER_MAX && r->filters[i] != -1; ++i) { 91 | call(archive_read_append_filter, con, r->filters[i]); 92 | } 93 | } 94 | 95 | if (r->format == -1) { 96 | call(archive_read_support_format_all, con); 97 | } else if (is_raw_format) { 98 | call(archive_read_support_format_raw, con); 99 | } else { 100 | call(archive_read_set_format, con, r->format); 101 | } 102 | #else 103 | call(archive_read_support_filter_all, con); 104 | call(archive_read_support_format_all, con); 105 | #endif 106 | 107 | if (!r->options.empty()) { 108 | call(archive_read_set_options, con, r->options.c_str()); 109 | } 110 | 111 | if (!cpp11::is_na(r->password[0])) { 112 | call(archive_read_add_passphrase, con, std::string(r->password[0]).c_str()); 113 | } 114 | 115 | static auto open = cpp11::package("base")["open"]; 116 | static auto isOpen = cpp11::package("base")["isOpen"]; 117 | if (!isOpen(r->input.connection)) { 118 | open(r->input.connection, "rb"); 119 | } 120 | call(archive_read_set_read_callback, r->ar, input_read); 121 | call(archive_read_set_close_callback, r->ar, input_close); 122 | static auto isSeekable = cpp11::package("base")["isSeekable"]; 123 | if (isSeekable(r->input.connection)) { 124 | call(archive_read_set_seek_callback, r->ar, input_seek); 125 | } 126 | call(archive_read_set_callback_data, r->ar, &r->input); 127 | call(archive_read_open1, r->ar); 128 | 129 | /* Find entry to extract */ 130 | int file_offset = -1; 131 | std::string file; 132 | 133 | if (TYPEOF(r->file) == INTSXP || TYPEOF(r->file) == REALSXP) { 134 | file_offset = cpp11::as_cpp(r->file) - 1; 135 | } else { 136 | file = cpp11::as_cpp(r->file); 137 | } 138 | 139 | int itr = 0; 140 | int res; 141 | while ((res = archive_read_next_header(r->ar, &r->entry)) == ARCHIVE_OK) { 142 | if (is_raw_format || entry_matches(file, r->entry) || itr == file_offset) { 143 | r->has_more = 1; 144 | con->isopen = TRUE; 145 | push(r); 146 | return TRUE; 147 | } 148 | call(archive_read_data_skip, con); 149 | ++itr; 150 | } 151 | 152 | con->isopen = FALSE; 153 | const char* msg = archive_error_string(r->ar); 154 | Rf_errorcall(R_NilValue, "%s", msg); 155 | return FALSE; 156 | } 157 | 158 | void rchive_read_close(Rconnection con) { 159 | call(archive_read_close, con); 160 | 161 | con->isopen = FALSE; 162 | con->incomplete = FALSE; 163 | } 164 | 165 | void rchive_read_destroy(Rconnection con) { 166 | rchive* r = (rchive*)con->private_ptr; 167 | 168 | /* free the handle connection */ 169 | call(archive_read_free, con); 170 | 171 | delete r; 172 | } 173 | 174 | /* Support for readBin() */ 175 | static size_t rchive_read(void* target, size_t sz, size_t ni, Rconnection con) { 176 | rchive* r = (rchive*)con->private_ptr; 177 | size_t size = sz * ni; 178 | 179 | /* append data to the target buffer */ 180 | size_t total_size = pop(target, size, r); 181 | while ((size > total_size) && r->has_more) { 182 | push(r); 183 | total_size += pop((char*)target + total_size, (size - total_size), r); 184 | } 185 | con->incomplete = (Rboolean)r->has_more; 186 | return total_size; 187 | } 188 | 189 | /* https://github.com/jeroen/curl/blob/102eb33288c853e0b3d4344fa1725388f606cecc/src/curl.c#L145 190 | */ 191 | /* naive implementation of readLines */ 192 | static int rchive_fgetc(Rconnection con) { 193 | int x = 0; 194 | #ifdef WORDS_BIGENDIAN 195 | return rchive_read(&x, 1, 1, con) ? BSWAP_32(x) : R_EOF; 196 | #else 197 | return rchive_read(&x, 1, 1, con) ? x : R_EOF; 198 | #endif 199 | } 200 | 201 | [[cpp11::register]] SEXP archive_read_( 202 | const cpp11::sexp connection, 203 | const cpp11::sexp file, 204 | const std::string& description, 205 | const std::string& mode, 206 | cpp11::integers format, 207 | cpp11::integers filters, 208 | cpp11::strings options, 209 | cpp11::strings password, 210 | size_t sz = 16384) { 211 | Rconnection con; 212 | 213 | SEXP rc = PROTECT( 214 | new_connection(description.c_str(), mode.c_str(), "archive_read", &con)); 215 | 216 | /* Setup archive */ 217 | rchive* r = new rchive; 218 | r->buf.resize(sz); 219 | r->size = 0; 220 | r->cur = NULL; 221 | 222 | r->input.connection = connection; 223 | r->input.buf.resize(sz); 224 | 225 | if (options.size() > 0) { 226 | r->options = options[0]; 227 | } 228 | 229 | r->format = format.size() == 0 ? -1 : format[0]; 230 | r->password = password; 231 | 232 | /* Initialize filters */ 233 | if (filters.size() > FILTER_MAX) { 234 | cpp11::stop("Cannot use more than %i filters", FILTER_MAX); 235 | } 236 | for (int i = 0; i < FILTER_MAX; ++i) { 237 | r->filters[i] = -1; 238 | } 239 | for (int i = 0; i < filters.size(); ++i) { 240 | r->filters[i] = filters[i]; 241 | } 242 | 243 | r->file = file; 244 | 245 | /* set connection properties */ 246 | con->incomplete = TRUE; 247 | con->private_ptr = r; 248 | con->canseek = FALSE; 249 | con->canwrite = FALSE; 250 | con->isopen = FALSE; 251 | con->blocking = TRUE; 252 | con->UTF8out = FALSE; 253 | con->open = rchive_read_open; 254 | con->close = rchive_read_close; 255 | con->destroy = rchive_read_destroy; 256 | con->read = rchive_read; 257 | con->fgetc = rchive_fgetc; 258 | con->fgetc_internal = rchive_fgetc; 259 | con->text = strchr(con->mode, 'b') ? FALSE : TRUE; 260 | 261 | UNPROTECT(1); 262 | return rc; 263 | } 264 | -------------------------------------------------------------------------------- /src/archive_write.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | #include 3 | #include 4 | #include 5 | 6 | std::string my_basename(std::string const& str) { 7 | std::size_t found = str.find_last_of("/\\"); 8 | return str.substr(found + 1); 9 | } 10 | 11 | /* callback function to store received data */ 12 | static size_t 13 | rchive_write_data(const void* contents, size_t sz, size_t n, Rconnection ctx) { 14 | rchive* r = (rchive*)ctx->private_ptr; 15 | 16 | size_t realsize = sz * n; 17 | call(archive_write_data, ctx, contents, realsize); 18 | r->size += realsize; 19 | 20 | return n; 21 | } 22 | 23 | std::string scratch_file(const char* filename) { 24 | static auto tempdir = cpp11::package("base")["tempdir"]; 25 | std::string out = 26 | std::string(CHAR(STRING_ELT(tempdir(), 0))) + '/' + my_basename(filename); 27 | return out; 28 | } 29 | 30 | static Rboolean rchive_write_open(Rconnection con) { 31 | rchive* r = (rchive*)con->private_ptr; 32 | 33 | local_utf8_locale ll; 34 | 35 | r->ar = archive_write_disk_new(); 36 | 37 | r->entry = archive_entry_new(); 38 | 39 | archive_entry_set_pathname( 40 | r->entry, scratch_file(r->filename.c_str()).c_str()); 41 | archive_entry_set_filetype(r->entry, AE_IFREG); 42 | archive_entry_set_perm(r->entry, 0644); 43 | call(archive_write_header, con, r->entry); 44 | 45 | con->isopen = TRUE; 46 | 47 | return TRUE; 48 | } 49 | 50 | /* This function closes the temporary scratch file, then writes the actual 51 | * archive file based on the archive filename given and then unlinks the 52 | * scratch file */ 53 | void rchive_write_close(Rconnection con) { 54 | char buf[8192]; 55 | size_t bytes_read; 56 | rchive* r = (rchive*)con->private_ptr; 57 | 58 | local_utf8_locale ll; 59 | 60 | if (!con->isopen) { 61 | return; 62 | } 63 | /* Close scratch file */ 64 | call(archive_write_finish_entry, con); 65 | call(archive_write_close, con); 66 | call(archive_write_free, con); 67 | archive_entry_free(r->entry); 68 | con->isopen = FALSE; 69 | con->incomplete = FALSE; 70 | 71 | /* Write scratch file to archive */ 72 | struct archive* in; 73 | struct archive* out; 74 | struct archive_entry* entry; 75 | in = archive_read_disk_new(); 76 | #ifndef __MINGW32__ 77 | call(archive_read_disk_set_standard_lookup, in); 78 | #endif 79 | entry = archive_entry_new(); 80 | 81 | std::string scratch = scratch_file(r->filename.c_str()); 82 | int fd = open(scratch.c_str(), O_RDONLY); 83 | if (fd < 0) { 84 | Rf_error("Could not open scratch file"); 85 | } 86 | archive_entry_copy_pathname(entry, r->filename.c_str()); 87 | call( 88 | archive_read_disk_entry_from_file, 89 | in, 90 | entry, 91 | fd, 92 | (const struct stat*)NULL); 93 | 94 | out = archive_write_new(); 95 | 96 | call(archive_write_set_format, out, r->format); 97 | 98 | for (int i = 0; i < FILTER_MAX && r->filters[i] != -1; ++i) { 99 | call(archive_write_add_filter, out, r->filters[i]); 100 | } 101 | 102 | if (!cpp11::is_na(r->password[0])) { 103 | call(archive_write_set_passphrase, out, std::string(r->password[0]).c_str()); 104 | } 105 | 106 | if (!r->options.empty()) { 107 | call(archive_write_set_options, out, r->options.c_str()); 108 | } 109 | 110 | call(archive_write_open_filename, out, r->archive_filename.c_str()); 111 | call(archive_write_header, out, entry); 112 | 113 | while ((bytes_read = read(fd, buf, sizeof(buf))) > 0) { 114 | call(archive_write_data, out, buf, bytes_read); 115 | } 116 | close(fd); 117 | archive_entry_free(entry); 118 | call(archive_write_free, out); 119 | call(archive_read_free, in); 120 | 121 | unlink(scratch.c_str()); 122 | } 123 | 124 | void rchive_write_destroy(Rconnection con) { 125 | rchive* r = (rchive*)con->private_ptr; 126 | 127 | /* free the handle connection */ 128 | delete r; 129 | } 130 | 131 | // This writes a single file to a new connection, it first writes the data 132 | // to a scratch file, then adds it to the archive, because the archive 133 | // headers need to be written before the data is added, and we do not know 134 | // the size of the data until it has been written. 135 | [[cpp11::register]] SEXP archive_write_( 136 | const std::string& archive_filename, 137 | const std::string& filename, 138 | const std::string& mode, 139 | int format, 140 | cpp11::integers filters, 141 | cpp11::strings options, 142 | cpp11::strings password, 143 | size_t sz) { 144 | Rconnection con; 145 | SEXP rc = 146 | PROTECT(new_connection("input", mode.c_str(), "archive_write", &con)); 147 | 148 | /* Setup archive */ 149 | rchive* r = (rchive*)new rchive; 150 | 151 | r->archive_filename = std::move(archive_filename); 152 | 153 | r->format = format; 154 | r->password = password; 155 | 156 | // Initialize filters 157 | if (filters.size() > FILTER_MAX) { 158 | cpp11::stop("Cannot use more than %i filters", FILTER_MAX); 159 | } 160 | for (int i = 0; i < FILTER_MAX; ++i) { 161 | r->filters[i] = -1; 162 | } 163 | for (int i = 0; i < filters.size(); ++i) { 164 | r->filters[i] = filters[i]; 165 | } 166 | 167 | r->filename = std::move(filename); 168 | 169 | if (options.size() > 0) { 170 | r->options = options[0]; 171 | } 172 | 173 | /* set connection properties */ 174 | con->incomplete = TRUE; 175 | con->private_ptr = r; 176 | con->canread = FALSE; 177 | con->canseek = FALSE; 178 | con->canwrite = TRUE; 179 | con->isopen = FALSE; 180 | con->blocking = TRUE; 181 | con->text = FALSE; 182 | con->open = rchive_write_open; 183 | con->close = rchive_write_close; 184 | con->destroy = rchive_write_destroy; 185 | con->write = rchive_write_data; 186 | 187 | UNPROTECT(1); 188 | return rc; 189 | } 190 | -------------------------------------------------------------------------------- /src/archive_write_direct.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | #include 3 | #include 4 | #include 5 | 6 | /* callback function to store received data */ 7 | static size_t rchive_write_direct_data( 8 | const void* contents, size_t sz, size_t n, Rconnection con) { 9 | rchive* r = (rchive*)con->private_ptr; 10 | 11 | size_t realsize = sz * n; 12 | call(archive_write_data, con, contents, realsize); 13 | r->size += realsize; 14 | 15 | return n; 16 | } 17 | 18 | static Rboolean rchive_write_direct_open(Rconnection con) { 19 | rchive* r = (rchive*)con->private_ptr; 20 | 21 | local_utf8_locale ll; 22 | 23 | r->ar = archive_write_new(); 24 | 25 | for (int i = 0; i < FILTER_MAX && r->filters[i] != -1; ++i) { 26 | call(archive_write_add_filter, con, r->filters[i]); 27 | } 28 | 29 | call(archive_write_set_format, con, r->format); 30 | 31 | if (!cpp11::is_na(r->password[0])) { 32 | call(archive_write_set_passphrase, con, std::string(r->password[0]).c_str()); 33 | } 34 | 35 | if (!r->options.empty()) { 36 | call(archive_write_set_options, con, r->options.c_str()); 37 | } 38 | 39 | call(archive_write_open_filename, con, r->archive_filename.c_str()); 40 | 41 | r->entry = archive_entry_new(); 42 | 43 | archive_entry_set_pathname(r->entry, r->filename.c_str()); 44 | archive_entry_set_filetype(r->entry, AE_IFREG); 45 | archive_entry_set_perm(r->entry, 0644); 46 | archive_entry_unset_size(r->entry); 47 | call(archive_write_header, con, r->entry); 48 | 49 | archive_entry_free(r->entry); 50 | 51 | con->isopen = TRUE; 52 | 53 | return TRUE; 54 | } 55 | 56 | /* This function closes the temporary scratch file, then writes the actual 57 | * archive file based on the archive filename given and then unlinks the 58 | * scratch file */ 59 | void rchive_write_direct_close(Rconnection con) { 60 | if (!con->isopen) { 61 | return; 62 | } 63 | /* Close scratch file */ 64 | call(archive_write_close, con); 65 | call(archive_write_free, con); 66 | 67 | con->isopen = FALSE; 68 | } 69 | 70 | void rchive_write_direct_destroy(Rconnection con) { 71 | rchive* r = (rchive*)con->private_ptr; 72 | 73 | /* free the handle connection */ 74 | delete r; 75 | } 76 | 77 | // This writes a single (direct) file to a new connection. Unlike other archive 78 | // formats direct does not need to know the size of the file up front. 79 | // This lets us write to it without having to write to a scratch file first. 80 | [[cpp11::register]] SEXP archive_write_direct_( 81 | const std::string& archive_filename, 82 | const std::string& filename, 83 | std::string mode, 84 | int format, 85 | cpp11::integers filters, 86 | cpp11::strings options, 87 | cpp11::strings password, 88 | size_t sz) { 89 | Rconnection con; 90 | SEXP rc = 91 | PROTECT(new_connection("input", mode.c_str(), "archive_write", &con)); 92 | 93 | /* Setup archive */ 94 | rchive* r = (rchive*)new rchive; 95 | 96 | r->archive_filename = std::move(archive_filename); 97 | 98 | // Initialize filters 99 | if (filters.size() > FILTER_MAX) { 100 | cpp11::stop("Cannot use more than %i filters", FILTER_MAX); 101 | } 102 | for (int i = 0; i < FILTER_MAX; ++i) { 103 | r->filters[i] = -1; 104 | } 105 | for (int i = 0; i < filters.size(); ++i) { 106 | r->filters[i] = filters[i]; 107 | } 108 | 109 | r->format = format; 110 | r->password = password; 111 | 112 | r->filename = std::move(filename); 113 | 114 | if (options.size() > 0) { 115 | r->options = options[0]; 116 | } 117 | 118 | /* set connection properties */ 119 | con->incomplete = TRUE; 120 | con->private_ptr = r; 121 | con->canread = FALSE; 122 | con->canseek = FALSE; 123 | con->canwrite = TRUE; 124 | con->isopen = FALSE; 125 | con->blocking = TRUE; 126 | con->text = FALSE; 127 | con->open = rchive_write_direct_open; 128 | con->close = rchive_write_direct_close; 129 | con->destroy = rchive_write_direct_destroy; 130 | con->write = rchive_write_direct_data; 131 | 132 | UNPROTECT(1); 133 | return rc; 134 | } 135 | -------------------------------------------------------------------------------- /src/archive_write_files.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | #include 3 | #include 4 | 5 | const char* const pb_format = 6 | "{cli::pb_spin} %zu added | {cli::pb_current_bytes} " 7 | "({cli::pb_rate_bytes}) | " 8 | "{cli::pb_elapsed}"; 9 | 10 | // Write files already on disk to a new archive 11 | [[cpp11::register]] SEXP archive_write_files_( 12 | const std::string& archive_filename, 13 | cpp11::strings files, 14 | int format, 15 | cpp11::integers filters, 16 | cpp11::strings options, 17 | cpp11::strings password, 18 | size_t sz = 16384) { 19 | 20 | struct archive* a; 21 | struct archive_entry* entry; 22 | struct stat st; 23 | std::vector buf; 24 | int len; 25 | int fd; 26 | 27 | buf.resize(sz); 28 | 29 | a = archive_write_new(); 30 | 31 | call(archive_write_set_format, a, format); 32 | 33 | for (int i = 0; i < filters.size(); ++i) { 34 | call(archive_write_add_filter, a, filters[i]); 35 | } 36 | 37 | if (options.size() > 0) { 38 | call(archive_write_set_options, a, std::string(options[0]).c_str()); 39 | } 40 | 41 | if (!cpp11::is_na(password[0])) { 42 | call(archive_write_set_passphrase, a, std::string(password[0]).c_str()); 43 | } 44 | 45 | size_t num_written = 0; 46 | size_t total_written = 0; 47 | 48 | using namespace cpp11::literals; 49 | 50 | cpp11::sexp progress_bar(cli_progress_bar(NA_INTEGER, R_NilValue)); 51 | 52 | call(archive_write_open_filename, a, archive_filename.c_str()); 53 | for (std::string file : files) { 54 | stat(file.c_str(), &st); 55 | entry = archive_entry_new(); 56 | #if defined(_WIN32) || (!defined(__GNUC__) && !defined(__clang__)) 57 | // there are quite many CRT dialects and passing struct stat to 3rdparty library could be unstable. 58 | archive_entry_set_size(entry, st.st_size); 59 | archive_entry_set_mtime(entry, st.st_mtime, 0); 60 | archive_entry_set_ctime(entry, st.st_ctime, 0); 61 | archive_entry_set_atime(entry, st.st_atime, 0); 62 | archive_entry_set_mode(entry, st.st_mode); // seems required as not defaulting to S_IFREG. 63 | #else 64 | archive_entry_copy_stat(entry, &st); 65 | #endif 66 | archive_entry_set_pathname(entry, file.c_str()); 67 | call(archive_write_header, a, entry); 68 | if ((fd = open(file.c_str(), O_RDONLY)) != -1) { 69 | len = read(fd, buf.data(), buf.size()); 70 | while (len > 0) { 71 | call(archive_write_data, a, buf.data(), len); 72 | total_written += len; 73 | if (CLI_SHOULD_TICK) { 74 | cli_progress_set_format(progress_bar, pb_format, num_written); 75 | 76 | cli_progress_set(progress_bar, total_written); 77 | } 78 | len = read(fd, buf.data(), buf.size()); 79 | } 80 | close(fd); 81 | } 82 | archive_entry_free(entry); 83 | ++num_written; 84 | } 85 | call(archive_write_free, a); 86 | 87 | cli_progress_done(progress_bar); 88 | 89 | return R_NilValue; 90 | } 91 | -------------------------------------------------------------------------------- /src/connection/connection.c: -------------------------------------------------------------------------------- 1 | #include "connection.h" 2 | 3 | SEXP new_connection( 4 | const char* description, 5 | const char* mode, 6 | const char* class_name, 7 | Rconnection* ptr) { 8 | return R_new_custom_connection(description, mode, class_name, ptr); 9 | } 10 | 11 | size_t read_connection(SEXP connection, void* buf, size_t n) { 12 | return R_ReadConnection(R_GetConnection(connection), buf, n); 13 | } 14 | -------------------------------------------------------------------------------- /src/connection/connection.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Rinternals.h" 4 | 5 | // clang-format off 6 | #ifdef __clang__ 7 | # pragma clang diagnostic push 8 | # pragma clang diagnostic ignored "-Wkeyword-macro" 9 | #endif 10 | #define class class_name 11 | #define private private_ptr 12 | #include 13 | #undef class 14 | #undef private 15 | #ifdef __clang__ 16 | # pragma clang diagnostic pop 17 | #endif 18 | // clang-format on 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | SEXP new_connection( 25 | const char* description, 26 | const char* mode, 27 | const char* class_name, 28 | Rconnection* ptr); 29 | 30 | size_t read_connection(SEXP connection, void* buf, size_t n); 31 | 32 | #ifdef __cplusplus 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpp11/R.hpp" 4 | #include "cpp11/altrep.hpp" 5 | #include "cpp11/as.hpp" 6 | #include "cpp11/attribute_proxy.hpp" 7 | #include "cpp11/data_frame.hpp" 8 | #include "cpp11/doubles.hpp" 9 | #include "cpp11/environment.hpp" 10 | #include "cpp11/external_pointer.hpp" 11 | #include "cpp11/function.hpp" 12 | #include "cpp11/integers.hpp" 13 | #include "cpp11/list.hpp" 14 | #include "cpp11/list_of.hpp" 15 | #include "cpp11/logicals.hpp" 16 | #include "cpp11/matrix.hpp" 17 | #include "cpp11/named_arg.hpp" 18 | #include "cpp11/protect.hpp" 19 | #include "cpp11/r_bool.hpp" 20 | #include "cpp11/r_string.hpp" 21 | #include "cpp11/r_vector.hpp" 22 | #include "cpp11/raws.hpp" 23 | #include "cpp11/sexp.hpp" 24 | #include "cpp11/strings.hpp" 25 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/R.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #ifdef R_INTERNALS_H_ 4 | #if !(defined(R_NO_REMAP) && defined(STRICT_R_HEADERS)) 5 | #error R headers were included before cpp11 headers \ 6 | and at least one of R_NO_REMAP or STRICT_R_HEADERS \ 7 | was not defined. 8 | #endif 9 | #endif 10 | 11 | #define R_NO_REMAP 12 | #define STRICT_R_HEADERS 13 | #include "R_ext/Boolean.h" 14 | #include "Rinternals.h" 15 | #include "Rversion.h" 16 | 17 | // clang-format off 18 | #ifdef __clang__ 19 | # pragma clang diagnostic push 20 | # pragma clang diagnostic ignored "-Wattributes" 21 | #endif 22 | 23 | #ifdef __GNUC__ 24 | # pragma GCC diagnostic push 25 | # pragma GCC diagnostic ignored "-Wattributes" 26 | #endif 27 | // clang-format on 28 | 29 | #include 30 | #include "cpp11/altrep.hpp" 31 | 32 | #if defined(R_VERSION) && R_VERSION >= R_Version(4, 4, 0) 33 | // Use R's new macro 34 | #define CPP11_PRIdXLEN_T R_PRIdXLEN_T 35 | #else 36 | // Recreate what new R does 37 | #ifdef LONG_VECTOR_SUPPORT 38 | #define CPP11_PRIdXLEN_T "td" 39 | #else 40 | #define CPP11_PRIdXLEN_T "d" 41 | #endif 42 | #endif 43 | 44 | namespace cpp11 { 45 | namespace literals { 46 | 47 | constexpr R_xlen_t operator""_xl(unsigned long long int value) { return value; } 48 | 49 | } // namespace literals 50 | 51 | namespace traits { 52 | template 53 | struct get_underlying_type { 54 | using type = T; 55 | }; 56 | } // namespace traits 57 | 58 | namespace detail { 59 | 60 | // Annoyingly, `TYPEOF()` returns an `int` rather than a `SEXPTYPE`, 61 | // which can throw warnings with `-Wsign-compare` on Windows. 62 | inline SEXPTYPE r_typeof(SEXP x) { return static_cast(TYPEOF(x)); } 63 | 64 | /// Get an object from an environment 65 | /// 66 | /// SAFETY: Keep as a pure C function. Call like an R API function, i.e. wrap in `safe[]` 67 | /// as required. 68 | inline SEXP r_env_get(SEXP env, SEXP sym) { 69 | #if defined(R_VERSION) && R_VERSION >= R_Version(4, 5, 0) 70 | const Rboolean inherits = FALSE; 71 | return R_getVar(sym, env, inherits); 72 | #else 73 | SEXP out = Rf_findVarInFrame3(env, sym, TRUE); 74 | 75 | // Replicate the 3 checks from `R_getVar()` (along with exact error message): 76 | // - Object must be found in the `env` 77 | // - `R_MissingArg` can't leak from an `env` anymore 78 | // - Promises can't leak from an `env` anymore 79 | 80 | if (out == R_MissingArg) { 81 | Rf_errorcall(R_NilValue, "argument \"%s\" is missing, with no default", 82 | CHAR(PRINTNAME(sym))); 83 | } 84 | 85 | if (out == R_UnboundValue) { 86 | Rf_errorcall(R_NilValue, "object '%s' not found", CHAR(PRINTNAME(sym))); 87 | } 88 | 89 | if (r_typeof(out) == PROMSXP) { 90 | PROTECT(out); 91 | out = Rf_eval(out, env); 92 | UNPROTECT(1); 93 | } 94 | 95 | return out; 96 | #endif 97 | } 98 | 99 | /// Check if an object exists in an environment 100 | /// 101 | /// SAFETY: Keep as a pure C function. Call like an R API function, i.e. wrap in `safe[]` 102 | /// as required. 103 | inline bool r_env_has(SEXP env, SEXP sym) { 104 | #if R_VERSION >= R_Version(4, 2, 0) 105 | return R_existsVarInFrame(env, sym); 106 | #else 107 | return Rf_findVarInFrame3(env, sym, FALSE) != R_UnboundValue; 108 | #endif 109 | } 110 | 111 | } // namespace detail 112 | 113 | template 114 | inline T na(); 115 | 116 | template 117 | inline typename std::enable_if::type, double>::value, 118 | bool>::type 119 | is_na(const T& value) { 120 | return value == na(); 121 | } 122 | 123 | template 124 | inline typename std::enable_if::type, double>::value, 125 | bool>::type 126 | is_na(const T& value) { 127 | return ISNA(value); 128 | } 129 | 130 | } // namespace cpp11 131 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/altrep.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Rversion.h" 4 | 5 | #if defined(R_VERSION) && R_VERSION >= R_Version(3, 5, 0) 6 | #define HAS_ALTREP 7 | #endif 8 | 9 | #ifndef HAS_ALTREP 10 | 11 | #define ALTREP(x) false 12 | 13 | #define REAL_ELT(x, i) REAL(x)[i] 14 | #define INTEGER_ELT(x, i) INTEGER(x)[i] 15 | #define LOGICAL_ELT(x, i) LOGICAL(x)[i] 16 | #define RAW_ELT(x, i) RAW(x)[i] 17 | 18 | #define SET_REAL_ELT(x, i, val) REAL(x)[i] = val 19 | #define SET_INTEGER_ELT(x, i, val) INTEGER(x)[i] = val 20 | #define SET_LOGICAL_ELT(x, i, val) LOGICAL(x)[i] = val 21 | #define SET_RAW_ELT(x, i, val) RAW(x)[i] = val 22 | 23 | #define REAL_GET_REGION(...) \ 24 | do { \ 25 | } while (false) 26 | 27 | #define INTEGER_GET_REGION(...) \ 28 | do { \ 29 | } while (false) 30 | #endif 31 | 32 | #if !defined HAS_ALTREP || (defined(R_VERSION) && R_VERSION < R_Version(3, 6, 0)) 33 | 34 | #define LOGICAL_GET_REGION(...) \ 35 | do { \ 36 | } while (false) 37 | 38 | #define RAW_GET_REGION(...) \ 39 | do { \ 40 | } while (false) 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/attribute_proxy.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for initializer_list 4 | #include // for string, basic_string 5 | 6 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_install, PROTECT, Rf_... 7 | #include "cpp11/as.hpp" // for as_sexp 8 | #include "cpp11/protect.hpp" // for protect, safe, protect::function 9 | 10 | namespace cpp11 { 11 | 12 | class sexp; 13 | 14 | template 15 | class attribute_proxy { 16 | private: 17 | const T& parent_; 18 | SEXP symbol_; 19 | 20 | public: 21 | attribute_proxy(const T& parent, const char* index) 22 | : parent_(parent), symbol_(safe[Rf_install](index)) {} 23 | 24 | attribute_proxy(const T& parent, const std::string& index) 25 | : parent_(parent), symbol_(safe[Rf_install](index.c_str())) {} 26 | 27 | attribute_proxy(const T& parent, SEXP index) : parent_(parent), symbol_(index) {} 28 | 29 | template 30 | attribute_proxy& operator=(C rhs) { 31 | SEXP value = PROTECT(as_sexp(rhs)); 32 | Rf_setAttrib(parent_.data(), symbol_, value); 33 | UNPROTECT(1); 34 | return *this; 35 | } 36 | 37 | template 38 | attribute_proxy& operator=(std::initializer_list rhs) { 39 | SEXP value = PROTECT(as_sexp(rhs)); 40 | Rf_setAttrib(parent_.data(), symbol_, value); 41 | UNPROTECT(1); 42 | return *this; 43 | } 44 | 45 | operator SEXP() const { return safe[Rf_getAttrib](parent_.data(), symbol_); } 46 | }; 47 | 48 | } // namespace cpp11 49 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/data_frame.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for abs 4 | #include 5 | #include // for initializer_list 6 | #include // for string, basic_string 7 | #include // for move 8 | 9 | #include "R_ext/Arith.h" // for NA_INTEGER 10 | #include "cpp11/R.hpp" // for Rf_xlength, SEXP, SEXPREC, INTEGER 11 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 12 | #include "cpp11/list.hpp" // for list, r_vector<>::r_vector, r_v... 13 | #include "cpp11/r_vector.hpp" // for r_vector 14 | 15 | namespace cpp11 { 16 | 17 | class named_arg; 18 | namespace writable { 19 | class data_frame; 20 | } // namespace writable 21 | 22 | class data_frame : public list { 23 | using list::list; 24 | 25 | friend class writable::data_frame; 26 | 27 | /* we cannot use Rf_getAttrib because it has a special case for c(NA, -n) and creates 28 | * the full vector */ 29 | static SEXP get_attrib0(SEXP x, SEXP sym) { 30 | for (SEXP attr = ATTRIB(x); attr != R_NilValue; attr = CDR(attr)) { 31 | if (TAG(attr) == sym) { 32 | return CAR(attr); 33 | } 34 | } 35 | 36 | return R_NilValue; 37 | } 38 | 39 | static R_xlen_t calc_nrow(SEXP x) { 40 | auto nms = get_attrib0(x, R_RowNamesSymbol); 41 | bool has_short_rownames = 42 | (Rf_isInteger(nms) && Rf_xlength(nms) == 2 && INTEGER(nms)[0] == NA_INTEGER); 43 | if (has_short_rownames) { 44 | return static_cast(abs(INTEGER(nms)[1])); 45 | } 46 | 47 | if (!Rf_isNull(nms)) { 48 | return Rf_xlength(nms); 49 | } 50 | 51 | if (Rf_xlength(x) == 0) { 52 | return 0; 53 | } 54 | 55 | return Rf_xlength(VECTOR_ELT(x, 0)); 56 | } 57 | 58 | public: 59 | /* Adapted from 60 | * https://github.com/wch/r-source/blob/f2a0dfab3e26fb42b8b296fcba40cbdbdbec767d/src/main/attrib.c#L198-L207 61 | */ 62 | R_xlen_t nrow() const { return calc_nrow(*this); } 63 | R_xlen_t ncol() const { return size(); } 64 | }; 65 | 66 | namespace writable { 67 | class data_frame : public cpp11::data_frame { 68 | private: 69 | writable::list set_data_frame_attributes(writable::list&& x) { 70 | return set_data_frame_attributes(std::move(x), calc_nrow(x)); 71 | } 72 | 73 | writable::list set_data_frame_attributes(writable::list&& x, R_xlen_t nrow) { 74 | x.attr(R_RowNamesSymbol) = {NA_INTEGER, -static_cast(nrow)}; 75 | x.attr(R_ClassSymbol) = "data.frame"; 76 | return std::move(x); 77 | } 78 | 79 | public: 80 | data_frame(const SEXP data) : cpp11::data_frame(set_data_frame_attributes(data)) {} 81 | data_frame(const SEXP data, bool is_altrep) 82 | : cpp11::data_frame(set_data_frame_attributes(data), is_altrep) {} 83 | data_frame(const SEXP data, bool is_altrep, R_xlen_t nrow) 84 | : cpp11::data_frame(set_data_frame_attributes(data, nrow), is_altrep) {} 85 | data_frame(std::initializer_list il) 86 | : cpp11::data_frame(set_data_frame_attributes(writable::list(il))) {} 87 | data_frame(std::initializer_list il) 88 | : cpp11::data_frame(set_data_frame_attributes(writable::list(il))) {} 89 | 90 | using cpp11::data_frame::ncol; 91 | using cpp11::data_frame::nrow; 92 | 93 | attribute_proxy attr(const char* name) const { return {*this, name}; } 94 | 95 | attribute_proxy attr(const std::string& name) const { 96 | return {*this, name.c_str()}; 97 | } 98 | 99 | attribute_proxy attr(SEXP name) const { return {*this, name}; } 100 | 101 | attribute_proxy names() const { return {*this, R_NamesSymbol}; } 102 | }; 103 | 104 | } // namespace writable 105 | 106 | } // namespace cpp11 107 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/declarations.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | // Davis: From what I can tell, you'd only ever define this if you need to include 8 | // `declarations.hpp` manually in a file, i.e. to possibly use `BEGIN_CPP11` with a 9 | // custom `END_CPP11`, as textshaping does do. Otherwise, `declarations.hpp` is included 10 | // in `code.cpp` and should contain all of the cpp11 type definitions that the generated 11 | // function signatures need to link against. 12 | #ifndef CPP11_PARTIAL 13 | #include "cpp11.hpp" 14 | namespace writable = ::cpp11::writable; 15 | using namespace ::cpp11; 16 | #endif 17 | 18 | #include 19 | 20 | namespace cpp11 { 21 | // No longer used, but was previously used in `code.cpp` code generation in cpp11 0.1.0. 22 | // `code.cpp` could be generated with cpp11 0.1.0, but the package could be compiled with 23 | // cpp11 >0.1.0, so `unmove()` must exist in newer cpp11 too. Eventually remove this once 24 | // we decide enough time has gone by since `unmove()` was removed. 25 | // https://github.com/r-lib/cpp11/issues/88 26 | // https://github.com/r-lib/cpp11/pull/75 27 | template 28 | T& unmove(T&& t) { 29 | return t; 30 | } 31 | } // namespace cpp11 32 | 33 | #ifdef HAS_UNWIND_PROTECT 34 | #define CPP11_UNWIND R_ContinueUnwind(err); 35 | #else 36 | #define CPP11_UNWIND \ 37 | do { \ 38 | } while (false); 39 | #endif 40 | 41 | #define CPP11_ERROR_BUFSIZE 8192 42 | 43 | #define BEGIN_CPP11 \ 44 | SEXP err = R_NilValue; \ 45 | char buf[CPP11_ERROR_BUFSIZE] = ""; \ 46 | try { 47 | #define END_CPP11 \ 48 | } \ 49 | catch (cpp11::unwind_exception & e) { \ 50 | err = e.token; \ 51 | } \ 52 | catch (std::exception & e) { \ 53 | strncpy(buf, e.what(), sizeof(buf) - 1); \ 54 | } \ 55 | catch (...) { \ 56 | strncpy(buf, "C++ error (unknown cause)", sizeof(buf) - 1); \ 57 | } \ 58 | if (buf[0] != '\0') { \ 59 | Rf_errorcall(R_NilValue, "%s", buf); \ 60 | } else if (err != R_NilValue) { \ 61 | CPP11_UNWIND \ 62 | } \ 63 | return R_NilValue; 64 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/doubles.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for min, tranform 4 | #include // for array 5 | #include // for initializer_list 6 | 7 | #include "R_ext/Arith.h" // for ISNA 8 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_allocVector, REAL 9 | #include "cpp11/as.hpp" // for as_sexp 10 | #include "cpp11/protect.hpp" // for safe 11 | #include "cpp11/r_vector.hpp" // for vector, vector<>::proxy, vector<>::... 12 | #include "cpp11/sexp.hpp" // for sexp 13 | 14 | // Specializations for doubles 15 | 16 | namespace cpp11 { 17 | 18 | template <> 19 | inline SEXPTYPE r_vector::get_sexptype() { 20 | return REALSXP; 21 | } 22 | 23 | template <> 24 | inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, 25 | R_xlen_t i) { 26 | // NOPROTECT: likely too costly to unwind protect every elt 27 | return REAL_ELT(x, i); 28 | } 29 | 30 | template <> 31 | inline typename r_vector::underlying_type* r_vector::get_p(bool is_altrep, 32 | SEXP data) { 33 | if (is_altrep) { 34 | return nullptr; 35 | } else { 36 | return REAL(data); 37 | } 38 | } 39 | 40 | template <> 41 | inline typename r_vector::underlying_type const* r_vector::get_const_p( 42 | bool is_altrep, SEXP data) { 43 | return REAL_OR_NULL(data); 44 | } 45 | 46 | template <> 47 | inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, 48 | typename r_vector::underlying_type* buf) { 49 | // NOPROTECT: likely too costly to unwind protect here 50 | REAL_GET_REGION(x, i, n, buf); 51 | } 52 | 53 | template <> 54 | inline bool r_vector::const_iterator::use_buf(bool is_altrep) { 55 | return is_altrep; 56 | } 57 | 58 | typedef r_vector doubles; 59 | 60 | namespace writable { 61 | 62 | template <> 63 | inline void r_vector::set_elt(SEXP x, R_xlen_t i, 64 | typename r_vector::underlying_type value) { 65 | // NOPROTECT: Likely too costly to unwind protect every set elt 66 | SET_REAL_ELT(x, i, value); 67 | } 68 | 69 | typedef r_vector doubles; 70 | 71 | } // namespace writable 72 | 73 | typedef r_vector integers; 74 | 75 | inline doubles as_doubles(SEXP x) { 76 | if (detail::r_typeof(x) == REALSXP) { 77 | return doubles(x); 78 | } 79 | 80 | else if (detail::r_typeof(x) == INTSXP) { 81 | integers xn(x); 82 | size_t len = xn.size(); 83 | writable::doubles ret(len); 84 | std::transform(xn.begin(), xn.end(), ret.begin(), [](int value) { 85 | return value == NA_INTEGER ? NA_REAL : static_cast(value); 86 | }); 87 | return ret; 88 | } 89 | 90 | throw type_error(REALSXP, detail::r_typeof(x)); 91 | } 92 | 93 | template <> 94 | inline double na() { 95 | return NA_REAL; 96 | } 97 | 98 | } // namespace cpp11 99 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/environment.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for string, basic_string 4 | 5 | #include "Rversion.h" // for R_VERSION, R_Version 6 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_install, r_env_get... 7 | #include "cpp11/as.hpp" // for as_sexp 8 | #include "cpp11/protect.hpp" // for protect, protect::function, safe, unwin... 9 | #include "cpp11/sexp.hpp" // for sexp 10 | 11 | #if R_VERSION >= R_Version(4, 0, 0) 12 | #define HAS_REMOVE_VAR_FROM_FRAME 13 | #endif 14 | 15 | #ifndef HAS_REMOVE_VAR_FROM_FRAME 16 | #include "cpp11/function.hpp" 17 | #endif 18 | 19 | namespace cpp11 { 20 | 21 | class environment { 22 | private: 23 | sexp env_; 24 | 25 | class proxy { 26 | SEXP parent_; 27 | SEXP name_; 28 | 29 | public: 30 | proxy(SEXP parent, SEXP name) : parent_(parent), name_(name) {} 31 | 32 | template 33 | proxy& operator=(T value) { 34 | safe[Rf_defineVar](name_, as_sexp(value), parent_); 35 | return *this; 36 | } 37 | operator SEXP() const { return safe[detail::r_env_get](parent_, name_); }; 38 | operator sexp() const { return SEXP(); }; 39 | }; 40 | 41 | public: 42 | environment(SEXP env) : env_(env) {} 43 | environment(sexp env) : env_(env) {} 44 | proxy operator[](const SEXP name) const { return {env_, name}; } 45 | proxy operator[](const char* name) const { return operator[](safe[Rf_install](name)); } 46 | proxy operator[](const std::string& name) const { return operator[](name.c_str()); } 47 | 48 | bool exists(SEXP name) const { return safe[detail::r_env_has](env_, name); } 49 | bool exists(const char* name) const { return exists(safe[Rf_install](name)); } 50 | bool exists(const std::string& name) const { return exists(name.c_str()); } 51 | 52 | void remove(SEXP name) { 53 | PROTECT(name); 54 | #ifdef HAS_REMOVE_VAR_FROM_FRAME 55 | R_removeVarFromFrame(name, env_); 56 | #else 57 | auto remove = package("base")["remove"]; 58 | remove(name, "envir"_nm = env_); 59 | #endif 60 | UNPROTECT(1); 61 | } 62 | 63 | void remove(const char* name) { remove(safe[Rf_install](name)); } 64 | 65 | R_xlen_t size() const { return Rf_xlength(env_); } 66 | 67 | operator SEXP() const { return env_; } 68 | }; 69 | 70 | } // namespace cpp11 71 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/external_pointer.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for nullptr_t, NULL 4 | #include // for bad_weak_ptr 5 | #include // for add_lvalue_reference 6 | 7 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, R_NilValue 8 | #include "cpp11/protect.hpp" // for protect, safe, protect::function 9 | #include "cpp11/r_bool.hpp" // for r_bool 10 | #include "cpp11/r_vector.hpp" // for type_error 11 | #include "cpp11/sexp.hpp" // for sexp 12 | 13 | namespace cpp11 { 14 | 15 | template 16 | void default_deleter(T* obj) { 17 | delete obj; 18 | } 19 | 20 | template > 21 | class external_pointer { 22 | private: 23 | sexp data_ = R_NilValue; 24 | 25 | static SEXP valid_type(SEXP data) { 26 | if (data == nullptr) { 27 | throw type_error(EXTPTRSXP, NILSXP); 28 | } 29 | if (detail::r_typeof(data) != EXTPTRSXP) { 30 | throw type_error(EXTPTRSXP, detail::r_typeof(data)); 31 | } 32 | 33 | return data; 34 | } 35 | 36 | static void r_deleter(SEXP p) { 37 | if (detail::r_typeof(p) != EXTPTRSXP) return; 38 | 39 | T* ptr = static_cast(R_ExternalPtrAddr(p)); 40 | 41 | if (ptr == NULL) { 42 | return; 43 | } 44 | 45 | R_ClearExternalPtr(p); 46 | 47 | Deleter(ptr); 48 | } 49 | 50 | public: 51 | using pointer = T*; 52 | 53 | external_pointer() noexcept {} 54 | external_pointer(std::nullptr_t) noexcept {} 55 | 56 | external_pointer(SEXP data) : data_(valid_type(data)) {} 57 | 58 | external_pointer(pointer p, bool use_deleter = true, bool finalize_on_exit = true) 59 | : data_(safe[R_MakeExternalPtr]((void*)p, R_NilValue, R_NilValue)) { 60 | if (use_deleter) { 61 | R_RegisterCFinalizerEx(data_, r_deleter, static_cast(finalize_on_exit)); 62 | } 63 | } 64 | 65 | external_pointer(const external_pointer& rhs) { 66 | data_ = safe[Rf_shallow_duplicate](rhs.data_); 67 | } 68 | 69 | external_pointer(external_pointer&& rhs) { reset(rhs.release()); } 70 | 71 | external_pointer& operator=(external_pointer&& rhs) noexcept { reset(rhs.release()); } 72 | 73 | external_pointer& operator=(std::nullptr_t) noexcept { reset(); }; 74 | 75 | operator SEXP() const noexcept { return data_; } 76 | 77 | pointer get() const noexcept { 78 | pointer addr = static_cast(R_ExternalPtrAddr(data_)); 79 | if (addr == nullptr) { 80 | return nullptr; 81 | } 82 | return addr; 83 | } 84 | 85 | typename std::add_lvalue_reference::type operator*() { 86 | pointer addr = get(); 87 | if (addr == nullptr) { 88 | throw std::bad_weak_ptr(); 89 | } 90 | return *get(); 91 | } 92 | 93 | pointer operator->() const { 94 | pointer addr = get(); 95 | if (addr == nullptr) { 96 | throw std::bad_weak_ptr(); 97 | } 98 | return get(); 99 | } 100 | 101 | pointer release() noexcept { 102 | if (get() == nullptr) { 103 | return nullptr; 104 | } 105 | pointer ptr = get(); 106 | R_ClearExternalPtr(data_); 107 | 108 | return ptr; 109 | } 110 | 111 | void reset(pointer ptr = pointer()) { 112 | SEXP old_data = data_; 113 | data_ = safe[R_MakeExternalPtr]((void*)ptr, R_NilValue, R_NilValue); 114 | r_deleter(old_data); 115 | } 116 | 117 | void swap(external_pointer& other) noexcept { 118 | SEXP tmp = other.data_; 119 | other.data_ = data_; 120 | data_ = tmp; 121 | } 122 | 123 | operator bool() noexcept { return data_ != nullptr; } 124 | }; 125 | 126 | template 127 | void swap(external_pointer& lhs, external_pointer& rhs) noexcept { 128 | lhs.swap(rhs); 129 | } 130 | 131 | template 132 | bool operator==(const external_pointer& x, 133 | const external_pointer& y) { 134 | return x.data_ == y.data_; 135 | } 136 | 137 | template 138 | bool operator!=(const external_pointer& x, 139 | const external_pointer& y) { 140 | return x.data_ != y.data_; 141 | } 142 | 143 | template 144 | bool operator<(const external_pointer& x, 145 | const external_pointer& y) { 146 | return x.data_ < y.data_; 147 | } 148 | 149 | template 150 | bool operator<=(const external_pointer& x, 151 | const external_pointer& y) { 152 | return x.data_ <= y.data_; 153 | } 154 | 155 | template 156 | bool operator>(const external_pointer& x, 157 | const external_pointer& y) { 158 | return x.data_ > y.data_; 159 | } 160 | 161 | template 162 | bool operator>=(const external_pointer& x, 163 | const external_pointer& y) { 164 | return x.data_ >= y.data_; 165 | } 166 | 167 | } // namespace cpp11 168 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/function.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for strcmp 4 | 5 | #include // for snprintf 6 | #include // for string, basic_string 7 | #include // for forward 8 | 9 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, CDR, Rf_install, SETCAR 10 | #include "cpp11/as.hpp" // for as_sexp 11 | #include "cpp11/named_arg.hpp" // for named_arg 12 | #include "cpp11/protect.hpp" // for protect, protect::function, safe 13 | #include "cpp11/sexp.hpp" // for sexp 14 | 15 | namespace cpp11 { 16 | 17 | class function { 18 | public: 19 | function(SEXP data) : data_(data) {} 20 | 21 | template 22 | sexp operator()(Args&&... args) const { 23 | // Size of the arguments plus one for the function name itself 24 | R_xlen_t num_args = sizeof...(args) + 1; 25 | 26 | sexp call(safe[Rf_allocVector](LANGSXP, num_args)); 27 | 28 | construct_call(call, data_, std::forward(args)...); 29 | 30 | return safe[Rf_eval](call, R_GlobalEnv); 31 | } 32 | 33 | private: 34 | sexp data_; 35 | 36 | template 37 | void construct_call(SEXP val, const named_arg& arg, Args&&... args) const { 38 | SETCAR(val, arg.value()); 39 | SET_TAG(val, safe[Rf_install](arg.name())); 40 | val = CDR(val); 41 | construct_call(val, std::forward(args)...); 42 | } 43 | 44 | // Construct the call recursively, each iteration adds an Arg to the pairlist. 45 | template 46 | void construct_call(SEXP val, const T& arg, Args&&... args) const { 47 | SETCAR(val, as_sexp(arg)); 48 | val = CDR(val); 49 | construct_call(val, std::forward(args)...); 50 | } 51 | 52 | // Base case, just return 53 | void construct_call(SEXP val) const {} 54 | }; 55 | 56 | class package { 57 | public: 58 | package(const char* name) : data_(get_namespace(name)) {} 59 | package(const std::string& name) : data_(get_namespace(name.c_str())) {} 60 | function operator[](const char* name) { 61 | return safe[Rf_findFun](safe[Rf_install](name), data_); 62 | } 63 | function operator[](const std::string& name) { return operator[](name.c_str()); } 64 | 65 | private: 66 | static SEXP get_namespace(const char* name) { 67 | if (strcmp(name, "base") == 0) { 68 | return R_BaseEnv; 69 | } 70 | sexp name_sexp = safe[Rf_install](name); 71 | return safe[detail::r_env_get](R_NamespaceRegistry, name_sexp); 72 | } 73 | 74 | // Either base env or in namespace registry, so no protection needed 75 | SEXP data_; 76 | }; 77 | 78 | namespace detail { 79 | 80 | // Special internal way to call `base::message()` 81 | // 82 | // - Pure C, so call with `safe[]` 83 | // - Holds a `static SEXP` for the `base::message` function protected with 84 | // `R_PreserveObject()` 85 | // 86 | // We don't use a `static cpp11::function` because that will infinitely retain a cell in 87 | // our preserve list, which can throw off our counts in the preserve list tests. 88 | inline void r_message(const char* x) { 89 | static SEXP fn = NULL; 90 | 91 | if (fn == NULL) { 92 | fn = Rf_findFun(Rf_install("message"), R_BaseEnv); 93 | R_PreserveObject(fn); 94 | } 95 | 96 | SEXP x_char = PROTECT(Rf_mkCharCE(x, CE_UTF8)); 97 | SEXP x_string = PROTECT(Rf_ScalarString(x_char)); 98 | 99 | SEXP call = PROTECT(Rf_lang2(fn, x_string)); 100 | 101 | Rf_eval(call, R_GlobalEnv); 102 | 103 | UNPROTECT(3); 104 | } 105 | 106 | } // namespace detail 107 | 108 | inline void message(const char* fmt_arg) { 109 | #ifdef CPP11_USE_FMT 110 | std::string msg = fmt::format(fmt_arg); 111 | safe[detail::r_message](msg.c_str()); 112 | #else 113 | char buff[1024]; 114 | int msg; 115 | msg = std::snprintf(buff, 1024, "%s", fmt_arg); 116 | if (msg >= 0 && msg < 1024) { 117 | safe[detail::r_message](buff); 118 | } 119 | #endif 120 | } 121 | 122 | template 123 | void message(const char* fmt_arg, Args... args) { 124 | #ifdef CPP11_USE_FMT 125 | std::string msg = fmt::format(fmt_arg, args...); 126 | safe[detail::r_message](msg.c_str()); 127 | #else 128 | char buff[1024]; 129 | int msg; 130 | msg = std::snprintf(buff, 1024, fmt_arg, args...); 131 | if (msg >= 0 && msg < 1024) { 132 | safe[detail::r_message](buff); 133 | } 134 | #endif 135 | } 136 | 137 | inline void message(const std::string& fmt_arg) { message(fmt_arg.c_str()); } 138 | 139 | template 140 | void message(const std::string& fmt_arg, Args... args) { 141 | message(fmt_arg.c_str(), args...); 142 | } 143 | 144 | } // namespace cpp11 145 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/integers.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for min 4 | #include // for array 5 | #include // for initializer_list 6 | 7 | #include "R_ext/Arith.h" // for NA_INTEGER 8 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_allocVector 9 | #include "cpp11/as.hpp" // for as_sexp 10 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 11 | #include "cpp11/protect.hpp" // for safe 12 | #include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy 13 | #include "cpp11/sexp.hpp" // for sexp 14 | 15 | // Specializations for integers 16 | 17 | namespace cpp11 { 18 | 19 | template <> 20 | inline SEXPTYPE r_vector::get_sexptype() { 21 | return INTSXP; 22 | } 23 | 24 | template <> 25 | inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, 26 | R_xlen_t i) { 27 | // NOPROTECT: likely too costly to unwind protect every elt 28 | return INTEGER_ELT(x, i); 29 | } 30 | 31 | template <> 32 | inline typename r_vector::underlying_type* r_vector::get_p(bool is_altrep, 33 | SEXP data) { 34 | if (is_altrep) { 35 | return nullptr; 36 | } else { 37 | return INTEGER(data); 38 | } 39 | } 40 | 41 | template <> 42 | inline typename r_vector::underlying_type const* r_vector::get_const_p( 43 | bool is_altrep, SEXP data) { 44 | return INTEGER_OR_NULL(data); 45 | } 46 | 47 | template <> 48 | inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, 49 | typename r_vector::underlying_type* buf) { 50 | // NOPROTECT: likely too costly to unwind protect here 51 | INTEGER_GET_REGION(x, i, n, buf); 52 | } 53 | 54 | template <> 55 | inline bool r_vector::const_iterator::use_buf(bool is_altrep) { 56 | return is_altrep; 57 | } 58 | 59 | typedef r_vector integers; 60 | 61 | namespace writable { 62 | 63 | template <> 64 | inline void r_vector::set_elt(SEXP x, R_xlen_t i, 65 | typename r_vector::underlying_type value) { 66 | // NOPROTECT: Likely too costly to unwind protect every set elt 67 | SET_INTEGER_ELT(x, i, value); 68 | } 69 | 70 | typedef r_vector integers; 71 | 72 | } // namespace writable 73 | 74 | template <> 75 | inline int na() { 76 | return NA_INTEGER; 77 | } 78 | 79 | // forward declaration 80 | 81 | typedef r_vector doubles; 82 | 83 | inline integers as_integers(SEXP x) { 84 | if (detail::r_typeof(x) == INTSXP) { 85 | return integers(x); 86 | } else if (detail::r_typeof(x) == REALSXP) { 87 | doubles xn(x); 88 | writable::integers ret(xn.size()); 89 | std::transform(xn.begin(), xn.end(), ret.begin(), [](double value) { 90 | if (ISNA(value)) { 91 | return NA_INTEGER; 92 | } 93 | if (!is_convertible_without_loss_to_integer(value)) { 94 | throw std::runtime_error("All elements must be integer-like"); 95 | } 96 | return static_cast(value); 97 | }); 98 | return ret; 99 | } 100 | 101 | throw type_error(INTSXP, detail::r_typeof(x)); 102 | } 103 | 104 | } // namespace cpp11 105 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/list.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for initializer_list 4 | 5 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, SET_VECTOR_ELT 6 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 7 | #include "cpp11/protect.hpp" // for safe 8 | #include "cpp11/r_string.hpp" // for r_string 9 | #include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy 10 | #include "cpp11/sexp.hpp" // for sexp 11 | 12 | // Specializations for list 13 | 14 | namespace cpp11 { 15 | 16 | template <> 17 | inline SEXPTYPE r_vector::get_sexptype() { 18 | return VECSXP; 19 | } 20 | 21 | template <> 22 | inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, 23 | R_xlen_t i) { 24 | // NOPROTECT: likely too costly to unwind protect every elt 25 | return VECTOR_ELT(x, i); 26 | } 27 | 28 | template <> 29 | inline typename r_vector::underlying_type* r_vector::get_p(bool, SEXP) { 30 | return nullptr; 31 | } 32 | 33 | template <> 34 | inline typename r_vector::underlying_type const* r_vector::get_const_p( 35 | bool is_altrep, SEXP data) { 36 | // No `VECTOR_PTR_OR_NULL()` 37 | if (is_altrep) { 38 | return nullptr; 39 | } else { 40 | // TODO: Use `VECTOR_PTR_RO()` conditionally once R 4.5.0 is officially released 41 | return static_cast(DATAPTR_RO(data)); 42 | } 43 | } 44 | 45 | /// Specialization for lists, where `x["oob"]` returns `R_NilValue`, like at the R level 46 | template <> 47 | inline SEXP r_vector::get_oob() { 48 | return R_NilValue; 49 | } 50 | 51 | template <> 52 | inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, 53 | typename r_vector::underlying_type* buf) { 54 | cpp11::stop("Unreachable!"); 55 | } 56 | 57 | template <> 58 | inline bool r_vector::const_iterator::use_buf(bool is_altrep) { 59 | return false; 60 | } 61 | 62 | typedef r_vector list; 63 | 64 | namespace writable { 65 | 66 | template <> 67 | inline void r_vector::set_elt(SEXP x, R_xlen_t i, 68 | typename r_vector::underlying_type value) { 69 | // NOPROTECT: Likely too costly to unwind protect every set elt 70 | SET_VECTOR_ELT(x, i, value); 71 | } 72 | 73 | // Requires specialization to handle the fact that, for lists, each element of the 74 | // initializer list is considered the scalar "element", i.e. we don't expect that 75 | // each `named_arg` contains a list of length 1, like we do for the other vector types. 76 | // This means we don't need type checks, length 1 checks, or `get_elt()` for lists. 77 | template <> 78 | inline r_vector::r_vector(std::initializer_list il) 79 | : cpp11::r_vector(safe[Rf_allocVector](VECSXP, il.size())), 80 | capacity_(il.size()) { 81 | unwind_protect([&] { 82 | SEXP names = PROTECT(Rf_allocVector(STRSXP, capacity_)); 83 | Rf_setAttrib(data_, R_NamesSymbol, names); 84 | 85 | auto it = il.begin(); 86 | 87 | for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { 88 | SEXP elt = it->value(); 89 | set_elt(data_, i, elt); 90 | 91 | SEXP name = Rf_mkCharCE(it->name(), CE_UTF8); 92 | SET_STRING_ELT(names, i, name); 93 | } 94 | UNPROTECT(1); 95 | }); 96 | } 97 | 98 | typedef r_vector list; 99 | 100 | } // namespace writable 101 | 102 | } // namespace cpp11 103 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/list_of.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for string, basic_string 4 | 5 | #include "cpp11/R.hpp" // for R_xlen_t, SEXP, SEXPREC, LONG_VECTOR_SUPPORT 6 | #include "cpp11/list.hpp" // for list 7 | 8 | namespace cpp11 { 9 | 10 | template 11 | class list_of : public list { 12 | public: 13 | list_of(const list& data) : list(data) {} 14 | 15 | #ifdef LONG_VECTOR_SUPPORT 16 | T operator[](const int pos) const { return operator[](static_cast(pos)); } 17 | #endif 18 | 19 | T operator[](const R_xlen_t pos) const { return list::operator[](pos); } 20 | 21 | T operator[](const char* pos) const { return list::operator[](pos); } 22 | 23 | T operator[](const std::string& pos) const { return list::operator[](pos.c_str()); } 24 | }; 25 | 26 | namespace writable { 27 | template 28 | class list_of : public writable::list { 29 | public: 30 | list_of(const list& data) : writable::list(data) {} 31 | list_of(R_xlen_t n) : writable::list(n) {} 32 | 33 | class proxy { 34 | private: 35 | writable::list::proxy data_; 36 | 37 | public: 38 | proxy(const writable::list::proxy& data) : data_(data) {} 39 | 40 | operator T() const { return static_cast(*this); } 41 | operator SEXP() const { return static_cast(data_); } 42 | #ifdef LONG_VECTOR_SUPPORT 43 | typename T::proxy operator[](int pos) { return static_cast(data_)[pos]; } 44 | #endif 45 | typename T::proxy operator[](R_xlen_t pos) { return static_cast(data_)[pos]; } 46 | proxy operator[](const char* pos) { static_cast(data_)[pos]; } 47 | proxy operator[](const std::string& pos) { return static_cast(data_)[pos]; } 48 | proxy& operator=(const T& rhs) { 49 | data_ = rhs; 50 | 51 | return *this; 52 | } 53 | }; 54 | 55 | #ifdef LONG_VECTOR_SUPPORT 56 | proxy operator[](int pos) { 57 | return {writable::list::operator[](static_cast(pos))}; 58 | } 59 | #endif 60 | 61 | proxy operator[](R_xlen_t pos) { return writable::list::operator[](pos); } 62 | 63 | proxy operator[](const char* pos) { return {writable::list::operator[](pos)}; } 64 | 65 | proxy operator[](const std::string& pos) { 66 | return writable::list::operator[](pos.c_str()); 67 | } 68 | }; 69 | } // namespace writable 70 | 71 | } // namespace cpp11 72 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/logicals.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for min 4 | #include // for array 5 | #include // for initializer_list 6 | 7 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_all... 8 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 9 | #include "cpp11/protect.hpp" // for safe 10 | #include "cpp11/r_bool.hpp" // for r_bool 11 | #include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy 12 | #include "cpp11/sexp.hpp" // for sexp 13 | 14 | // Specializations for logicals 15 | 16 | namespace cpp11 { 17 | 18 | template <> 19 | inline SEXPTYPE r_vector::get_sexptype() { 20 | return LGLSXP; 21 | } 22 | 23 | template <> 24 | inline typename r_vector::underlying_type r_vector::get_elt(SEXP x, 25 | R_xlen_t i) { 26 | // NOPROTECT: likely too costly to unwind protect every elt 27 | return LOGICAL_ELT(x, i); 28 | } 29 | 30 | template <> 31 | inline typename r_vector::underlying_type* r_vector::get_p(bool is_altrep, 32 | SEXP data) { 33 | if (is_altrep) { 34 | return nullptr; 35 | } else { 36 | return LOGICAL(data); 37 | } 38 | } 39 | 40 | template <> 41 | inline typename r_vector::underlying_type const* r_vector::get_const_p( 42 | bool is_altrep, SEXP data) { 43 | return LOGICAL_OR_NULL(data); 44 | } 45 | 46 | template <> 47 | inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, 48 | typename r_vector::underlying_type* buf) { 49 | // NOPROTECT: likely too costly to unwind protect here 50 | LOGICAL_GET_REGION(x, i, n, buf); 51 | } 52 | 53 | template <> 54 | inline bool r_vector::const_iterator::use_buf(bool is_altrep) { 55 | return is_altrep; 56 | } 57 | 58 | typedef r_vector logicals; 59 | 60 | namespace writable { 61 | 62 | template <> 63 | inline void r_vector::set_elt(SEXP x, R_xlen_t i, 64 | typename r_vector::underlying_type value) { 65 | // NOPROTECT: Likely too costly to unwind protect every set elt 66 | SET_LOGICAL_ELT(x, i, value); 67 | } 68 | 69 | inline bool operator==(const r_vector::proxy& lhs, r_bool rhs) { 70 | return static_cast(lhs).operator==(rhs); 71 | } 72 | 73 | typedef r_vector logicals; 74 | 75 | } // namespace writable 76 | 77 | } // namespace cpp11 78 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/matrix.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include // for string 5 | 6 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, R_xlen_t, INT... 7 | #include "cpp11/r_bool.hpp" // for r_bool 8 | #include "cpp11/r_string.hpp" // for r_string 9 | #include "cpp11/r_vector.hpp" // for r_vector 10 | #include "cpp11/sexp.hpp" // for sexp 11 | 12 | namespace cpp11 { 13 | 14 | // matrix dimensions 15 | struct matrix_dims { 16 | protected: 17 | const int nrow_; 18 | const int ncol_; 19 | 20 | public: 21 | matrix_dims(SEXP data) : nrow_(Rf_nrows(data)), ncol_(Rf_ncols(data)) {} 22 | matrix_dims(int nrow, int ncol) : nrow_(nrow), ncol_(ncol) {} 23 | 24 | int nrow() const { return nrow_; } 25 | int ncol() const { return ncol_; } 26 | }; 27 | 28 | // base type for dimension-wise matrix access specialization 29 | struct matrix_slice {}; 30 | 31 | struct by_row : public matrix_slice {}; 32 | struct by_column : public matrix_slice {}; 33 | 34 | // basic properties of matrix slices 35 | template 36 | struct matrix_slices : public matrix_dims { 37 | public: 38 | using matrix_dims::matrix_dims; 39 | using matrix_dims::ncol; 40 | using matrix_dims::nrow; 41 | 42 | int nslices() const; 43 | int slice_size() const; 44 | int slice_stride() const; 45 | int slice_offset(int pos) const; 46 | }; 47 | 48 | // basic properties of matrix row slices 49 | template <> 50 | struct matrix_slices : public matrix_dims { 51 | public: 52 | using matrix_dims::matrix_dims; 53 | using matrix_dims::ncol; 54 | using matrix_dims::nrow; 55 | 56 | int nslices() const { return nrow(); } 57 | int slice_size() const { return ncol(); } 58 | int slice_stride() const { return nrow(); } 59 | int slice_offset(int pos) const { return pos; } 60 | }; 61 | 62 | // basic properties of matrix column slices 63 | template <> 64 | struct matrix_slices : public matrix_dims { 65 | public: 66 | using matrix_dims::matrix_dims; 67 | using matrix_dims::ncol; 68 | using matrix_dims::nrow; 69 | 70 | int nslices() const { return ncol(); } 71 | int slice_size() const { return nrow(); } 72 | int slice_stride() const { return 1; } 73 | int slice_offset(int pos) const { return pos * nrow(); } 74 | }; 75 | 76 | template 77 | class matrix : public matrix_slices { 78 | private: 79 | V vector_; 80 | 81 | public: 82 | // matrix slice: row (if S=by_row) or a column (if S=by_column) 83 | class slice { 84 | private: 85 | const matrix& parent_; 86 | int index_; // slice index 87 | int offset_; // index of the first slice element in parent_.vector_ 88 | 89 | public: 90 | slice(const matrix& parent, int index) 91 | : parent_(parent), index_(index), offset_(parent.slice_offset(index)) {} 92 | 93 | R_xlen_t stride() const { return parent_.slice_stride(); } 94 | R_xlen_t size() const { return parent_.slice_size(); } 95 | 96 | bool operator==(const slice& rhs) const { 97 | return (index_ == rhs.index_) && (parent_.data() == rhs.parent_.data()); 98 | } 99 | bool operator!=(const slice& rhs) const { return !operator==(rhs); } 100 | 101 | T operator[](int pos) const { return parent_.vector_[offset_ + stride() * pos]; } 102 | 103 | // iterates elements of a slice 104 | class iterator { 105 | private: 106 | const slice& slice_; 107 | int pos_; 108 | 109 | public: 110 | using difference_type = std::ptrdiff_t; 111 | using value_type = T; 112 | using pointer = T*; 113 | using reference = T&; 114 | using iterator_category = std::forward_iterator_tag; 115 | 116 | iterator(const slice& slice, R_xlen_t pos) : slice_(slice), pos_(pos) {} 117 | 118 | iterator& operator++() { 119 | ++pos_; 120 | return *this; 121 | } 122 | 123 | bool operator==(const iterator& rhs) const { 124 | return (pos_ == rhs.pos_) && (slice_ == rhs.slice_); 125 | } 126 | bool operator!=(const iterator& rhs) const { return !operator==(rhs); } 127 | 128 | T operator*() const { return slice_[pos_]; }; 129 | }; 130 | 131 | iterator begin() const { return {*this, 0}; } 132 | iterator end() const { return {*this, size()}; } 133 | }; 134 | friend slice; 135 | 136 | // iterates slices (rows or columns -- depending on S template param) of a matrix 137 | class slice_iterator { 138 | private: 139 | const matrix& parent_; 140 | int pos_; 141 | 142 | public: 143 | using difference_type = std::ptrdiff_t; 144 | using value_type = slice; 145 | using pointer = slice*; 146 | using reference = slice&; 147 | using iterator_category = std::forward_iterator_tag; 148 | 149 | slice_iterator(const matrix& parent, R_xlen_t pos) : parent_(parent), pos_(pos) {} 150 | 151 | slice_iterator& operator++() { 152 | ++pos_; 153 | return *this; 154 | } 155 | 156 | bool operator==(const slice_iterator& rhs) const { 157 | return (pos_ == rhs.pos_) && (parent_.data() == rhs.parent_.data()); 158 | } 159 | bool operator!=(const slice_iterator& rhs) const { return !operator==(rhs); } 160 | 161 | slice operator*() { return parent_[pos_]; }; 162 | }; 163 | 164 | public: 165 | matrix(SEXP data) : matrix_slices(data), vector_(data) {} 166 | 167 | template 168 | matrix(const cpp11::matrix& rhs) 169 | : matrix_slices(rhs.nrow(), rhs.ncol()), vector_(rhs.vector()) {} 170 | 171 | matrix(int nrow, int ncol) 172 | : matrix_slices(nrow, ncol), vector_(R_xlen_t(nrow * ncol)) { 173 | vector_.attr(R_DimSymbol) = {nrow, ncol}; 174 | } 175 | 176 | using matrix_slices::nrow; 177 | using matrix_slices::ncol; 178 | using matrix_slices::nslices; 179 | using matrix_slices::slice_size; 180 | using matrix_slices::slice_stride; 181 | using matrix_slices::slice_offset; 182 | 183 | V vector() const { return vector_; } 184 | 185 | SEXP data() const { return vector_.data(); } 186 | 187 | R_xlen_t size() const { return vector_.size(); } 188 | 189 | operator SEXP() const { return SEXP(vector_); } 190 | 191 | // operator sexp() { return sexp(vector_); } 192 | 193 | sexp attr(const char* name) const { return SEXP(vector_.attr(name)); } 194 | 195 | sexp attr(const std::string& name) const { return SEXP(vector_.attr(name)); } 196 | 197 | sexp attr(SEXP name) const { return SEXP(vector_.attr(name)); } 198 | 199 | r_vector names() const { return r_vector(vector_.names()); } 200 | 201 | T operator()(int row, int col) const { return vector_[row + (col * nrow())]; } 202 | 203 | slice operator[](int index) const { return {*this, index}; } 204 | 205 | slice_iterator begin() const { return {*this, 0}; } 206 | slice_iterator end() const { return {*this, nslices()}; } 207 | }; 208 | 209 | template 210 | using doubles_matrix = matrix, double, S>; 211 | template 212 | using integers_matrix = matrix, int, S>; 213 | template 214 | using logicals_matrix = matrix, r_bool, S>; 215 | template 216 | using strings_matrix = matrix, r_string, S>; 217 | 218 | namespace writable { 219 | template 220 | using doubles_matrix = matrix, r_vector::proxy, S>; 221 | template 222 | using integers_matrix = matrix, r_vector::proxy, S>; 223 | template 224 | using logicals_matrix = matrix, r_vector::proxy, S>; 225 | template 226 | using strings_matrix = matrix, r_vector::proxy, S>; 227 | } // namespace writable 228 | 229 | // TODO: Add tests for Matrix class 230 | } // namespace cpp11 231 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/named_arg.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for size_t 4 | 5 | #include // for initializer_list 6 | 7 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, literals 8 | #include "cpp11/as.hpp" // for as_sexp 9 | #include "cpp11/sexp.hpp" // for sexp 10 | 11 | namespace cpp11 { 12 | class named_arg { 13 | public: 14 | explicit named_arg(const char* name) : name_(name), value_(R_NilValue) {} 15 | named_arg& operator=(std::initializer_list il) { 16 | value_ = as_sexp(il); 17 | return *this; 18 | } 19 | 20 | template 21 | named_arg& operator=(T rhs) { 22 | value_ = as_sexp(rhs); 23 | return *this; 24 | } 25 | 26 | template 27 | named_arg& operator=(std::initializer_list rhs) { 28 | value_ = as_sexp(rhs); 29 | return *this; 30 | } 31 | 32 | const char* name() const { return name_; } 33 | SEXP value() const { return value_; } 34 | 35 | private: 36 | const char* name_; 37 | sexp value_; 38 | }; 39 | 40 | namespace literals { 41 | 42 | inline named_arg operator""_nm(const char* name, std::size_t) { return named_arg(name); } 43 | 44 | } // namespace literals 45 | 46 | using namespace literals; 47 | 48 | } // namespace cpp11 49 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/r_bool.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for numeric_limits 4 | #include 5 | #include // for is_convertible, enable_if 6 | 7 | #include "R_ext/Boolean.h" // for Rboolean 8 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, ... 9 | #include "cpp11/as.hpp" // for as_sexp 10 | #include "cpp11/protect.hpp" // for unwind_protect 11 | #include "cpp11/r_vector.hpp" 12 | #include "cpp11/sexp.hpp" // for sexp 13 | 14 | namespace cpp11 { 15 | 16 | class r_bool { 17 | public: 18 | r_bool() = default; 19 | 20 | r_bool(SEXP data) { 21 | if (Rf_isLogical(data)) { 22 | if (Rf_xlength(data) == 1) { 23 | value_ = static_cast(LOGICAL_ELT(data, 0)); 24 | } 25 | } 26 | throw std::invalid_argument("Invalid r_bool value"); 27 | } 28 | 29 | r_bool(bool value) : value_(value ? TRUE : FALSE) {} 30 | r_bool(Rboolean value) : value_(value) {} 31 | r_bool(int value) : value_(from_int(value)) {} 32 | 33 | operator bool() const { return value_ == TRUE; } 34 | operator int() const { return value_; } 35 | operator Rboolean() const { return value_ ? TRUE : FALSE; } 36 | 37 | bool operator==(r_bool rhs) const { return value_ == rhs.value_; } 38 | bool operator==(bool rhs) const { return operator==(r_bool(rhs)); } 39 | bool operator==(Rboolean rhs) const { return operator==(r_bool(rhs)); } 40 | bool operator==(int rhs) const { return operator==(r_bool(rhs)); } 41 | 42 | private: 43 | static constexpr int na = std::numeric_limits::min(); 44 | 45 | static int from_int(int value) { 46 | if (value == static_cast(FALSE)) return FALSE; 47 | if (value == static_cast(na)) return na; 48 | return TRUE; 49 | } 50 | 51 | int value_ = na; 52 | }; 53 | 54 | inline std::ostream& operator<<(std::ostream& os, r_bool const& value) { 55 | os << ((value == TRUE) ? "TRUE" : "FALSE"); 56 | return os; 57 | } 58 | 59 | template 60 | using enable_if_r_bool = enable_if_t::value, R>; 61 | 62 | template 63 | enable_if_r_bool as_sexp(T from) { 64 | sexp res = Rf_allocVector(LGLSXP, 1); 65 | unwind_protect([&] { SET_LOGICAL_ELT(res.data(), 0, from); }); 66 | return res; 67 | } 68 | 69 | template <> 70 | inline r_bool na() { 71 | return NA_LOGICAL; 72 | } 73 | 74 | namespace traits { 75 | template <> 76 | struct get_underlying_type { 77 | using type = int; 78 | }; 79 | } // namespace traits 80 | 81 | } // namespace cpp11 82 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/r_string.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for string, basic_string, operator== 4 | #include // for is_convertible, enable_if 5 | 6 | #include "R_ext/Memory.h" // for vmaxget, vmaxset 7 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, Rf_mkCharCE, Rf_translat... 8 | #include "cpp11/as.hpp" // for as_sexp 9 | #include "cpp11/protect.hpp" // for unwind_protect, protect, protect::function 10 | #include "cpp11/sexp.hpp" // for sexp 11 | 12 | namespace cpp11 { 13 | 14 | class r_string { 15 | public: 16 | r_string() = default; 17 | r_string(SEXP data) : data_(data) {} 18 | r_string(const char* data) : data_(safe[Rf_mkCharCE](data, CE_UTF8)) {} 19 | r_string(const std::string& data) 20 | : data_(safe[Rf_mkCharLenCE](data.c_str(), data.size(), CE_UTF8)) {} 21 | 22 | operator SEXP() const { return data_; } 23 | operator sexp() const { return data_; } 24 | operator std::string() const { 25 | std::string res; 26 | res.reserve(size()); 27 | 28 | void* vmax = vmaxget(); 29 | unwind_protect([&] { res.assign(Rf_translateCharUTF8(data_)); }); 30 | vmaxset(vmax); 31 | 32 | return res; 33 | } 34 | 35 | bool operator==(const r_string& rhs) const { return data_.data() == rhs.data_.data(); } 36 | 37 | bool operator==(const SEXP rhs) const { return data_.data() == rhs; } 38 | 39 | bool operator==(const char* rhs) const { 40 | return static_cast(*this) == rhs; 41 | } 42 | 43 | bool operator==(const std::string& rhs) const { 44 | return static_cast(*this) == rhs; 45 | } 46 | 47 | R_xlen_t size() const { return Rf_xlength(data_); } 48 | 49 | private: 50 | sexp data_ = R_NilValue; 51 | }; 52 | 53 | inline SEXP as_sexp(std::initializer_list il) { 54 | R_xlen_t size = il.size(); 55 | 56 | sexp data; 57 | unwind_protect([&] { 58 | data = Rf_allocVector(STRSXP, size); 59 | auto it = il.begin(); 60 | for (R_xlen_t i = 0; i < size; ++i, ++it) { 61 | if (*it == NA_STRING) { 62 | SET_STRING_ELT(data, i, *it); 63 | } else { 64 | SET_STRING_ELT(data, i, Rf_mkCharCE(Rf_translateCharUTF8(*it), CE_UTF8)); 65 | } 66 | } 67 | }); 68 | return data; 69 | } 70 | 71 | template 72 | using enable_if_r_string = enable_if_t::value, R>; 73 | 74 | template 75 | enable_if_r_string as_sexp(T from) { 76 | r_string str(from); 77 | sexp res; 78 | unwind_protect([&] { 79 | res = Rf_allocVector(STRSXP, 1); 80 | 81 | if (str == NA_STRING) { 82 | SET_STRING_ELT(res, 0, str); 83 | } else { 84 | SET_STRING_ELT(res, 0, Rf_mkCharCE(Rf_translateCharUTF8(str), CE_UTF8)); 85 | } 86 | }); 87 | 88 | return res; 89 | } 90 | 91 | template <> 92 | inline r_string na() { 93 | return NA_STRING; 94 | } 95 | 96 | namespace traits { 97 | template <> 98 | struct get_underlying_type { 99 | using type = SEXP; 100 | }; 101 | } // namespace traits 102 | 103 | } // namespace cpp11 104 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/raws.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for min 4 | #include // for array 5 | #include // for uint8_t 6 | #include // for initializer_list 7 | 8 | #include "Rversion.h" 9 | #include "cpp11/R.hpp" // for RAW, SEXP, SEXPREC, Rf_allocVector 10 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 11 | #include "cpp11/protect.hpp" // for safe 12 | #include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy 13 | #include "cpp11/sexp.hpp" // for sexp 14 | 15 | // Specializations for raws 16 | 17 | namespace cpp11 { 18 | 19 | namespace traits { 20 | template <> 21 | struct get_underlying_type { 22 | using type = Rbyte; 23 | }; 24 | } // namespace traits 25 | 26 | template <> 27 | inline SEXPTYPE r_vector::get_sexptype() { 28 | return RAWSXP; 29 | } 30 | 31 | template <> 32 | inline typename r_vector::underlying_type r_vector::get_elt( 33 | SEXP x, R_xlen_t i) { 34 | // NOPROTECT: likely too costly to unwind protect every elt 35 | return RAW_ELT(x, i); 36 | } 37 | 38 | template <> 39 | inline typename r_vector::underlying_type const* r_vector::get_const_p( 40 | bool is_altrep, SEXP data) { 41 | return RAW_OR_NULL(data); 42 | } 43 | 44 | template <> 45 | inline typename r_vector::underlying_type* r_vector::get_p( 46 | bool is_altrep, SEXP data) { 47 | if (is_altrep) { 48 | return nullptr; 49 | } else { 50 | return RAW(data); 51 | } 52 | } 53 | 54 | template <> 55 | inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, 56 | typename r_vector::underlying_type* buf) { 57 | // NOPROTECT: likely too costly to unwind protect here 58 | RAW_GET_REGION(x, i, n, buf); 59 | } 60 | 61 | template <> 62 | inline bool r_vector::const_iterator::use_buf(bool is_altrep) { 63 | return is_altrep; 64 | } 65 | 66 | typedef r_vector raws; 67 | 68 | namespace writable { 69 | 70 | template <> 71 | inline void r_vector::set_elt(SEXP x, R_xlen_t i, 72 | typename r_vector::underlying_type value) { 73 | // NOPROTECT: Likely too costly to unwind protect every set elt 74 | #if R_VERSION >= R_Version(4, 2, 0) 75 | SET_RAW_ELT(x, i, value); 76 | #else 77 | RAW(x)[i] = value; 78 | #endif 79 | } 80 | 81 | typedef r_vector raws; 82 | 83 | } // namespace writable 84 | 85 | } // namespace cpp11 86 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/sexp.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for size_t 4 | 5 | #include // for string, basic_string 6 | 7 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, REAL_ELT, R_NilV... 8 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 9 | #include "cpp11/protect.hpp" // for store 10 | 11 | namespace cpp11 { 12 | 13 | /// Converting to SEXP 14 | class sexp { 15 | private: 16 | SEXP data_ = R_NilValue; 17 | SEXP preserve_token_ = R_NilValue; 18 | 19 | public: 20 | sexp() = default; 21 | 22 | sexp(SEXP data) : data_(data), preserve_token_(detail::store::insert(data_)) {} 23 | 24 | // We maintain our own new `preserve_token_` 25 | sexp(const sexp& rhs) { 26 | data_ = rhs.data_; 27 | preserve_token_ = detail::store::insert(data_); 28 | } 29 | 30 | // We take ownership over the `rhs.preserve_token_`. 31 | // Importantly we clear it in the `rhs` so it can't release the object upon destruction. 32 | sexp(sexp&& rhs) { 33 | data_ = rhs.data_; 34 | preserve_token_ = rhs.preserve_token_; 35 | 36 | rhs.data_ = R_NilValue; 37 | rhs.preserve_token_ = R_NilValue; 38 | } 39 | 40 | sexp& operator=(const sexp& rhs) { 41 | detail::store::release(preserve_token_); 42 | 43 | data_ = rhs.data_; 44 | preserve_token_ = detail::store::insert(data_); 45 | 46 | return *this; 47 | } 48 | 49 | ~sexp() { detail::store::release(preserve_token_); } 50 | 51 | attribute_proxy attr(const char* name) const { 52 | return attribute_proxy(*this, name); 53 | } 54 | 55 | attribute_proxy attr(const std::string& name) const { 56 | return attribute_proxy(*this, name.c_str()); 57 | } 58 | 59 | attribute_proxy attr(SEXP name) const { 60 | return attribute_proxy(*this, name); 61 | } 62 | 63 | attribute_proxy names() const { 64 | return attribute_proxy(*this, R_NamesSymbol); 65 | } 66 | 67 | operator SEXP() const { return data_; } 68 | SEXP data() const { return data_; } 69 | 70 | /// DEPRECATED: Do not use this, it will be removed soon. 71 | operator double() const { return REAL_ELT(data_, 0); } 72 | /// DEPRECATED: Do not use this, it will be removed soon. 73 | operator size_t() const { return REAL_ELT(data_, 0); } 74 | /// DEPRECATED: Do not use this, it will be removed soon. 75 | operator bool() const { return LOGICAL_ELT(data_, 0); } 76 | }; 77 | 78 | } // namespace cpp11 79 | -------------------------------------------------------------------------------- /src/cpp11/include/cpp11/strings.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include // for initializer_list 4 | #include // for string, basic_string 5 | 6 | #include "cpp11/R.hpp" // for SEXP, SEXPREC, SET_STRI... 7 | #include "cpp11/as.hpp" // for as_sexp 8 | #include "cpp11/attribute_proxy.hpp" // for attribute_proxy 9 | #include "cpp11/named_arg.hpp" // for named_arg 10 | #include "cpp11/protect.hpp" // for safe 11 | #include "cpp11/r_string.hpp" // for r_string 12 | #include "cpp11/r_vector.hpp" // for r_vector, r_vector<>::proxy 13 | #include "cpp11/sexp.hpp" // for sexp 14 | 15 | // Specializations for strings 16 | 17 | namespace cpp11 { 18 | 19 | template <> 20 | inline SEXPTYPE r_vector::get_sexptype() { 21 | return STRSXP; 22 | } 23 | 24 | template <> 25 | inline typename r_vector::underlying_type r_vector::get_elt( 26 | SEXP x, R_xlen_t i) { 27 | // NOPROTECT: likely too costly to unwind protect every elt 28 | return STRING_ELT(x, i); 29 | } 30 | 31 | template <> 32 | inline typename r_vector::underlying_type* r_vector::get_p(bool, 33 | SEXP) { 34 | return nullptr; 35 | } 36 | 37 | template <> 38 | inline typename r_vector::underlying_type const* 39 | r_vector::get_const_p(bool is_altrep, SEXP data) { 40 | // No `STRING_PTR_OR_NULL()` 41 | if (is_altrep) { 42 | return nullptr; 43 | } else { 44 | return STRING_PTR_RO(data); 45 | } 46 | } 47 | 48 | template <> 49 | inline void r_vector::get_region(SEXP x, R_xlen_t i, R_xlen_t n, 50 | typename r_vector::underlying_type* buf) { 51 | cpp11::stop("Unreachable!"); 52 | } 53 | 54 | template <> 55 | inline bool r_vector::const_iterator::use_buf(bool is_altrep) { 56 | return false; 57 | } 58 | 59 | typedef r_vector strings; 60 | 61 | namespace writable { 62 | 63 | template <> 64 | inline void r_vector::set_elt(SEXP x, R_xlen_t i, 65 | typename r_vector::underlying_type value) { 66 | // NOPROTECT: Likely too costly to unwind protect every set elt 67 | SET_STRING_ELT(x, i, value); 68 | } 69 | 70 | inline bool operator==(const r_vector::proxy& lhs, r_string rhs) { 71 | return static_cast(lhs).operator==(static_cast(rhs).c_str()); 72 | } 73 | 74 | inline SEXP alloc_or_copy(const SEXP data) { 75 | switch (detail::r_typeof(data)) { 76 | case CHARSXP: 77 | return cpp11::r_vector(safe[Rf_allocVector](STRSXP, 1)); 78 | case STRSXP: 79 | return safe[Rf_shallow_duplicate](data); 80 | default: 81 | throw type_error(STRSXP, detail::r_typeof(data)); 82 | } 83 | } 84 | 85 | inline SEXP alloc_if_charsxp(const SEXP data) { 86 | switch (detail::r_typeof(data)) { 87 | case CHARSXP: 88 | return cpp11::r_vector(safe[Rf_allocVector](STRSXP, 1)); 89 | case STRSXP: 90 | return data; 91 | default: 92 | throw type_error(STRSXP, detail::r_typeof(data)); 93 | } 94 | } 95 | 96 | template <> 97 | inline r_vector::r_vector(const SEXP& data) 98 | : cpp11::r_vector(alloc_or_copy(data)), capacity_(length_) { 99 | if (detail::r_typeof(data) == CHARSXP) { 100 | SET_STRING_ELT(data_, 0, data); 101 | } 102 | } 103 | 104 | template <> 105 | inline r_vector::r_vector(SEXP&& data) 106 | : cpp11::r_vector(alloc_if_charsxp(data)), capacity_(length_) { 107 | if (detail::r_typeof(data) == CHARSXP) { 108 | SET_STRING_ELT(data_, 0, data); 109 | } 110 | } 111 | 112 | // Requires specialization to handle `NA_STRING` and UTF-8 translation 113 | template <> 114 | inline r_vector::r_vector(std::initializer_list il) 115 | : cpp11::r_vector(safe[Rf_allocVector](STRSXP, il.size())), 116 | capacity_(il.size()) { 117 | unwind_protect([&] { 118 | auto it = il.begin(); 119 | 120 | for (R_xlen_t i = 0; i < capacity_; ++i, ++it) { 121 | // i.e. to `SEXP` 122 | underlying_type elt = static_cast(*it); 123 | 124 | if (elt == NA_STRING) { 125 | set_elt(data_, i, elt); 126 | } else { 127 | set_elt(data_, i, Rf_mkCharCE(Rf_translateCharUTF8(elt), CE_UTF8)); 128 | } 129 | } 130 | }); 131 | } 132 | 133 | typedef r_vector strings; 134 | 135 | template 136 | inline void r_vector::push_back(const named_arg& value) { 137 | push_back(value.value()); 138 | if (Rf_xlength(names()) == 0) { 139 | cpp11::writable::strings new_nms(size()); 140 | names() = new_nms; 141 | } 142 | cpp11::writable::strings nms(names()); 143 | nms[size() - 1] = value.name(); 144 | } 145 | 146 | } // namespace writable 147 | 148 | } // namespace cpp11 149 | -------------------------------------------------------------------------------- /src/r_archive.cpp: -------------------------------------------------------------------------------- 1 | #include "r_archive.h" 2 | 3 | SEXP new_connection_xptr; 4 | 5 | SEXP read_connection_xptr; 6 | 7 | void rchive_init(SEXP nc_xptr, SEXP rc_xptr) { 8 | new_connection_xptr = nc_xptr; 9 | R_PreserveObject(nc_xptr); 10 | read_connection_xptr = rc_xptr; 11 | R_PreserveObject(rc_xptr); 12 | } 13 | 14 | SEXP new_connection( 15 | const char* description, 16 | const char* mode, 17 | const char* class_name, 18 | Rconnection* ptr) { 19 | auto new_connection_ptr = reinterpret_cast( 21 | R_ExternalPtrAddr(new_connection_xptr)); 22 | return new_connection_ptr(description, mode, class_name, ptr); 23 | } 24 | 25 | size_t read_connection(SEXP connection, void* buf, size_t n) { 26 | auto read_connection_ptr = reinterpret_cast( 27 | R_ExternalPtrAddr(read_connection_xptr)); 28 | return read_connection_ptr(connection, buf, n); 29 | } 30 | 31 | size_t pop(void* target, size_t max, rchive* r) { 32 | size_t copy_size = r->size < max ? r->size : max; 33 | memcpy(target, r->cur, copy_size); 34 | r->cur += copy_size; 35 | r->size -= copy_size; 36 | 37 | /* clang-format off */ 38 | /* Rprintf("Requested %d bytes, popped %d bytes, new size %d bytes.\n", max, copy_size, r->size); */ 39 | /* clang-format on */ 40 | 41 | return copy_size; 42 | } 43 | 44 | size_t push(rchive* r) { 45 | R_CheckUserInterrupt(); 46 | const void* buf; 47 | size_t size; 48 | __LA_INT64_T offset; 49 | 50 | if (r->last_response == ARCHIVE_EOF) { 51 | return 0; 52 | } 53 | 54 | if (!r->cur) { 55 | r->cur = r->buf.data(); 56 | } 57 | 58 | /* move existing data to front of buffer (if any) */ 59 | memmove(r->buf.data(), r->cur, r->size); 60 | 61 | /* read data from archive */ 62 | r->last_response = archive_read_data_block(r->ar, &buf, &size, &offset); 63 | if (r->last_response == ARCHIVE_EOF) { 64 | r->has_more = 0; 65 | return 0; 66 | } 67 | if (r->last_response != ARCHIVE_OK) { 68 | Rf_error("%s", archive_error_string(r->ar)); 69 | } 70 | 71 | /* allocate more space if required */ 72 | size_t newsize = r->size + size; 73 | while (newsize > r->buf.size()) { 74 | size_t newlimit = 2 * r->buf.size(); 75 | // Rprintf("Resizing buffer to %d.\n", newlimit); 76 | r->buf.resize(newlimit); 77 | } 78 | 79 | /* append new data */ 80 | /* Rprintf("Pushed %d bytes, new size %d bytes.\n", size, newsize); */ 81 | memcpy(r->buf.data() + r->size, buf, size); 82 | r->size = newsize; 83 | r->cur = r->buf.data(); 84 | return size; 85 | } 86 | 87 | #if ARCHIVE_VERSION_NUMBER < 3000004 88 | /* Define archive_write_add_filter for older versions */ 89 | /* This code is pulled directly from 90 | * https://github.com/libarchive/libarchive/blob/06052e47e500ef4c8c937c4c8b987433a647cb4c/libarchive/archive_write_add_filter.c 91 | *- 92 | * Copyright (c) 2012 Ondrej Holy 93 | * All rights reserved. 94 | * 95 | * Redistribution and use in source and binary forms, with or without 96 | * modification, are permitted provided that the following conditions 97 | * are met: 98 | * 1. Redistributions of source code must retain the above copyright 99 | * notice, this list of conditions and the following disclaimer. 100 | * 2. Redistributions in binary form must reproduce the above copyright 101 | * notice, this list of conditions and the following disclaimer in the 102 | * documentation and/or other materials provided with the distribution. 103 | * 104 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 105 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 106 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 107 | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 108 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 109 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 110 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 111 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 112 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 113 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 114 | */ 115 | 116 | #include 117 | 118 | /* A table that maps filter codes to functions. */ 119 | static struct { 120 | int code; 121 | int (*setter)(struct archive*); 122 | } codes[] = { 123 | {ARCHIVE_FILTER_NONE, archive_write_add_filter_none}, 124 | {ARCHIVE_FILTER_GZIP, archive_write_add_filter_gzip}, 125 | {ARCHIVE_FILTER_BZIP2, archive_write_add_filter_bzip2}, 126 | {ARCHIVE_FILTER_COMPRESS, archive_write_add_filter_compress}, 127 | {ARCHIVE_FILTER_LZMA, archive_write_add_filter_lzma}, 128 | {ARCHIVE_FILTER_XZ, archive_write_add_filter_xz}, 129 | {ARCHIVE_FILTER_LZIP, archive_write_add_filter_lzip}, 130 | {-1, NULL}}; 131 | 132 | int archive_write_add_filter(struct archive* a, int code) { 133 | int i; 134 | 135 | for (i = 0; codes[i].code != -1; i++) { 136 | if (code == codes[i].code) 137 | return ((codes[i].setter)(a)); 138 | } 139 | 140 | archive_set_error(a, EINVAL, "No such filter"); 141 | return (ARCHIVE_FATAL); 142 | } 143 | #endif 144 | -------------------------------------------------------------------------------- /src/r_archive.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "connection/connection.h" 4 | 5 | #undef Realloc 6 | // Also need to undefine the Free macro 7 | #undef Free 8 | 9 | #include 10 | #include 11 | 12 | #undef TRUE 13 | #undef FALSE 14 | #include 15 | 16 | #include 17 | #include 18 | 19 | #define R_EOF -1 20 | 21 | #define FILTER_MAX 8 22 | 23 | struct input_data { 24 | cpp11::sexp connection; 25 | std::vector buf; 26 | }; 27 | 28 | struct rchive { 29 | std::string archive_filename; 30 | int format; 31 | std::string filename; 32 | cpp11::sexp file; 33 | input_data input; 34 | std::vector buf; 35 | char* cur = nullptr; 36 | archive* ar = nullptr; 37 | archive_entry* entry = nullptr; 38 | ssize_t last_response = 0; 39 | bool has_more = true; 40 | size_t size = 0; 41 | int filters[FILTER_MAX]; 42 | std::string options; 43 | cpp11::strings password; 44 | }; 45 | 46 | size_t pop(void* target, size_t max, rchive* r); 47 | 48 | size_t push(rchive* r); 49 | 50 | ssize_t input_read(struct archive* a, void* client_data, const void** buff); 51 | int64_t 52 | input_seek(struct archive*, void* client_data, int64_t offset, int whence); 53 | int input_close(struct archive* a, void* client_data); 54 | 55 | #if ARCHIVE_VERSION_NUMBER < 3000004 56 | int archive_write_add_filter(struct archive* a, int code); 57 | #endif 58 | 59 | #define call(f, ...) call_(__FILE__, __LINE__, #f, f, __VA_ARGS__) 60 | 61 | template 62 | inline int call_( 63 | const char* file_name, 64 | int line, 65 | const char* function_name, 66 | F f, 67 | Rconnection con, 68 | Args... args) { 69 | rchive* r = (rchive*)con->private_ptr; 70 | if (!r->ar) { 71 | return ARCHIVE_OK; 72 | } 73 | r->last_response = f(r->ar, args...); 74 | if (r->last_response < ARCHIVE_OK) { 75 | con->isopen = FALSE; 76 | const char* msg = archive_error_string(r->ar); 77 | if (msg) { 78 | Rf_errorcall( 79 | R_NilValue, "%s:%i %s(): %s", file_name, line, function_name, msg); 80 | } else { 81 | Rf_errorcall( 82 | R_NilValue, 83 | "%s:%i %s(): unknown libarchive error", 84 | file_name, 85 | line, 86 | function_name); 87 | } 88 | } 89 | return r->last_response; 90 | } 91 | 92 | template 93 | inline int call_( 94 | const char* file_name, 95 | int line, 96 | const char* function_name, 97 | F f, 98 | archive* ar, 99 | Args... args) { 100 | ssize_t response = f(ar, args...); 101 | if (response < ARCHIVE_OK) { 102 | const char* msg = archive_error_string(ar); 103 | if (msg) { 104 | Rf_errorcall( 105 | R_NilValue, "%s:%i %s(): %s", file_name, line, function_name, msg); 106 | } else { 107 | Rf_errorcall( 108 | R_NilValue, 109 | "%s:%i %s(): unknown libarchive error", 110 | file_name, 111 | line, 112 | function_name); 113 | } 114 | } 115 | return response; 116 | } 117 | 118 | class local_utf8_locale { 119 | // In the future once R is using the windows runtime that supports UTF-8 we 120 | // could set the UTF-8 locale here for windows as well with ".UTF-8" 121 | // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale?view=msvc-160#utf-8-support 122 | // But for now just do nothing 123 | #ifdef __MINGW32__ 124 | #else 125 | private: 126 | std::string old_locale_; 127 | 128 | public: 129 | local_utf8_locale() : old_locale_(std::setlocale(LC_CTYPE, NULL)) { 130 | #ifdef __APPLE__ 131 | const char* locale = "UTF-8"; 132 | #else 133 | const char* locale = "C.UTF-8"; 134 | #endif 135 | const char* new_locale = std::setlocale(LC_CTYPE, locale); 136 | if (nullptr == new_locale) { 137 | cpp11::warning("Setting UTF-8 locale failed"); 138 | } 139 | } 140 | ~local_utf8_locale() { std::setlocale(LC_CTYPE, old_locale_.c_str()); } 141 | #endif 142 | }; 143 | 144 | class local_connection { 145 | private: 146 | cpp11::sexp connection_; 147 | std::string mode_; 148 | bool opened_; 149 | 150 | cpp11::function close = cpp11::package("base")["close"]; 151 | 152 | public: 153 | local_connection( 154 | const cpp11::sexp& connection, const std::string& mode = "rb") 155 | : connection_(connection), mode_(mode), opened_(false) { 156 | static auto isOpen = cpp11::package("base")["isOpen"]; 157 | opened_ = !isOpen(connection); 158 | if (opened_) { 159 | static auto open = cpp11::package("base")["open"]; 160 | open(connection_, mode.c_str()); 161 | } 162 | } 163 | ~local_connection() { 164 | if (opened_) { 165 | close(connection_); 166 | } 167 | } 168 | operator SEXP() const { return connection_; } 169 | operator cpp11::sexp() const { return connection_; } 170 | }; 171 | 172 | [[cpp11::register]] void rchive_init(SEXP nc_xptr, SEXP rc_xptr); 173 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(archive) 3 | 4 | test_check("archive") 5 | -------------------------------------------------------------------------------- /tests/testthat/cp866.tar.Z.uu: -------------------------------------------------------------------------------- 1 | begin 644 test_read_format_gtar_cp866.tar.Z 2 | M'YV0CR`A$E1($H"#"!,J7,BPH<.'$"-*1`BC(HP;-6H`L!@#8XR-%3O6^&BQ 3 | MI$4;`&+$J$'CQ@P9*V&0C`'#Q@T9`$#`F,BSI\^?0`'4F4,GC!P0(`",J5,F 4 | MJ,.E39U*G4JUJM6K6'G"DY?N7CEZ6<.*'4NVK-FS:-.J7JG/$RY\Z[D!L.+7HT*=2LER-KWLS9 6 | MY]:N7SN+'DVZM.G3J%.K7LVZM>O7L&/+GDV[MNW;N'/KWLV[M^_?P(,+'TZ\ 7 | MN/'CR),K7\Z\N?/GT*-+GTZ]NO7KV+-KW\Z]N_?OX,.+'T^^O/GSZ-.K7\^^ 8 | 1O?OW\./+GT^_OOW[^//KKPX` 9 | ` 10 | end 11 | -------------------------------------------------------------------------------- /tests/testthat/helper-mock.R: -------------------------------------------------------------------------------- 1 | fake <- local({ 2 | `%||%` <- function(l, r) if (is.null(l)) r else l 3 | 4 | fake_through_tree <- function(tree, what, how) { 5 | for (d in tree) { 6 | for (parent in d) { 7 | parent_env <- parent[["parent_env"]] 8 | func_dict <- parent[["funcs"]] 9 | for (func_name in ls(func_dict, all.names = TRUE)) { 10 | func <- func_dict[[func_name]] 11 | func_env <- new.env(parent = environment(func)) 12 | 13 | what <- override_seperators(what, func_env) 14 | where_name <- override_seperators(func_name, parent_env) 15 | 16 | if (!is.function(how)) { 17 | assign(what, function(...) how, func_env) 18 | } else { 19 | assign(what, how, func_env) 20 | } 21 | 22 | environment(func) <- func_env 23 | locked <- exists(where_name, parent_env, inherits = FALSE) && 24 | bindingIsLocked(where_name, parent_env) 25 | if (locked) { 26 | baseenv()$unlockBinding(where_name, parent_env) 27 | } 28 | assign(where_name, func, parent_env) 29 | if (locked) { 30 | lockBinding(where_name, parent_env) 31 | } 32 | } 33 | } 34 | } 35 | } 36 | 37 | override_seperators <- function(name, env) { 38 | mangled_name <- NULL 39 | for (sep in c("::", "$")) { 40 | if (grepl(sep, name, fixed = TRUE)) { 41 | elements <- strsplit(name, sep, fixed = TRUE) 42 | mangled_name <- paste( 43 | elements[[1L]][1L], 44 | elements[[1L]][2L], 45 | sep = "XXX" 46 | ) 47 | 48 | stub_list <- c(mangled_name) 49 | if ("stub_list" %in% names(attributes(get(sep, env)))) { 50 | stub_list <- c(stub_list, attributes(get(sep, env))[["stub_list"]]) 51 | } 52 | 53 | create_new_name <- create_create_new_name_function( 54 | stub_list, 55 | env, 56 | sep 57 | ) 58 | assign(sep, create_new_name, env) 59 | } 60 | } 61 | mangled_name %||% name 62 | } 63 | 64 | backtick <- function(x) { 65 | encodeString(x, quote = "`", na.encode = FALSE) 66 | } 67 | 68 | create_create_new_name_function <- function(stub_list, env, sep) { 69 | force(stub_list) 70 | force(env) 71 | force(sep) 72 | 73 | create_new_name <- function(pkg, func) { 74 | pkg_name <- deparse(substitute(pkg)) 75 | func_name <- deparse(substitute(func)) 76 | for (stub in stub_list) { 77 | if (paste(pkg_name, func_name, sep = "XXX") == stub) { 78 | return(eval(parse(text = backtick(stub)), env)) 79 | } 80 | } 81 | 82 | # used to avoid recursively calling the replacement function 83 | eval_env <- new.env(parent = parent.frame()) 84 | assign(sep, eval(parse(text = paste0("`", sep, "`"))), eval_env) 85 | 86 | code <- paste(pkg_name, backtick(func_name), sep = sep) 87 | return(eval(parse(text = code), eval_env)) 88 | } 89 | attributes(create_new_name) <- list(stub_list = stub_list) 90 | create_new_name 91 | } 92 | 93 | build_function_tree <- function(test_env, where, where_name) { 94 | func_dict <- new.env() 95 | func_dict[[where_name]] <- where 96 | tree <- list( 97 | list( 98 | list(parent_env = test_env, funcs = func_dict) 99 | ) 100 | ) 101 | 102 | tree 103 | } 104 | 105 | fake <- function(where, what, how) { 106 | where_name <- deparse(substitute(where)) 107 | stopifnot(is.character(what), length(what) == 1) 108 | test_env <- parent.frame() 109 | tree <- build_function_tree(test_env, where, where_name) 110 | fake_through_tree(tree, what, how) 111 | } 112 | }) 113 | -------------------------------------------------------------------------------- /tests/testthat/helpers.R: -------------------------------------------------------------------------------- 1 | 2 | skip_if_no_encoding <- function(enc) { 3 | tryCatch({ 4 | iconv("xxx", from = env, to = env) 5 | TRUE 6 | }, error = function(err) NULL) 7 | testthat::skip(paste("No encoding:", enc)) 8 | } 9 | -------------------------------------------------------------------------------- /tests/testthat/mtcars.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/archive/5e04bfa3a587af62c2e795716eed15d941e3bbdf/tests/testthat/mtcars.tar.gz -------------------------------------------------------------------------------- /tests/testthat/test-archive.R: -------------------------------------------------------------------------------- 1 | context("archive") 2 | 3 | data_file <- system.file(package = "archive", "extdata", "data.zip") 4 | 5 | describe("archive", { 6 | it("reads simple zip files", { 7 | a <- archive(data_file) 8 | expect_equal(NROW(a), 3L) 9 | expect_equal(a[["path"]], c("iris.csv", "mtcars.csv", "airquality.csv")) 10 | expect_equal(a[["size"]], c(192, 274, 142)) 11 | }) 12 | it("takes options", { 13 | skip_on_os("windows") 14 | skip_on_os("solaris") 15 | skip_if_no_encoding("CP866") 16 | skip_if(l10n_info()$`UTF-8` == FALSE) 17 | 18 | a <- archive(test_path("cp866.tar.Z.uu"), "hdrcharset=CP866") 19 | expect_equal(a$path, c("\u041f\u0420\u0418\u0412\u0415\u0422", "\u043f\u0440\u0438\u0432\u0435\u0442")) 20 | }) 21 | it("works with tar.gz files", { 22 | # `file()` will implicitly change to a gzfile connection unless `mode = 23 | # "rb"`, and gzfile cannot seek properly, so we need to ensure we open as 24 | # a normal file. 25 | a <- archive(test_path("mtcars.tar.gz")) 26 | expect_equal(a$path, "mtcars.csv") 27 | }) 28 | }) 29 | 30 | describe("libarchive_zlib_version", { 31 | it("handles simple versions", { 32 | fake(libarchive_zlib_version, "libarchive_zlib_version_", "1.3.1") 33 | expect_equal(libarchive_zlib_version(), package_version("1.3.1")) 34 | }) 35 | it("removes non-numeric suffix", { 36 | fake(libarchive_zlib_version, "libarchive_zlib_version_", "1.3.1.zlib-ng") 37 | expect_equal(libarchive_zlib_version(), package_version("1.3.1")) 38 | }) 39 | it("handles nonsensical versions", { 40 | fake(libarchive_zlib_version, "libarchive_zlib_version_", "1") 41 | expect_equal(libarchive_zlib_version(), package_version("0.0.0")) 42 | fake(libarchive_zlib_version, "libarchive_zlib_version_", "1.foobar") 43 | expect_equal(libarchive_zlib_version(), package_version("0.0.0")) 44 | fake(libarchive_zlib_version, "libarchive_zlib_version_", "not-really-good") 45 | expect_equal(libarchive_zlib_version(), package_version("0.0.0")) 46 | }) 47 | }) 48 | -------------------------------------------------------------------------------- /tests/testthat/test-archive_extract.R: -------------------------------------------------------------------------------- 1 | data_file <- system.file(package = "archive", "extdata", "data.zip") 2 | 3 | describe("archive_extract", { 4 | it("extracts all files in the archive", { 5 | a <- archive(data_file) 6 | d <- tempfile() 7 | on.exit(unlink(d, recursive = TRUE)) 8 | archive_extract(data_file, d) 9 | 10 | f <- list.files(d, full.names = TRUE) 11 | expect_equal(length(f), 3) 12 | expect_equal(file.size(f), a[order(a[["path"]]), ][["size"]]) 13 | }) 14 | it("extracts given files in the archive, indexed by integer", { 15 | d <- tempfile() 16 | on.exit(unlink(d, recursive = TRUE)) 17 | archive_extract(data_file, d, files = c(1L, 3L)) 18 | 19 | f <- list.files(d, full.names = TRUE) 20 | expect_equal(length(f), 2) 21 | 22 | a <- archive(data_file) 23 | expect_equal(sort(file.size(f)), sort(a[a$path %in% a$path[c(1, 3)], ][["size"]])) 24 | }) 25 | it("extracts given files in the archive, indexed by double", { 26 | d <- tempfile() 27 | on.exit(unlink(d, recursive = TRUE)) 28 | archive_extract(data_file, d, files = c(1, 3)) 29 | 30 | f <- list.files(d, full.names = TRUE) 31 | expect_equal(length(f), 2) 32 | 33 | a <- archive(data_file) 34 | expect_equal(sort(file.size(f)), sort(a[a$path %in% a$path[c(1, 3)], ][["size"]])) 35 | }) 36 | it("extracts given files in the archive, indexed by name", { 37 | d <- tempfile() 38 | on.exit(unlink(d, recursive = TRUE)) 39 | archive_extract(data_file, d, files = c("mtcars.csv", "iris.csv")) 40 | 41 | f <- list.files(d, full.names = TRUE) 42 | expect_equal(length(f), 2) 43 | 44 | a <- archive(data_file) 45 | expect_equal(sort(file.size(f)), sort(a[a$path %in% c("mtcars.csv", "iris.csv"), ][["size"]])) 46 | }) 47 | it("can take options", { 48 | skip_on_os("windows") 49 | skip_on_os("solaris") 50 | skip_if_no_encoding("CP866") 51 | skip_if(l10n_info()$`UTF-8` == FALSE) 52 | 53 | out_dir <- tempfile() 54 | dir.create(out_dir) 55 | on.exit(unlink(out_dir, recursive = TRUE)) 56 | 57 | filename <- "\u043f\u0440\u0438\u0432\u0435\u0442" 58 | archive_extract(test_path("cp866.tar.Z.uu"), out_dir, filename, options = "hdrcharset=CP866") 59 | 60 | files <- list.files(out_dir) 61 | Encoding(files) <- "UTF-8" 62 | 63 | expect_equal(files, filename) 64 | }) 65 | 66 | it("can strip components if desired", { 67 | in_dir <- tempfile() 68 | out_dir <- tempfile() 69 | on.exit(unlink(c(in_dir, out_dir), recursive = TRUE)) 70 | 71 | dir.create(file.path(in_dir, "foo/bar"), recursive = TRUE) 72 | 73 | write.csv(iris, file.path(in_dir, "foo", "bar", "iris.csv")) 74 | write.csv(mtcars, file.path(in_dir, "foo", "mtcars.csv")) 75 | 76 | ar <- tempfile(fileext = ".tar") 77 | archive_write_dir(ar, in_dir) 78 | 79 | archive_extract(ar, out_dir, strip_components = 1) 80 | 81 | expect_true(all(c("bar/iris.csv", "mtcars.csv") %in% list.files(out_dir, recursive = TRUE))) 82 | }) 83 | 84 | it("can handle password", { 85 | skip_on_os("windows") # see https://github.com/r-lib/archive/issues/98 86 | in_dir <- tempfile() 87 | out_dir <- tempfile() 88 | on.exit(unlink(c(in_dir, out_dir), recursive = TRUE)) 89 | dir.create(in_dir, recursive = TRUE) 90 | write.csv(iris, file.path(in_dir, "iris.csv")) 91 | 92 | ar <- tempfile(fileext = ".zip") 93 | archive_write_dir(ar, in_dir, options = "encryption=1", password = "foobar") 94 | 95 | expect_error(archive_extract(ar, out_dir), "Passphrase required for this entry") 96 | archive_extract(ar, out_dir, password = "foobar") 97 | expect_true(all(c("iris.csv") %in% list.files(out_dir, recursive = TRUE))) 98 | }) 99 | }) 100 | 101 | -------------------------------------------------------------------------------- /tests/testthat/test-archive_read.R: -------------------------------------------------------------------------------- 1 | data_file <- system.file(package = "archive", "extdata", "data.zip") 2 | 3 | describe("archive_read", { 4 | it("creates a read only connection", { 5 | in_con <- file(data_file) 6 | con <- archive_read(in_con) 7 | on.exit({close(in_con); close(con)}) 8 | expect_is(con, "connection") 9 | expect_is(con, "archive_read") 10 | 11 | s <- summary(con) 12 | 13 | expect_true(startsWith(s[["description"]], "archive_read")) 14 | expect_equal(s[["mode"]], "r") 15 | expect_equal(s[["text"]], "text") 16 | expect_equal(s[["opened"]], "closed") 17 | expect_equal(s[["can read"]], "yes") 18 | expect_equal(s[["can write"]], "no") 19 | }) 20 | it("can be read from with a text connection", { 21 | con <- archive_read(data_file) 22 | 23 | i <- iris 24 | i$Species <- as.character(i$Species) 25 | 26 | expect_equal(read.csv(con, stringsAsFactors = FALSE), head(i)) 27 | }) 28 | 29 | it("can be read from with a binary connection", { 30 | con <- archive_read(data_file, mode = "rb") 31 | 32 | text <- rawToChar(readBin(con, "raw", n = file.info(data_file)$size)) 33 | close(con) 34 | 35 | i <- iris 36 | i$Species <- as.character(i$Species) 37 | 38 | expect_equal(read.csv(text = text, stringsAsFactors = FALSE), head(i)) 39 | }) 40 | 41 | it("works with readRDS", { 42 | on.exit(unlink("archive.tar")) 43 | 44 | w_con <- archive_write(archive = "archive.tar", file = "mtcars") 45 | saveRDS(mtcars, w_con) 46 | close(w_con) 47 | 48 | r_con <- archive_read(archive = "archive.tar", file = "mtcars") 49 | out <- readRDS(r_con) 50 | expect_false(isOpen(r_con)) 51 | close(r_con) 52 | 53 | expect_identical(out, mtcars) 54 | }) 55 | it("takes options", { 56 | skip_on_os("windows") 57 | skip_on_os("solaris") 58 | skip_if_no_encoding("CP866") 59 | skip_if(l10n_info()$`UTF-8` == FALSE) 60 | 61 | con <- archive_read(test_path("cp866.tar.Z.uu"), "\u043f\u0440\u0438\u0432\u0435\u0442", mode = "rb", options = "hdrcharset=CP866") 62 | on.exit(close(con)) 63 | 64 | res <- readBin(con, what = raw(), n = 100) 65 | 66 | res_utf8 <- iconv(list(res), from = "CP866", to = "UTF-8") 67 | 68 | expect_equal(res_utf8, "\u0401\u0404\u0449\u045e\u0445\u0407") 69 | }) 70 | }) 71 | -------------------------------------------------------------------------------- /tests/testthat/test-archive_write.R: -------------------------------------------------------------------------------- 1 | describe("archive_write", { 2 | it("creates a writable connection", { 3 | a <- tempfile(fileext = ".zip") 4 | out_con <- archive_write(a, "mtcars.csv") 5 | on.exit({ 6 | unlink(a) 7 | }) 8 | 9 | expect_is(out_con, "connection") 10 | expect_is(out_con, "archive_write") 11 | 12 | write.csv(file = out_con, mtcars) 13 | 14 | in_con <- unz(a, "mtcars.csv") 15 | data <- read.csv(in_con, row.names = 1) 16 | 17 | expect_equal(data, mtcars) 18 | }) 19 | 20 | it("works with all supported formats", { 21 | extensions <- 22 | c("7z", 23 | "cpio", 24 | "iso", 25 | #"mtree", mtree is a textual description of a file hierarchy, rather than a normal archive format. 26 | "tar", 27 | "tgz", 28 | "taz", 29 | "tar.gz", 30 | "tbz", 31 | "tbz2", 32 | "tz2", 33 | "tar.bz2", 34 | "tlz", 35 | "tar.lzma", 36 | "txz", 37 | "tar.xz", 38 | #"tzo", 39 | "taZ", 40 | "tZ", 41 | #"warc", 42 | "jar", 43 | "zip") 44 | 45 | if (libarchive_version() >= "3.3.3") { 46 | extensions <- c(extensions, "tar.zst") 47 | } 48 | 49 | test_extension <- function(ext) { 50 | filename <- tempfile(fileext = paste0(".", ext)) 51 | on.exit(unlink(filename)) 52 | 53 | expect_error(write.csv(mtcars, archive_write(filename, "mtcars.csv")), NA, info = ext) 54 | expect_equal(read.csv(archive_read(filename, "mtcars.csv"), row.names = 1), mtcars) 55 | } 56 | for (ext in extensions) { 57 | test_extension(ext) 58 | } 59 | }) 60 | 61 | it("can take options", { 62 | f <- tempfile(fileext = ".tar.gz") 63 | f2 <- tempfile(fileext = ".tar.gz") 64 | on.exit(unlink(c(f, f2))) 65 | 66 | write.csv(mtcars, 67 | archive_write(f, "mtcars.csv", options = "compression-level=0") 68 | ) 69 | 70 | write.csv(mtcars, 71 | archive_write(f2, "mtcars.csv", options = "compression-level=9") 72 | ) 73 | 74 | expect_gt(file.size(f), file.size(f2)) 75 | }) 76 | it("zip writer can take options", { 77 | f <- tempfile(fileext = ".zip") 78 | f2 <- tempfile(fileext = ".zip") 79 | on.exit(unlink(c(f, f2))) 80 | 81 | write.csv(mtcars, 82 | archive_write(f, "mtcars.csv", options = "compression-level=1") 83 | ) 84 | 85 | write.csv(mtcars, 86 | archive_write(f2, "mtcars.csv", options = "compression-level=9") 87 | ) 88 | 89 | expect_gt(file.size(f), file.size(f2)) 90 | }) 91 | 92 | it("can translate character sets with a cpio archive", { 93 | skip_on_os("windows") 94 | skip_on_os("solaris") 95 | skip_if_no_encoding("CP866") 96 | skip_on_os("mac") # for some unknown reason this test fails on macOS 97 | 98 | f <- tempfile(fileext = ".cpio") 99 | 100 | filename <- "\u0401\u0404\u0449\u045e\u0445\u0407" 101 | write.csv(mtcars, 102 | archive_write(f, filename, options = "hdrcharset=CP866") 103 | ) 104 | a <- archive(f, options = "hdrcharset=CP866") 105 | 106 | expect_equal(a$path, filename) 107 | }) 108 | 109 | it("can translate character sets with a zip archive", { 110 | skip_on_os("windows") 111 | skip_on_os("solaris") 112 | skip_if_no_encoding("CP866") 113 | skip_on_os("mac") # for some unknown reason this test fails on macOS 114 | 115 | f <- tempfile(fileext = ".zip") 116 | 117 | filename <- "\u0401\u0404\u0449\u045e\u0445\u0407" 118 | write.csv(mtcars, 119 | archive_write(f, filename, options = "hdrcharset=CP866") 120 | ) 121 | a <- archive(f, options = "hdrcharset=CP866") 122 | 123 | expect_equal(a$path, filename) 124 | }) 125 | 126 | it("works with relative paths", { 127 | dir.create(d <- tempfile()) 128 | old <- getwd() 129 | on.exit(setwd(old), add = TRUE) 130 | setwd(d) 131 | 132 | dir.create("files") 133 | write.csv(iris, file.path("files", "iris.csv")) 134 | write.csv(mtcars, file.path("files", "mtcars.csv")) 135 | write.csv(airquality, file.path("files", "airquality.csv")) 136 | 137 | # Add some to a new archive 138 | a <- archive_write_dir("data.tar.gz", "files") 139 | expect_equal( 140 | sort(a$path), 141 | c("airquality.csv", "iris.csv", "mtcars.csv") 142 | ) 143 | }) 144 | 145 | it("works with absolute paths", { 146 | dir.create(d <- tempfile()) 147 | old <- getwd() 148 | on.exit(setwd(old), add = TRUE) 149 | setwd(d) 150 | 151 | dir.create("files") 152 | write.csv(iris, file.path("files", "iris.csv")) 153 | write.csv(mtcars, file.path("files", "mtcars.csv")) 154 | write.csv(airquality, file.path("files", "airquality.csv")) 155 | 156 | # Add some to a new archive 157 | a <- archive_write_dir("data.tar.gz", normalizePath("files")) 158 | expect_equal( 159 | sort(a$path), 160 | c("airquality.csv", "iris.csv", "mtcars.csv") 161 | ) 162 | }) 163 | }) 164 | -------------------------------------------------------------------------------- /tests/testthat/test-archive_write_dir.R: -------------------------------------------------------------------------------- 1 | describe("archive_write_dir", { 2 | it("can write a zip file", { 3 | skip_if(libarchive_zlib_version() == "0.0.0") 4 | dir <- tempfile() 5 | dir.create(dir) 6 | files <- c(mtcars = "mtcars.csv", iris = "iris.csv") 7 | 8 | write.csv(mtcars, file.path(dir, files[["mtcars"]])) 9 | write.csv(iris, file.path(dir, files[["iris"]])) 10 | 11 | archive_write_dir("data.zip", dir) 12 | on.exit(unlink(c(files, "data.zip"))) 13 | 14 | expect_equal( 15 | read.csv(unz("data.zip", files[["mtcars"]]), row.names = 1), 16 | mtcars) 17 | 18 | expect_equal( 19 | read.csv(unz("data.zip", files[["iris"]]), row.names = 1, stringsAsFactors = TRUE), 20 | iris) 21 | }) 22 | }) 23 | -------------------------------------------------------------------------------- /tests/testthat/test-archive_write_files.R: -------------------------------------------------------------------------------- 1 | describe("archive_write_files", { 2 | it("can write a zip file", { 3 | skip_if_not(libarchive_zlib_version() > "0.0.0") 4 | files <- c(mtcars = "mtcars.csv", iris = "iris.csv") 5 | archive <- tempfile(fileext = ".zip") 6 | 7 | on.exit(unlink(c(files, archive))) 8 | 9 | write.csv(mtcars, files[["mtcars"]]) 10 | write.csv(iris, files[["iris"]]) 11 | 12 | archive_write_files(archive, files) 13 | 14 | expect_equal( 15 | read.csv(unz(archive, files[["mtcars"]]), row.names = 1), 16 | mtcars) 17 | 18 | expect_equal( 19 | read.csv(unz(archive, files[["iris"]]), row.names = 1, stringsAsFactors = TRUE), 20 | iris) 21 | }) 22 | 23 | it("takes options", { 24 | skip_if_not(libarchive_zlib_version() > "0.0.0") 25 | files <- c(mtcars = tempfile(fileext = ".csv"), iris = tempfile(fileext = ".csv")) 26 | archive <- tempfile(fileext = ".zip") 27 | archive2 <- tempfile(fileext = ".zip") 28 | on.exit(unlink(c(files, archive))) 29 | 30 | write.csv(mtcars, files[["mtcars"]]) 31 | write.csv(iris, files[["iris"]]) 32 | 33 | archive_write_files(archive, files, options = "compression-level=0") 34 | 35 | archive_write_files(archive2, files, options = "compression-level=9") 36 | 37 | expect_gt(file.size(archive), file.size(archive2)) 38 | }) 39 | }) 40 | -------------------------------------------------------------------------------- /tests/testthat/test-file_read.R: -------------------------------------------------------------------------------- 1 | describe("file_read", { 2 | it("can read a gzip file", { 3 | write.csv(mtcars, 4 | gzfile("test.gz")) 5 | 6 | on.exit(unlink("test.gz")) 7 | 8 | expect_equal( 9 | read.csv(file_read("test.gz"), row.names = 1), 10 | mtcars) 11 | }) 12 | 13 | it("can read a xz file", { 14 | write.csv(mtcars, 15 | xzfile("test.xz")) 16 | 17 | on.exit(unlink("test.xz")) 18 | 19 | expect_equal( 20 | read.csv(file_read("test.xz"), row.names = 1), 21 | mtcars) 22 | }) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test-file_write.R: -------------------------------------------------------------------------------- 1 | if (libarchive_version() >= "3.2.0") { 2 | describe("file_write", { 3 | it("can write a gzip file", { 4 | write.csv(mtcars, 5 | file_write("test.gz")) 6 | on.exit(unlink("test.gz")) 7 | 8 | expect_equal( 9 | read.csv(gzfile("test.gz"), row.names = 1), 10 | mtcars) 11 | }) 12 | 13 | it("can take options", { 14 | f <- tempfile(fileext = ".gz") 15 | f2 <- tempfile(fileext = ".gz") 16 | on.exit(unlink(c(f, f2))) 17 | 18 | write.csv(mtcars, 19 | file_write(f, options = "compression-level=0") 20 | ) 21 | 22 | write.csv(mtcars, 23 | file_write(f2, options = "compression-level=9") 24 | ) 25 | 26 | expect_gt(file.size(f), file.size(f2)) 27 | }) 28 | 29 | it("can write a xz file", { 30 | write.csv(mtcars, 31 | file_write("test.xz")) 32 | on.exit(unlink("test.xz")) 33 | 34 | expect_equal( 35 | read.csv(xzfile("test.xz"), row.names = 1), 36 | mtcars) 37 | }) 38 | 39 | it("works with multiple filters", { 40 | filename <- "mtcars.bz2.uu" 41 | write.csv(mtcars, file_write(filename)) 42 | on.exit(unlink(filename)) 43 | 44 | expect_equal(read.csv(file_read(filename), row.names = 1), mtcars) 45 | }) 46 | 47 | it("works with all supported formats", { 48 | 49 | extensions <- c( 50 | "Z", 51 | "bz2", 52 | "gz", 53 | "lz", 54 | "lz4", 55 | "lzo", 56 | "lzma", 57 | "uu", 58 | "xz") 59 | 60 | f <- "mtcars.csv" 61 | test_extension <- function(ext) { 62 | filename <- paste0(f, ".", ext) 63 | on.exit(unlink(filename)) 64 | 65 | expect_error(write.csv(mtcars, file_write(filename)), NA, info = ext) 66 | expect_equal(read.csv(file_read(filename), row.names = 1), mtcars) 67 | } 68 | for (ext in extensions) { 69 | test_extension(ext) 70 | } 71 | 72 | }) 73 | }) 74 | } 75 | -------------------------------------------------------------------------------- /tools/dynamic-help.R: -------------------------------------------------------------------------------- 1 | 2 | cat( 3 | sep = "\n", 4 | paste0("\\renewcommand{\\eval}{\\Sexpr[stage=render,results=rd]{", Sys.getenv("R_PACKAGE_NAME"), ":::#1}}"), 5 | paste0("\\renewcommand{\\evalatinstall}{\\Sexpr[stage=install,results=rd]{", Sys.getenv("R_PACKAGE_NAME"), ":::#1}}"), 6 | # The 'top' variants do not work in R < 4.0.0, because a top-level \Sexpr 7 | # is not allowed, even if it is put in by an Rd macro. Once we do not 8 | # support R 3.6.x, we can use the 'top' variants. 9 | # See also the comments in the include_docs() function. 10 | paste0("\\renewcommand{\\evaltop}{\\Sexpr[stage=render,results=rd]{", Sys.getenv("R_PACKAGE_NAME"), ":::#1}}"), 11 | paste0("\\renewcommand{\\evalatinstalltop}{\\Sexpr[stage=install,results=rd]{", Sys.getenv("R_PACKAGE_NAME"), ":::#1}}"), 12 | file = "man/macros/eval2.Rd" 13 | ) 14 | -------------------------------------------------------------------------------- /tools/winlibs.R: -------------------------------------------------------------------------------- 1 | VERSION <- commandArgs(TRUE) 2 | if(!file.exists(sprintf("../windows/libarchive-%s/include", VERSION))){ 3 | download.file(sprintf("https://github.com/rwinlib/libarchive/archive/v%s.zip", VERSION), "lib.zip", quiet = TRUE) 4 | dir.create("../windows", showWarnings = FALSE) 5 | unzip("lib.zip", exdir = "../windows") 6 | unlink("lib.zip") 7 | } 8 | --------------------------------------------------------------------------------