├── air.toml ├── .github ├── .gitignore └── workflows │ ├── docker-container.yaml │ ├── s390x.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── vignettes ├── articles │ ├── .gitignore │ ├── results.parquet │ ├── sessioninfo.rds │ └── data-info.parquet ├── 2025-06-03 │ └── .gitignore └── .gitignore ├── cleanup ├── configure.win ├── src ├── thrift │ ├── config.h │ ├── thrift_export.h │ ├── thrift-config.h │ ├── qt │ │ ├── CMakeLists.txt │ │ └── TQIODeviceTransport.h │ ├── protocol │ │ ├── TProtocol.cpp │ │ ├── TProtocolTypes.h │ │ ├── TList.h │ │ ├── TMap.h │ │ ├── TSet.h │ │ ├── TMultiplexedProtocol.cpp │ │ ├── TBase64Utils.h │ │ └── TEnum.h │ ├── concurrency │ │ ├── Thread.cpp │ │ ├── ThreadFactory.cpp │ │ ├── Mutex.cpp │ │ ├── Exception.h │ │ ├── Mutex.h │ │ └── ThreadFactory.h │ ├── TBase.h │ ├── windows │ │ ├── SocketPair.h │ │ ├── Operators.h │ │ ├── WinFcntl.h │ │ ├── GetTimeOfDay.h │ │ ├── WinFcntl.cpp │ │ ├── TWinsockSingleton.cpp │ │ └── TWinsockSingleton.h │ ├── async │ │ ├── TAsyncChannel.cpp │ │ ├── TAsyncBufferProcessor.h │ │ ├── TAsyncProtocolProcessor.cpp │ │ ├── TAsyncProtocolProcessor.h │ │ ├── TEvhttpServer.h │ │ └── TAsyncChannel.h │ ├── TNonCopyable.h │ ├── transport │ │ ├── SocketCommon.h │ │ ├── TSimpleFileTransport.h │ │ ├── TWebSocketServer.cpp │ │ ├── TTransportException.cpp │ │ ├── TSimpleFileTransport.cpp │ │ ├── THttpServer.h │ │ ├── TSSLServerSocket.cpp │ │ ├── TNonblockingSSLServerSocket.cpp │ │ ├── TFDTransport.h │ │ ├── THttpClient.h │ │ └── TSSLServerSocket.h │ ├── server │ │ └── TServer.cpp │ ├── TOutput.h │ ├── TConfiguration.h │ └── numeric_cast.h ├── lib │ ├── nanoparquet.h │ ├── RleBpDecoder.cpp │ ├── endianness.h │ ├── bitpacker.h │ └── decode-utils.h ├── install.libs.R ├── test.cpp ├── RParquetAppender.h ├── flatbuffers │ ├── pch │ │ ├── pch.h │ │ └── flatc_pch.h │ ├── flex_flat_util.h │ ├── file_manager.h │ ├── buffer_ref.h │ ├── struct.h │ ├── Tensor.fbs │ └── string.h ├── miniz │ └── LICENSE ├── zstd │ ├── include │ │ 
└── zstd │ │ │ ├── compress │ │ │ ├── zstd_fast.h │ │ │ ├── zstd_compress_superblock.h │ │ │ ├── zstd_double_fast.h │ │ │ ├── zstd_compress_literals.h │ │ │ ├── zstd_opt.h │ │ │ └── zstd_compress_sequences.h │ │ │ ├── decompress │ │ │ ├── zstd_ddict.h │ │ │ └── zstd_decompress_block.h │ │ │ └── common │ │ │ └── xxhash_static.h │ └── LICENSE ├── parquet │ └── windows_compatibility.h ├── r-nanoparquet.h ├── RParquetAppender.cpp ├── protect.cpp ├── r-base64.cpp ├── snappy │ └── snappy-stubs-internal.cc ├── base64.h └── Makevars.in ├── .vscode ├── extensions.json └── settings.json ├── inst ├── extdata │ └── userdata1.parquet ├── WORDLIST └── COPYRIGHTS ├── tests ├── testthat │ ├── data │ │ ├── date.parquet │ │ ├── enum.parquet │ │ ├── gzip.parquet │ │ ├── map.parquet │ │ ├── zstd.parquet │ │ ├── binary.parquet │ │ ├── decimal.parquet │ │ ├── enum2.parquet │ │ ├── factor.parquet │ │ ├── float.parquet │ │ ├── float16.parquet │ │ ├── int64.parquet │ │ ├── mixed.parquet │ │ ├── mixed2.parquet │ │ ├── timetz.parquet │ │ ├── zstd-v2.parquet │ │ ├── dbp-int32.parquet │ │ ├── dbp-int64.parquet │ │ ├── decimal2.parquet │ │ ├── decimals.parquet │ │ ├── timestamp.parquet │ │ ├── mixed-miss.parquet │ │ ├── mtcars-arrow.parquet │ │ ├── parquet_go.parquet │ │ ├── timestamp-ms.parquet │ │ ├── uuid-arrow.parquet │ │ ├── v2-missing.parquet │ │ ├── alltypes_plain.parquet │ │ ├── duckdb-bug1589.parquet │ │ ├── int32_decimal.parquet │ │ ├── int64_decimal.parquet │ │ ├── byte_stream_split.parquet │ │ ├── dbp-int32-missing.parquet │ │ ├── delta_byte_array.parquet │ │ ├── nested_lists.snappy.parquet │ │ ├── alltypes_plain.snappy.parquet │ │ ├── float16_zeros_and_nans.parquet │ │ ├── rle_boolean_encoding.parquet │ │ ├── delta_length_byte_array.parquet │ │ ├── float16_nonzeros_and_nans.parquet │ │ ├── issue10279_delta_encoding.parquet │ │ ├── broken │ │ │ ├── polars-no-dict-offset.parquet │ │ │ └── decimal-1.parquet │ │ └── byte_stream_split_extended.gzip.parquet │ ├── _snaps │ │ ├── 
spelling.md │ │ ├── print.md │ │ ├── write-parquet-2.md │ │ ├── utils.md │ │ ├── deprecated.md │ │ ├── write-parquet-row-groups.md │ │ ├── read-parquet-4.md │ │ └── read-parquet-row-group.md │ ├── test-spelling.R │ ├── test-print.R │ ├── test-deprecated.R │ ├── test-write-large-files.R │ ├── test-infer-parquet-schema.R │ ├── test-read-parquet-connection.R │ ├── test-write-parquet-compression.R │ ├── test-read-parquet-row-group.R │ ├── test-pypolars.R │ ├── test-write-encodings-3.R │ ├── test-rle.R │ └── test-utils.R └── testthat.R ├── .covrignore ├── .gitignore ├── LICENSE ├── .Rbuildignore ├── codecov.yml ├── R ├── print.R ├── nanoparquet-package.R └── infer-parquet-schema.R ├── NAMESPACE ├── tools └── docker │ └── Dockerfile ├── man ├── rle_encode_int.Rd ├── rle_decode_int.Rd ├── infer_parquet_schema.Rd ├── read_parquet_info.Rd ├── read_parquet.Rd ├── read_parquet_schema.Rd ├── parquet_column_types.Rd ├── read_parquet_page.Rd └── read_parquet_pages.Rd ├── configure ├── DESCRIPTION └── _pkgdown.yml /air.toml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/articles/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /vignettes/2025-06-03/.gitignore: -------------------------------------------------------------------------------- 1 | /.quarto/ 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | 4 | /.quarto/ 5 | 
-------------------------------------------------------------------------------- /cleanup: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env sh 2 | 3 | rm -f src/Makevars 4 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env sh 2 | 3 | sh ./configure 4 | -------------------------------------------------------------------------------- /src/thrift/config.h: -------------------------------------------------------------------------------- 1 | #define SIGNED_RIGHT_SHIFT_IS 1 2 | #define ARITHMETIC_RIGHT_SHIFT 1 3 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "Posit.air-vscode" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /inst/extdata/userdata1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/inst/extdata/userdata1.parquet -------------------------------------------------------------------------------- /tests/testthat/data/date.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/date.parquet -------------------------------------------------------------------------------- /tests/testthat/data/enum.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/enum.parquet -------------------------------------------------------------------------------- /tests/testthat/data/gzip.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/gzip.parquet -------------------------------------------------------------------------------- /tests/testthat/data/map.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/map.parquet -------------------------------------------------------------------------------- /tests/testthat/data/zstd.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/zstd.parquet -------------------------------------------------------------------------------- /tests/testthat/data/binary.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/binary.parquet -------------------------------------------------------------------------------- /tests/testthat/data/decimal.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/decimal.parquet -------------------------------------------------------------------------------- /tests/testthat/data/enum2.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/enum2.parquet -------------------------------------------------------------------------------- /tests/testthat/data/factor.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/factor.parquet -------------------------------------------------------------------------------- 
/tests/testthat/data/float.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/float.parquet -------------------------------------------------------------------------------- /tests/testthat/data/float16.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/float16.parquet -------------------------------------------------------------------------------- /tests/testthat/data/int64.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/int64.parquet -------------------------------------------------------------------------------- /tests/testthat/data/mixed.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/mixed.parquet -------------------------------------------------------------------------------- /tests/testthat/data/mixed2.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/mixed2.parquet -------------------------------------------------------------------------------- /tests/testthat/data/timetz.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/timetz.parquet -------------------------------------------------------------------------------- /tests/testthat/data/zstd-v2.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/zstd-v2.parquet 
-------------------------------------------------------------------------------- /vignettes/articles/results.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/vignettes/articles/results.parquet -------------------------------------------------------------------------------- /vignettes/articles/sessioninfo.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/vignettes/articles/sessioninfo.rds -------------------------------------------------------------------------------- /tests/testthat/data/dbp-int32.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/dbp-int32.parquet -------------------------------------------------------------------------------- /tests/testthat/data/dbp-int64.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/dbp-int64.parquet -------------------------------------------------------------------------------- /tests/testthat/data/decimal2.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/decimal2.parquet -------------------------------------------------------------------------------- /tests/testthat/data/decimals.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/decimals.parquet -------------------------------------------------------------------------------- /tests/testthat/data/timestamp.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/timestamp.parquet -------------------------------------------------------------------------------- /vignettes/articles/data-info.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/vignettes/articles/data-info.parquet -------------------------------------------------------------------------------- /tests/testthat/data/mixed-miss.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/mixed-miss.parquet -------------------------------------------------------------------------------- /tests/testthat/data/mtcars-arrow.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/mtcars-arrow.parquet -------------------------------------------------------------------------------- /tests/testthat/data/parquet_go.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/parquet_go.parquet -------------------------------------------------------------------------------- /tests/testthat/data/timestamp-ms.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/timestamp-ms.parquet -------------------------------------------------------------------------------- /tests/testthat/data/uuid-arrow.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/uuid-arrow.parquet -------------------------------------------------------------------------------- 
/tests/testthat/data/v2-missing.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/v2-missing.parquet -------------------------------------------------------------------------------- /tests/testthat/data/alltypes_plain.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/alltypes_plain.parquet -------------------------------------------------------------------------------- /tests/testthat/data/duckdb-bug1589.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/duckdb-bug1589.parquet -------------------------------------------------------------------------------- /tests/testthat/data/int32_decimal.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/int32_decimal.parquet -------------------------------------------------------------------------------- /tests/testthat/data/int64_decimal.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/int64_decimal.parquet -------------------------------------------------------------------------------- /.covrignore: -------------------------------------------------------------------------------- 1 | src/fastpforlib 2 | src/flatbuffers 3 | src/miniz 4 | src/parquet 5 | src/simdutf 6 | src/snappy 7 | src/thrift 8 | src/zstd 9 | -------------------------------------------------------------------------------- /tests/testthat/data/byte_stream_split.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/byte_stream_split.parquet -------------------------------------------------------------------------------- /tests/testthat/data/dbp-int32-missing.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/dbp-int32-missing.parquet -------------------------------------------------------------------------------- /tests/testthat/data/delta_byte_array.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/delta_byte_array.parquet -------------------------------------------------------------------------------- /tests/testthat/data/nested_lists.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/nested_lists.snappy.parquet -------------------------------------------------------------------------------- /tests/testthat/data/alltypes_plain.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/alltypes_plain.snappy.parquet -------------------------------------------------------------------------------- /tests/testthat/data/float16_zeros_and_nans.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/float16_zeros_and_nans.parquet -------------------------------------------------------------------------------- /tests/testthat/data/rle_boolean_encoding.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/rle_boolean_encoding.parquet -------------------------------------------------------------------------------- /tests/testthat/data/delta_length_byte_array.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/delta_length_byte_array.parquet -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gcno 2 | *.o 3 | /src/nanoparquet.so 4 | *.gcda 5 | /docs 6 | *.o.bc 7 | /src/broken.dSYM 8 | /src/broken 9 | /src/Makevars 10 | /.dev 11 | -------------------------------------------------------------------------------- /tests/testthat/data/float16_nonzeros_and_nans.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/float16_nonzeros_and_nans.parquet -------------------------------------------------------------------------------- /tests/testthat/data/issue10279_delta_encoding.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/issue10279_delta_encoding.parquet -------------------------------------------------------------------------------- /tests/testthat/_snaps/spelling.md: -------------------------------------------------------------------------------- 1 | # spelling 2 | 3 | Code 4 | spelling::spell_check_package(pkg) 5 | Output 6 | No spelling errors found. 
7 | 8 | -------------------------------------------------------------------------------- /tests/testthat/data/broken/polars-no-dict-offset.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/broken/polars-no-dict-offset.parquet -------------------------------------------------------------------------------- /tests/testthat/data/byte_stream_split_extended.gzip.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-lib/nanoparquet/HEAD/tests/testthat/data/byte_stream_split_extended.gzip.parquet -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | if (.Platform$r_arch == "i386" && .Platform$OS.type == "windows") { 2 | message("Skipping tests on Windows i386") 3 | } else { 4 | testthat::test_check("nanoparquet", reporter = "progress") 5 | } 6 | -------------------------------------------------------------------------------- /tests/testthat/data/broken/decimal-1.parquet: -------------------------------------------------------------------------------- 1 | PAR1((, 2 | ,Hschema%col1% 3 |  4 |  5 | &col1 6 | J&J 7 |  *https://github.com/gaborcsardi/nanoparquetuPAR1 -------------------------------------------------------------------------------- /src/lib/nanoparquet.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "endianness.h" 4 | #if IS_BIG_ENDIAN 5 | #error Nanoparquet does not support big-endian platforms: https://github.com/r-lib/nanoparquet/issues/21 6 | #endif 7 | 8 | #include "bytebuffer.h" 9 | #include "ParquetOutFile.h" 10 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | YEAR: 2019-2024 2 | COPYRIGHT HOLDER: Hannes Mühleisen, Posit Software, PBC, 3 | Google Inc., Apache Software Foundation, Game Tools and Valve Software, 4 | Rich Geldreich and Tenacious Software LLC, Facebook, Inc, 5 | Daniel Lemire and others. 6 | See COPYRIGHTS file for details. 7 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^src/.*\.o$ 2 | ^src/.*\.so$ 3 | ^src/.*[.]dSYM$ 4 | ^src/broken$ 5 | ^\.github$ 6 | ^codecov\.yml$ 7 | ^.vscode$ 8 | ^[.]covrignore$ 9 | ^vignettes$ 10 | ^docs$ 11 | ^_pkgdown.yml$ 12 | ^vignettes/articles$ 13 | ^src/Makevars$ 14 | ^[\.]?air\.toml$ 15 | ^\.vscode$ 16 | ^\.dev$ 17 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /src/install.libs.R: -------------------------------------------------------------------------------- 1 | files <- Sys.glob(paste0("*", SHLIB_EXT)) 2 | dest <- file.path(R_PACKAGE_DIR, paste0('libs', R_ARCH)) 3 | dir.create(dest, recursive = TRUE, showWarnings = FALSE) 4 | file.copy(files, dest, overwrite = TRUE) 5 | if (file.exists("symbols.rds")) { 6 | file.copy("symbols.rds", dest, overwrite = TRUE) 7 | } 8 | -------------------------------------------------------------------------------- /tests/testthat/test-spelling.R: -------------------------------------------------------------------------------- 1 | test_that("spelling", { 2 | skip_on_cran() 3 | skip_on_covr() 4 
| 5 | pkg <- test_path("../../") 6 | if (!file.exists(file.path(pkg, "DESCRIPTION"))) { 7 | pkg <- file.path(pkg, "00_pkg_src", .packageName) 8 | } 9 | 10 | expect_snapshot({ 11 | spelling::spell_check_package(pkg) 12 | }) 13 | }) 14 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[c]": { 3 | "editor.formatOnSave": true, 4 | "editor.defaultFormatter": "llvm-vs-code-extensions.vscode-clangd" 5 | }, 6 | "clangd.arguments": [ 7 | "-header-insertion=never" 8 | ], 9 | "[r]": { 10 | "editor.formatOnSave": true, 11 | "editor.defaultFormatter": "Posit.air-vscode" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/print.md: -------------------------------------------------------------------------------- 1 | # obj_sum.nanoparquet_logical_type 2 | 3 | Code 4 | lapply(sch$logical_type, obj_sum.nanoparquet_logical_type) 5 | Output 6 | [[1]] 7 | [1] "INT(8, TRUE)" 8 | 9 | [[2]] 10 | [1] "DECIMAL(1, 3)" 11 | 12 | [[3]] 13 | [1] "TIME" 14 | 15 | [[4]] 16 | [1] "TIMESTAMP" 17 | 18 | 19 | -------------------------------------------------------------------------------- /R/print.R: -------------------------------------------------------------------------------- 1 | #' @exportS3Method pillar::obj_sum 2 | obj_sum.nanoparquet_logical_type <- function(x, ...) 
{ 3 | type <- x$type 4 | paste0( 5 | type, 6 | if (type == "INT") sprintf("(%d, %s)", x$bit_width, x$is_signed), 7 | if (type == "DECIMAL") sprintf("(%d, %d)", x$scale %||% 0, x$precision), 8 | if (type %in% c("TIME", "TIMESTAMP")) { 9 | sprintf("(%s, %s)", x$is_adjusted_to_utc, x$unit) 10 | } 11 | ) 12 | } 13 | -------------------------------------------------------------------------------- /tests/testthat/test-print.R: -------------------------------------------------------------------------------- 1 | test_that("obj_sum.nanoparquet_logical_type", { 2 | sch <- parquet_schema( 3 | "INT_8", 4 | list("DECIMAL", scale = 1, precision = 3, primitive_type = "INT32"), 5 | list("TIME", is_adjusted_utc = TRUE, unit = "MILLIS"), 6 | list("TIMESTAMP", is_adjusted_utc = TRUE, unit = "MICROS") 7 | ) 8 | expect_snapshot({ 9 | lapply(sch$logical_type, obj_sum.nanoparquet_logical_type) 10 | }) 11 | }) 12 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(pillar::obj_sum,nanoparquet_logical_type) 4 | export(append_parquet) 5 | export(infer_parquet_schema) 6 | export(parquet_column_types) 7 | export(parquet_info) 8 | export(parquet_metadata) 9 | export(parquet_options) 10 | export(parquet_schema) 11 | export(read_parquet) 12 | export(read_parquet_info) 13 | export(read_parquet_metadata) 14 | export(read_parquet_schema) 15 | export(write_parquet) 16 | useDynLib(nanoparquet, .registration=TRUE) 17 | -------------------------------------------------------------------------------- /tools/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/r-hub/r-minimal/r-minimal:release AS build 2 | 3 | COPY . 
/root/nanoparquet 4 | 5 | RUN mkdir -p /usr/local/lib/R/site-library 6 | 7 | RUN installr -d local::/root/nanoparquet 8 | 9 | FROM ghcr.io/r-hub/r-minimal/r-minimal:release AS final 10 | 11 | COPY --from=build /usr/local/lib/R/site-library /usr/local/lib/R/site-library 12 | 13 | RUN R -q -e 'library(nanoparquet); write_parquet(mtcars, "/tmp/mtcars.parquet"); read_parquet("/tmp/mtcars.parquet")' && \ 14 | rm -rf /tmp/Rtmp* 15 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | ALTREP 2 | BROTLI 3 | BSON 4 | CMD 5 | Checksumming 6 | DuckDB 7 | ENUM 8 | GZIP 9 | Gzip 10 | INTSXP 11 | JSON 12 | LGLSXP 13 | LLC 14 | LZ 15 | LZO 16 | MILLIS 17 | ORCID 18 | PBC 19 | POSIXct 20 | Performant 21 | Polars 22 | REALSXP 23 | RLE 24 | STRSXP 25 | UUID 26 | WebAssembly 27 | ZSTD 28 | Zstd 29 | checksums 30 | codec 31 | codecs 32 | difftime 33 | duckdb 34 | duckplyr 35 | fastparquet 36 | hms 37 | millis 38 | multithreaded 39 | nanoparquet's 40 | nanos 41 | nycflights 42 | readr 43 | schemas 44 | subprocess 45 | -------------------------------------------------------------------------------- /src/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "lib/memstream.h" 3 | 4 | extern "C" { 5 | 6 | SEXP test_memstream() { 7 | MemStream ms(10); 8 | std::ostream &os = ms.stream(); 9 | 10 | os << "This is a test" << "\n"; 11 | os << "This is a test" << "\n"; 12 | os << "This is a test" << "\n"; 13 | os << "This is a test" << "\n"; 14 | os << "This is a test" << "\n"; 15 | 16 | SEXP res = Rf_allocVector(RAWSXP, ms.size()); 17 | ms.copy(RAW(res), ms.size()); 18 | 19 | return res; 20 | } 21 | 22 | } // extern "C" 23 | -------------------------------------------------------------------------------- /src/lib/RleBpDecoder.cpp: 
-------------------------------------------------------------------------------- 1 | #include "RleBpDecoder.h" 2 | 3 | const uint32_t RleBpDecoder::BITPACK_MASKS[] = { 4 | 0, 1, 3, 7, 15, 31, 63, 5 | 127, 255, 511, 1023, 2047, 4095, 8191, 6 | 16383, 32767, 65535, 131071, 262143, 524287, 1048575, 7 | 2097151, 4194303, 8388607, 16777215, 33554431, 67108863, 134217727, 8 | 268435455, 536870911, 1073741823, 2147483647}; 9 | 10 | const uint8_t RleBpDecoder::BITPACK_DLEN = 8; 11 | -------------------------------------------------------------------------------- /src/thrift/thrift_export.h: -------------------------------------------------------------------------------- 1 | #ifndef THRIFT_EXPORT_H 2 | #define THRIFT_EXPORT_H 3 | 4 | #ifdef THRIFT_STATIC_DEFINE 5 | # define THRIFT_EXPORT 6 | #elif defined(_MSC_VER ) 7 | # ifndef THRIFT_EXPORT 8 | # ifdef thrift_EXPORTS 9 | /* We are building this library */ 10 | # define THRIFT_EXPORT __declspec(dllexport) 11 | # else 12 | /* We are using this library */ 13 | # define THRIFT_EXPORT __declspec(dllimport) 14 | # endif 15 | # endif 16 | #else 17 | # define THRIFT_EXPORT 18 | #endif 19 | 20 | #endif /* THRIFT_EXPORT_H */ 21 | -------------------------------------------------------------------------------- /tests/testthat/test-deprecated.R: -------------------------------------------------------------------------------- 1 | test_that("deprecated warnings", { 2 | pf <- test_path("data/alltypes_plain.parquet") 3 | expect_snapshot( 4 | sch <- parquet_schema(pf) 5 | ) 6 | expect_equal(sch, read_parquet_schema(pf)) 7 | 8 | expect_snapshot( 9 | pct <- parquet_column_types(mtcars) 10 | ) 11 | 12 | expect_snapshot( 13 | mtd <- parquet_metadata(pf) 14 | ) 15 | expect_equal(mtd, read_parquet_metadata(pf)) 16 | 17 | expect_snapshot( 18 | info <- parquet_info(pf) 19 | ) 20 | expect_equal(info, read_parquet_info(pf)) 21 | }) 22 | -------------------------------------------------------------------------------- 
/tests/testthat/test-write-large-files.R: -------------------------------------------------------------------------------- 1 | test_that("write large file", { 2 | if (Sys.getenv("NANOPARQUET_ALL_TESTS") != "true") { 3 | expect_true(TRUE) 4 | return() 5 | } 6 | mt <- test_df(missing = TRUE, factor = TRUE) 7 | larger <- do.call("rbind", replicate(1000, mt, simplify = FALSE)) 8 | big <- do.call("rbind", replicate(100, larger, simplify = FALSE)) 9 | large <- do.call("rbind", replicate(10, big, simplify = FALSE)) 10 | 11 | tmp <- tempfile(fileext = ".parquet") 12 | system.time(write_parquet(large, tmp)) 13 | system.time(large2 <- read_parquet(tmp)) 14 | expect_equal(large, large2) 15 | }) 16 | -------------------------------------------------------------------------------- /tests/testthat/test-infer-parquet-schema.R: -------------------------------------------------------------------------------- 1 | test_that("infer_parquet_schema", { 2 | expect_snapshot({ 3 | as.data.frame(infer_parquet_schema(test_df(missing = FALSE))) 4 | }) 5 | expect_snapshot({ 6 | as.data.frame(infer_parquet_schema(test_df(missing = TRUE))) 7 | }) 8 | }) 9 | 10 | test_that("logical_to_converted", { 11 | expect_snapshot(error = TRUE, { 12 | logical_to_converted(list("DECIMAL")) 13 | logical_to_converted(list("TIME", TRUE, "SECS")) 14 | logical_to_converted(list("TIMESTAMP", TRUE, "SECS")) 15 | logical_to_converted(list("INT")) 16 | logical_to_converted(list("FOOBAR")) 17 | }) 18 | }) 19 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/write-parquet-2.md: -------------------------------------------------------------------------------- 1 | # REQ PLAIN 2 | 3 | Code 4 | tapply(pgs$num_values, pgs$column + 1, sum) 5 | Output 6 | 1 2 3 4 7 | 10000 10000 10000 10000 8 | 9 | --- 10 | 11 | Code 12 | tapply(pgs$num_values, pgs$column + 1, sum) 13 | Output 14 | 1 2 3 4 15 | 10000 10000 10000 10000 16 | 17 | # OPT PLAIN 18 | 19 | Code 20 | 
tapply(pgs$num_values, pgs$column + 1, sum) 21 | Output 22 | 1 2 3 4 23 | 10000 10000 10000 10000 24 | 25 | --- 26 | 27 | Code 28 | tapply(pgs$num_values, pgs$column + 1, sum) 29 | Output 30 | 1 2 3 4 31 | 10000 10000 10000 10000 32 | 33 | -------------------------------------------------------------------------------- /man/rle_encode_int.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/porcelain.R 3 | \name{rle_encode_int} 4 | \alias{rle_encode_int} 5 | \title{RLE encode integers} 6 | \usage{ 7 | rle_encode_int(x) 8 | } 9 | \arguments{ 10 | \item{x}{Integer vector.} 11 | } 12 | \value{ 13 | Raw vector, the encoded integers. It has two attributes: 14 | \itemize{ 15 | \item \code{bit_width}: the number of bits needed to encode the input, and 16 | \item \code{length}: length of the original integer input. 17 | } 18 | } 19 | \description{ 20 | RLE encode integers 21 | } 22 | \seealso{ 23 | \code{\link[=rle_decode_int]{rle_decode_int()}} 24 | 25 | Other encodings: 26 | \code{\link{rle_decode_int}()} 27 | } 28 | \concept{encodings} 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/utils.md: -------------------------------------------------------------------------------- 1 | # as_count 2 | 3 | Code 4 | as_count(1:2) 5 | Condition 6 | Error in `as_count()`: 7 | ! x must be a count, i.e. an integer scalar 8 | Code 9 | as_count(0) 10 | Condition 11 | Error in `as_count()`: 12 | ! x must be a count, i.e. an integer scalar 13 | Code 14 | as_count(NA_real_) 15 | Condition 16 | Error in `as_count()`: 17 | ! x must be a count, i.e. an integer scalar 18 | Code 19 | as_count(-100) 20 | Condition 21 | Error in `as_count()`: 22 | ! x must be a count, i.e. an integer scalar 23 | Code 24 | as_count(-100L) 25 | Condition 26 | Error in `as_count()`: 27 | ! x must be a count, i.e.
an integer scalar 28 | 29 | -------------------------------------------------------------------------------- /man/rle_decode_int.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/porcelain.R 3 | \name{rle_decode_int} 4 | \alias{rle_decode_int} 5 | \title{RLE decode integers} 6 | \usage{ 7 | rle_decode_int( 8 | x, 9 | bit_width = attr(x, "bit_width"), 10 | length = attr(x, "length") \%||\% NA 11 | ) 12 | } 13 | \arguments{ 14 | \item{x}{Raw vector of the encoded integers.} 15 | 16 | \item{bit_width}{Bit width used for the encoding.} 17 | 18 | \item{length}{Length of the output. If \code{NA} then we assume that \code{x} 19 | starts with length of the output, encoded as a 4 byte integer.} 20 | } 21 | \value{ 22 | The decoded integer vector. 23 | } 24 | \description{ 25 | RLE decode integers 26 | } 27 | \seealso{ 28 | \code{\link[=rle_encode_int]{rle_encode_int()}} 29 | 30 | Other encodings: 31 | \code{\link{rle_encode_int}()} 32 | } 33 | \concept{encodings} 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /tests/testthat/test-read-parquet-connection.R: -------------------------------------------------------------------------------- 1 | test_that("not open", { 2 | pf <- test_path("data/factor.parquet") 3 | con <- file(pf) 4 | expect_equal( 5 | read_parquet(con), 6 | read_parquet(pf) 7 | ) 8 | # A closed (=invalid in R) connection will error here 9 | expect_error(isOpen(con)) 10 | }) 11 | 12 | test_that("open", { 13 | pf <- test_path("data/factor.parquet") 14 | con <- file(pf, open = "rb") 15 | on.exit(close(con), add = TRUE) 16 | expect_equal( 17 | read_parquet(con), 18 | read_parquet(pf) 19 | ) 20 | expect_true(isOpen(con)) 21 | }) 22 | 23 | test_that("raw, opened", { 24 | pf <- test_path("data/factor.parquet") 25 | bts <- readBin(pf, what = "raw", n = file.size(pf)) 26 | con <- 
rawConnection(bts, open = "rb") 27 | on.exit(close(con), add = TRUE) 28 | expect_equal( 29 | read_parquet(con), 30 | read_parquet(pf) 31 | ) 32 | expect_true(isOpen(con)) 33 | }) 34 | -------------------------------------------------------------------------------- /tests/testthat/test-write-parquet-compression.R: -------------------------------------------------------------------------------- 1 | test_that("gzip compression levels", { 2 | tmp <- tempfile(fileext = ".parquet") 3 | on.exit(unlink(tmp), add = TRUE) 4 | 5 | df <- test_df() 6 | for (level in c(NA_integer_, 0:11)) { 7 | write_parquet( 8 | df, 9 | tmp, 10 | compression = "gzip", 11 | options = parquet_options(compression_level = level) 12 | ) 13 | expect_equal(as.data.frame(read_parquet(tmp)), as.data.frame(df)) 14 | } 15 | }) 16 | 17 | test_that("zstd compression levels", { 18 | tmp <- tempfile(fileext = ".parquet") 19 | on.exit(unlink(tmp), add = TRUE) 20 | 21 | df <- test_df() 22 | for (level in c(NA_integer_, (-22):(22))) { 23 | write_parquet( 24 | df, 25 | tmp, 26 | compression = "zstd", 27 | options = parquet_options(compression_level = level) 28 | ) 29 | expect_equal(as.data.frame(read_parquet(tmp)), as.data.frame(df)) 30 | } 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/deprecated.md: -------------------------------------------------------------------------------- 1 | # deprecated warnings 2 | 3 | Code 4 | sch <- parquet_schema(pf) 5 | Condition 6 | Warning in `parquet_schema()`: 7 | Using `parquet_schema()` to read the schema from a file is deprecated. Use `read_parquet_schema()` instead. 8 | 9 | --- 10 | 11 | Code 12 | pct <- parquet_column_types(mtcars) 13 | Condition 14 | Warning in `parquet_column_types()`: 15 | `parquet_column_types()` is deprecated, please use `read_parquet_schema()` or `parquet_schema()` instead. 
16 | 17 | --- 18 | 19 | Code 20 | mtd <- parquet_metadata(pf) 21 | Condition 22 | Warning in `parquet_metadata()`: 23 | `parquet_metadata()` is deprecated. Please use `read_parquet_metadata()` instead. 24 | 25 | --- 26 | 27 | Code 28 | info <- parquet_info(pf) 29 | Condition 30 | Warning in `parquet_info()`: 31 | `parquet_info()` is deprecated, please use `read_parquet_info() instead. 32 | 33 | -------------------------------------------------------------------------------- /src/thrift/thrift-config.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifdef _WIN32 21 | #include 22 | #endif 23 | #include 24 | -------------------------------------------------------------------------------- /R/nanoparquet-package.R: -------------------------------------------------------------------------------- 1 | #' @useDynLib nanoparquet, .registration=TRUE 2 | #' @details 3 | #' ```{r include = FALSE} 4 | #' lines <- readLines("README.md") 5 | #' end <- which(lines == "") 6 | #' lines <- lines[-(1:end)] 7 | #' readme <- tempfile() 8 | #' writeLines(lines, readme) 9 | #' ``` 10 | #' ```{r child = readme} 11 | #' ``` 12 | "_PACKAGE" 13 | 14 | #' @name nanoparquet-types 15 | #' @title nanoparquet's type maps 16 | #' @description 17 | #' How nanoparquet maps R types to Parquet types. 18 | #' 19 | #' @details 20 | #' ```{r, child = "tools/types.Rmd"} 21 | #' ``` 22 | #' @seealso [nanoparquet-package] for options that modify the type 23 | #' mappings. 24 | NULL 25 | 26 | #' @name parquet-encodings 27 | #' @title Parquet encodings 28 | #' @description 29 | #' Various Parquet encodings 30 | #' 31 | #' @details 32 | #' ```{r, child = "tools/encodings.Rmd"} 33 | #' ``` 34 | #' @seealso [write_parquet()] on how to select a non-default encoding when 35 | #' writing Parquet files. 36 | NULL 37 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env sh 2 | 3 | if [ -z "$R_HOME" ]; then 4 | echo >&2 R_HOME is not set, are you running R CMD INSTALL? 5 | exit 1 6 | fi 7 | 8 | if [ "$R_OSTYPE" = "unix" ]; then 9 | UNAME=`uname` 10 | else 11 | UNAME=Windows 12 | fi 13 | 14 | if ! echo ${R_LIBRARY_DIR} | grep -q nanoparquet.Rcheck && 15 | ! 
echo ${R_LIBS} | grep -q nanoparquet.Rcheck && 16 | [ -z "$MAKEFLAGS" ]; then 17 | if [ -n "$NCPU" ]; then 18 | # already set 19 | true 20 | elif [ "$R_OSTYPE" = "windows" ]; then 21 | NCPU=$NUMBER_OF_PROCESSORS 22 | elif [ "$UNAME" = "Linux" ]; then 23 | NCPU=`cat /proc/cpuinfo | grep '^processor\s*:' | wc -l || true` 24 | elif [ "$UNAME" = "Darwin" ]; then 25 | NCPU=`sysctl -n hw.ncpu || true` 26 | else 27 | NCPU=unknown 28 | fi 29 | if expr "$NCPU" : '[1-9][0-9]*$' >/dev/null; then 30 | echo '**'" using $NCPU processors (unless configured differently in Makeconf)." 31 | MAKEFLAGS=-j${NCPU} 32 | fi 33 | fi 34 | 35 | cat src/Makevars.in | sed "s|@MAKEFLAGS@|${MAKEFLAGS}|" > src/Makevars 36 | -------------------------------------------------------------------------------- /tests/testthat/test-read-parquet-row-group.R: -------------------------------------------------------------------------------- 1 | test_that("read_parquet_row_group", { 2 | tmp <- tempfile(fileext = ".parquet") 3 | on.exit(unlink(tmp), add = TRUE) 4 | 5 | un <- function(x) { 6 | x <- as.data.frame(x) 7 | rownames(x) <- NULL 8 | x 9 | } 10 | 11 | do <- function(df, encoding = NULL) { 12 | write_parquet( 13 | df, 14 | tmp, 15 | row_groups = c(1L, 11L, 21L, 31L) 16 | ) 17 | expect_snapshot(as.data.frame(read_parquet_metadata(tmp)$row_groups)[-1]) 18 | expect_equal(un(read_parquet_row_group(tmp, 0L)), un(df[1:10, ])) 19 | expect_equal(un(read_parquet_row_group(tmp, 1L)), un(df[11:20, ])) 20 | expect_equal(un(read_parquet_row_group(tmp, 2L)), un(df[21:30, ])) 21 | expect_equal(un(read_parquet_row_group(tmp, 3L)), un(df[31:32, ])) 22 | } 23 | 24 | do(test_df(), encoding = "PLAIN") 25 | do(test_df(missing = TRUE), encoding = "PLAIN") 26 | do(test_df(), encoding = c(large = "RLE", "RLE_DICTIONARY")) 27 | do(test_df(missing = TRUE), encoding = c(large = "RLE", "RLE_DICTIONARY")) 28 | }) 29 | -------------------------------------------------------------------------------- /src/RParquetAppender.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "lib/ParquetOutFile.h" 3 | #include "RParquetOutFile.h" 4 | #include "RParquetReader.h" 5 | 6 | using namespace nanoparquet; 7 | 8 | class ParquetReaderAppender : public ParquetReader { 9 | public: 10 | ParquetReaderAppender(std::string filename, bool readwrite = false) 11 | : ParquetReader(filename, readwrite) {}; 12 | void alloc_column_chunk(ColumnChunk &cc) {}; 13 | void alloc_dict_page(DictPage &dict) {}; 14 | void alloc_data_page(DataPage &data) {}; 15 | }; 16 | 17 | class RParquetAppender { 18 | public: 19 | RParquetAppender( 20 | std::string filename, 21 | parquet::CompressionCodec::type codec, 22 | int compression_level, 23 | std::vector &row_groups, 24 | int data_page_version, 25 | bool overwrite_last_row_group 26 | ); 27 | void init_metadata( 28 | SEXP dfsxp, 29 | SEXP dim, 30 | SEXP required, 31 | SEXP options, 32 | SEXP schema, 33 | SEXP encoding 34 | ); 35 | void append(); 36 | private: 37 | ParquetReaderAppender reader; 38 | RParquetOutFile outfile; 39 | int data_page_version; 40 | bool overwrite_last_row_group; 41 | }; 42 | -------------------------------------------------------------------------------- /man/infer_parquet_schema.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/infer-parquet-schema.R 3 | \name{infer_parquet_schema} 4 | \alias{infer_parquet_schema} 5 | \title{Infer Parquet schema of a data frame} 6 | \usage{ 7 | infer_parquet_schema(df, options = parquet_options()) 8 | } 9 | \arguments{ 10 | \item{df}{Data frame.} 11 | 12 | \item{options}{Return value of \code{\link[=parquet_options]{parquet_options()}}, may modify the 13 | R to Parquet type mappings.} 14 | } 15 | \value{ 16 | Data frame, the inferred schema. 
It has the same columns as 17 | the return value of \code{\link[=read_parquet_schema]{read_parquet_schema()}}: 18 | \code{file_name}, \code{name}, \code{r_type}, \code{type}, \code{type_length}, \code{repetition_type}, \code{converted_type}, \code{logical_type}, \code{num_children}, \code{scale}, \code{precision}, \code{field_id}. 19 | } 20 | \description{ 21 | Infer Parquet schema of a data frame 22 | } 23 | \seealso{ 24 | \code{\link[=read_parquet_schema]{read_parquet_schema()}} to read the schema of a Parquet file, 25 | \code{\link[=parquet_schema]{parquet_schema()}} to create a Parquet schema from scratch. 26 | } 27 | -------------------------------------------------------------------------------- /src/flatbuffers/pch/pch.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FLATBUFFERS_PCH_H_ 18 | #define FLATBUFFERS_PCH_H_ 19 | 20 | // stl 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | // flatbuffers 36 | #include "flatbuffers/util.h" 37 | 38 | #endif // FLATBUFFERS_PCH_H_ 39 | -------------------------------------------------------------------------------- /.github/workflows/docker-container.yaml: -------------------------------------------------------------------------------- 1 | name: docker-container.yaml 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '55 22 6 * 6' 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | build: 12 | name: build 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 10 19 | 20 | - name: Login to GitHub Container Registry 21 | uses: docker/login-action@v3 22 | with: 23 | registry: ghcr.io 24 | username: ${{ github.repository_owner }} 25 | password: ${{ secrets.GITHUB_TOKEN }} 26 | 27 | - name: Set up QEMU 28 | uses: docker/setup-qemu-action@v3 29 | with: 30 | image: tonistiigi/binfmt:qemu-v8.1.5 31 | 32 | - name: Set up Docker Buildx 33 | uses: docker/setup-buildx-action@v3 34 | 35 | - name: Build 36 | uses: docker/build-push-action@v6 37 | with: 38 | platforms: linux/amd64,linux/arm64 39 | push: true 40 | file: tools/docker/Dockerfile 41 | tags: ghcr.io/r-lib/nanoparquet:${{ github.ref_type != 'tag' && 'devel' || github.ref_name }} 42 | -------------------------------------------------------------------------------- /src/thrift/qt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. 
See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | 20 | set( thriftcppqt5_SOURCES 21 | TQIODeviceTransport.cpp 22 | TQTcpServer.cpp 23 | ) 24 | set(CMAKE_AUTOMOC ON) 25 | find_package(Qt5 REQUIRED COMPONENTS Core Network) 26 | ADD_LIBRARY_THRIFT(thriftqt5 ${thriftcppqt5_SOURCES}) 27 | target_link_libraries(thriftqt5 PUBLIC thrift) 28 | target_link_libraries(thriftqt5 PUBLIC Qt5::Core Qt5::Network) 29 | -------------------------------------------------------------------------------- /.github/workflows/s390x.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | 4 | name: s390x.yaml 5 | 6 | jobs: 7 | s390x: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 11 | - uses: r-hub/actions/ctr-start@main 12 | with: 13 | image: ghcr.io/r-hub/containers/s390x 14 | platform: linux/s390x 15 | ctr-name: s390x 16 | 17 | - name: Test R in container 18 | run: | 19 | getRversion() 20 | R.version[["platform"]] 21 | shell: Rscript {0} 22 | 23 | - uses: r-lib/actions/setup-r-dependencies@v2 24 | with: 25 | pak-version: none 26 | cache-version: s390x-1 27 | extra-packages: any::rcmdcheck 28 | needs: check 29 | 30 | - uses: r-lib/actions/check-r-package@v2 31 | with: 32 | build_args: 
'c("--no-manual","--compact-vignettes=gs+qpdf")' 33 | upload-results: never 34 | upload-snapshots: false 35 | env: 36 | NOT_CRAN: true 37 | 38 | - uses: actions/upload-artifact@v4 39 | if: failure() 40 | with: 41 | name: ${{ format('{0}-{1}-results', runner.os, runner.arch) }} 42 | path: check 43 | -------------------------------------------------------------------------------- /man/read_parquet_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parquet-metadata.R 3 | \name{read_parquet_info} 4 | \alias{read_parquet_info} 5 | \alias{parquet_info} 6 | \title{Short summary of a Parquet file} 7 | \usage{ 8 | read_parquet_info(file) 9 | 10 | parquet_info(file) 11 | } 12 | \arguments{ 13 | \item{file}{Path to a Parquet file.} 14 | } 15 | \value{ 16 | Data frame with columns: 17 | \itemize{ 18 | \item \code{file_name}: file name. 19 | \item \code{num_cols}: number of (leaf) columns. 20 | \item \code{num_rows}: number of rows. 21 | \item \code{num_row_groups}: number of row groups. 22 | \item \code{file_size}: file size in bytes. 23 | \item \code{parquet_version}: Parquet version. 24 | \item \code{created_by}: A string scalar, usually the name of the software 25 | that created the file. \code{NA} if not available. 26 | } 27 | } 28 | \description{ 29 | Short summary of a Parquet file 30 | } 31 | \seealso{ 32 | \code{\link[=read_parquet_metadata]{read_parquet_metadata()}} to read more metadata, 33 | \code{\link[=read_parquet_schema]{read_parquet_schema()}} for column information. 34 | \code{\link[=read_parquet]{read_parquet()}}, \code{\link[=write_parquet]{write_parquet()}}, \link{nanoparquet-types}. 
35 | } 36 | -------------------------------------------------------------------------------- /src/miniz/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2013-2014 RAD Game Tools and Valve Software 2 | Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC 3 | 4 | All Rights Reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/thrift/protocol/TProtocol.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | 22 | namespace apache { 23 | namespace thrift { 24 | namespace protocol { 25 | 26 | TProtocol::~TProtocol() = default; 27 | uint32_t TProtocol::skip_virt(TType type) { 28 | return ::apache::thrift::protocol::skip(*this, type); 29 | } 30 | 31 | TProtocolFactory::~TProtocolFactory() = default; 32 | 33 | }}} // apache::thrift::protocol 34 | -------------------------------------------------------------------------------- /inst/COPYRIGHTS: -------------------------------------------------------------------------------- 1 | # nanoparquet 2 | 3 | (c) 2019-2024 Hannes Mühleisen as miniparquet 4 | (c) 2024 Posit Software, PBC (formerly RStudio) 5 | 6 | # Except for the following parts 7 | 8 | ## Snappy 9 | 10 | In `/src/snappy`. 11 | 12 | (c) Google Inc. 13 | 14 | ## Thrift 15 | 16 | In `src/thrift`. 17 | 18 | (c) Apache Software Foundation 19 | 20 | ## Flatbuffers 21 | 22 | In `src/flatbuffers`. 23 | 24 | (c) Google Inc. 25 | 26 | ## Arrow flatbuffer schemas 27 | 28 | `/src/flatbuffers/*.fbs`. 29 | 30 | (c) Apache Software Foundation 31 | 32 | ## Parquet Thrift schema 33 | 34 | `tools/parquet.thrift`. 35 | 36 | (c) Apache Software Foundation 37 | 38 | ## `src/miniz/miniz_wrapper.hpp` from DuckDB 39 | 40 | (c) Hannes Mühleisen 41 | (c) Mark Mytherin 42 | 43 | ## Miniz 44 | 45 | `/src/miniz`. 
46 | 47 | (c) 2013-2014 RAD Game Tools and Valve Software 48 | (c) 2010-2014 Rich Geldreich and Tenacious Software LLC 49 | 50 | ## Zstd 51 | 52 | `src/zstd` 53 | 54 | (c) 2016-present Facebook, Inc. 55 | 56 | ## FastPFor 57 | 58 | `src/fastpforlib` 59 | 60 | Daniel Lemire 61 | Leonid Boytsov 62 | Owen Kaser 63 | Maxime Caron 64 | Louis Dionne 65 | Michel Lemay 66 | Erik Kruus 67 | Andrea Bedini 68 | Matthias Petri 69 | Robson Braga Araujo 70 | Patrick Damme 71 | Xiening Dai 72 | Pavel Pavlov 73 | -------------------------------------------------------------------------------- /src/flatbuffers/pch/flatc_pch.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FLATBUFFERS_FLATC_PCH_H_ 18 | #define FLATBUFFERS_FLATC_PCH_H_ 19 | 20 | // stl 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | // flatbuffers 33 | #include "flatbuffers/pch/pch.h" 34 | #include "flatbuffers/code_generators.h" 35 | #include "flatbuffers/flatbuffers.h" 36 | #include "flatbuffers/flexbuffers.h" 37 | #include "flatbuffers/idl.h" 38 | 39 | #endif // FLATBUFFERS_FLATC_PCH_H_ 40 | -------------------------------------------------------------------------------- /src/thrift/protocol/TProtocolTypes.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef THRIFT_PROTOCOL_TPROTOCOLTYPES_H_ 21 | #define THRIFT_PROTOCOL_TPROTOCOLTYPES_H_ 1 22 | 23 | namespace apache { 24 | namespace thrift { 25 | namespace protocol { 26 | 27 | enum PROTOCOL_TYPES { 28 | T_BINARY_PROTOCOL = 0, 29 | T_JSON_PROTOCOL = 1, 30 | T_COMPACT_PROTOCOL = 2, 31 | }; 32 | } 33 | } 34 | } // apache::thrift::protocol 35 | 36 | #endif // #define _THRIFT_PROTOCOL_TPROTOCOLTYPES_H_ 1 37 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/compress/zstd_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_FAST_H 12 | #define ZSTD_FAST_H 13 | 14 | #include "zstd/common/mem.h" /* U32 */ 15 | #include "zstd/compress/zstd_compress_internal.h" 16 | 17 | namespace zstd { 18 | 19 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, 20 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 21 | size_t ZSTD_compressBlock_fast( 22 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 23 | void const* src, size_t srcSize); 24 | size_t ZSTD_compressBlock_fast_dictMatchState( 25 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 26 | void const* src, size_t srcSize); 27 | size_t ZSTD_compressBlock_fast_extDict( 28 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 29 | void const* src, size_t srcSize); 30 | 31 | } 32 | 33 | #endif /* ZSTD_FAST_H */ 34 | -------------------------------------------------------------------------------- /src/lib/endianness.h: -------------------------------------------------------------------------------- 1 | #ifndef ENDIANNESS_H 2 | #define ENDIANNESS_H 3 | 4 | /** 5 | * We want to check that it is actually a little endian system at 6 | * compile-time. 
7 | */ 8 | 9 | #if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) 10 | #define IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 11 | #elif defined(_WIN32) 12 | #define IS_BIG_ENDIAN 0 13 | #else 14 | #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ 15 | #include 16 | #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__) 17 | #include 18 | #else // defined(__APPLE__) || defined(__FreeBSD__) 19 | 20 | #ifdef __has_include 21 | #if __has_include() 22 | #include 23 | #endif //__has_include() 24 | #endif //__has_include 25 | 26 | #endif // defined(__APPLE__) || defined(__FreeBSD__) 27 | 28 | 29 | #if !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__) // byte-order macros unavailable: default to little endian (#ifndef cannot take an expression) 30 | #define IS_BIG_ENDIAN 0 31 | #endif 32 | 33 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 34 | #define IS_BIG_ENDIAN 0 35 | #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 36 | #define IS_BIG_ENDIAN 1 37 | #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 38 | 39 | #endif // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /src/thrift/concurrency/Thread.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License.
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | 22 | namespace apache { 23 | namespace thrift { 24 | namespace concurrency { 25 | 26 | void Thread::threadMain(std::shared_ptr thread) { 27 | thread->setState(started); 28 | thread->runnable()->run(); 29 | 30 | if (thread->getState() != stopping && thread->getState() != stopped) { 31 | thread->setState(stopping); 32 | } 33 | } 34 | 35 | } 36 | } 37 | } // apache::thrift::concurrency 38 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/write-parquet-row-groups.md: -------------------------------------------------------------------------------- 1 | # errors 2 | 3 | Code 4 | parquet_options(num_rows_per_row_group = "foobar") 5 | Condition 6 | Error in `as_count()`: 7 | ! num_rows_per_row_group must be a count, i.e. an integer scalar 8 | 9 | --- 10 | 11 | Code 12 | write_parquet(df, tmp, row_groups = "foobar") 13 | Condition 14 | Error in `parse_row_groups()`: 15 | ! Row groups must be specified as a growing positive integer vector, starting with 1. 16 | Code 17 | write_parquet(df, tmp, row_groups = c(100L, 1L)) 18 | Condition 19 | Error in `parse_row_groups()`: 20 | ! Row groups must be specified as a growing positive integer vector, starting with 1. 21 | Code 22 | write_parquet(df, tmp, row_groups = c(1L, 100L)) 23 | Condition 24 | Error in `write_parquet()`: 25 | ! 
Internal nanoparquet error, row index too large 26 | 27 | # non-factors write local dictionary 28 | 29 | Code 30 | for (do in dict_ofs) { 31 | print(read_parquet_page(tmp, do)[["data"]]) 32 | } 33 | Output 34 | [1] 01 00 00 00 61 35 | [1] 01 00 00 00 61 36 | [1] 01 00 00 00 61 01 00 00 00 62 37 | [1] 01 00 00 00 62 38 | [1] 01 00 00 00 62 39 | [1] 01 00 00 00 63 40 | [1] 01 00 00 00 63 41 | [1] 01 00 00 00 63 42 | 43 | -------------------------------------------------------------------------------- /src/thrift/TBase.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_TBASE_H_ 21 | #define _THRIFT_TBASE_H_ 1 22 | 23 | #include 24 | #include 25 | 26 | namespace apache { 27 | namespace thrift { 28 | 29 | class TBase { 30 | public: 31 | virtual ~TBase() = default; 32 | virtual uint32_t read(protocol::TProtocol* iprot) = 0; 33 | virtual uint32_t write(protocol::TProtocol* oprot) const = 0; 34 | }; 35 | } 36 | } // apache::thrift 37 | 38 | #endif // #ifndef _THRIFT_TBASE_H_ 39 | -------------------------------------------------------------------------------- /src/thrift/windows/SocketPair.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_WINDOWS_SOCKETPAIR_H_ 21 | #define _THRIFT_WINDOWS_SOCKETPAIR_H_ 1 22 | 23 | #if defined(_MSC_VER) && (_MSC_VER > 1200) 24 | #pragma once 25 | #endif // _MSC_VER 26 | 27 | #ifndef _WIN32 28 | #error This is a MSVC header only. 
29 | #endif 30 | 31 | // Win32 32 | #include 33 | #include 34 | 35 | int thrift_socketpair(int d, int type, int protocol, THRIFT_SOCKET sv[2]); 36 | 37 | #endif // _THRIFT_WINDOWS_SOCKETPAIR_H_ 38 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/compress/zstd_compress_superblock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_ADVANCED_H 12 | #define ZSTD_COMPRESS_ADVANCED_H 13 | 14 | /*-************************************* 15 | * Dependencies 16 | ***************************************/ 17 | 18 | #include "zstd.h" /* ZSTD_CCtx */ 19 | 20 | namespace zstd { 21 | /*-************************************* 22 | * Target Compressed Block Size 23 | ***************************************/ 24 | 25 | /* ZSTD_compressSuperBlock() : 26 | * Used to compress a super block when targetCBlockSize is being used. 27 | * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ 28 | size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, 29 | void* dst, size_t dstCapacity, 30 | void const* src, size_t srcSize, 31 | unsigned lastBlock); 32 | } 33 | 34 | 35 | #endif /* ZSTD_COMPRESS_ADVANCED_H */ 36 | -------------------------------------------------------------------------------- /src/flatbuffers/flex_flat_util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2022 Google Inc. All rights reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef FLATBUFFERS_FLEX_FLAT_UTIL_H_ 18 | #define FLATBUFFERS_FLEX_FLAT_UTIL_H_ 19 | 20 | #include "flatbuffers/flatbuffers.h" 21 | #include "flatbuffers/flexbuffers.h" 22 | 23 | namespace flexbuffers { 24 | 25 | // Verifies the `nested` flexbuffer within a flatbuffer vector is valid. 26 | inline bool VerifyNestedFlexBuffer( 27 | const flatbuffers::Vector *const nested, 28 | flatbuffers::Verifier &verifier) { 29 | if (!nested) return true; 30 | return verifier.Check(flexbuffers::VerifyBuffer( 31 | nested->data(), nested->size(), verifier.GetFlexReuseTracker())); 32 | } 33 | 34 | } // namespace flexbuffers 35 | 36 | #endif // FLATBUFFERS_FLEX_FLAT_UTIL_H_ 37 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/compress/zstd_double_fast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_DOUBLE_FAST_H 12 | #define ZSTD_DOUBLE_FAST_H 13 | 14 | #include "zstd/common/mem.h" /* U32 */ 15 | #include "zstd/compress/zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ 16 | 17 | namespace zstd { 18 | 19 | void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, 20 | void const* end, ZSTD_dictTableLoadMethod_e dtlm); 21 | size_t ZSTD_compressBlock_doubleFast( 22 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 23 | void const* src, size_t srcSize); 24 | size_t ZSTD_compressBlock_doubleFast_dictMatchState( 25 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 26 | void const* src, size_t srcSize); 27 | size_t ZSTD_compressBlock_doubleFast_extDict( 28 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 29 | void const* src, size_t srcSize); 30 | 31 | } 32 | 33 | #endif /* ZSTD_DOUBLE_FAST_H */ 34 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/compress/zstd_compress_literals.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_LITERALS_H 12 | #define ZSTD_COMPRESS_LITERALS_H 13 | 14 | #include "zstd/compress/zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ 15 | 16 | namespace zstd { 17 | 18 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 19 | 20 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); 21 | 22 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, 23 | ZSTD_hufCTables_t* nextHuf, 24 | ZSTD_strategy strategy, int disableLiteralCompression, 25 | void* dst, size_t dstCapacity, 26 | const void* src, size_t srcSize, 27 | void* entropyWorkspace, size_t entropyWorkspaceSize, 28 | const int bmi2); 29 | 30 | } 31 | 32 | #endif /* ZSTD_COMPRESS_LITERALS_H */ 33 | -------------------------------------------------------------------------------- /src/thrift/async/TAsyncChannel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | namespace apache { 23 | namespace thrift { 24 | namespace async { 25 | 26 | void TAsyncChannel::sendAndRecvMessage(const VoidCallback& cob, 27 | TMemoryBuffer* sendBuf, 28 | TMemoryBuffer* recvBuf) { 29 | std::function send_done 30 | = std::bind(&TAsyncChannel::recvMessage, this, cob, recvBuf); 31 | 32 | sendMessage(send_done, sendBuf); 33 | } 34 | } 35 | } 36 | } // apache::thrift::async 37 | -------------------------------------------------------------------------------- /src/thrift/TNonCopyable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef TNONCOPYABLE_H 21 | #define TNONCOPYABLE_H 22 | 23 | /** 24 | * @brief A simple non-copyable base class pattern. Derive from TNonCopyable to 25 | * make a class non-copyable and prohibit assignment and copy-construction. 
26 | */ 27 | namespace apache { 28 | namespace thrift { 29 | 30 | class TNonCopyable { 31 | protected: 32 | TNonCopyable() = default; 33 | ~TNonCopyable() = default; 34 | 35 | TNonCopyable(const TNonCopyable&) = delete; 36 | TNonCopyable& operator=(const TNonCopyable&) = delete; 37 | }; 38 | 39 | } 40 | } 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /tests/testthat/test-pypolars.R: -------------------------------------------------------------------------------- 1 | test_that("polars can read temporal types", { 2 | skip_without_polars() 3 | 4 | tmp <- tempfile(fileext = ".parquet") 5 | on.exit(unlink(tmp), add = TRUE) 6 | 7 | do <- function(df, path = tmp) { 8 | write_parquet(df, path) 9 | pyscript <- sprintf( 10 | r"[ 11 | import polars as pl 12 | pl.read_parquet("%s") 13 | ]", 14 | normalizePath(path, winslash = "/") 15 | ) 16 | pytmp <- tempfile(fileext = ".py") 17 | on.exit(unlink(pytmp), add = TRUE) 18 | writeLines(pyscript, pytmp) 19 | py <- if (Sys.which("python3") != "") "python3" else "python" 20 | processx::run(py, pytmp, stderr = "2>&1") 21 | } 22 | 23 | # Date 24 | df_date <- data.frame(x = Sys.Date()) 25 | expect_silent(do(df_date)) 26 | 27 | # hms, integer 28 | # it is unclear if this ever comes up in practice 29 | df_hmsi <- data.frame( 30 | x = structure(0L, units = "secs", class = c("hms", "difftime")) 31 | ) 32 | expect_silent(do(df_hmsi)) 33 | 34 | # hms, double 35 | df_hmsd <- data.frame(x = hms::hms(0)) 36 | expect_silent(do(df_hmsd)) 37 | 38 | # difftime 39 | df_difftime <- data.frame(x = as.difftime(1, units = "secs")) 40 | expect_silent(do(df_difftime)) 41 | 42 | # POSIXct 43 | df_posixct <- data.frame(x = Sys.time()) 44 | expect_silent(do(df_posixct)) 45 | 46 | # factor 47 | df_factor <- data.frame(x = as.factor(c("a", "a"))) 48 | expect_silent(do(df_factor)) 49 | }) 50 | -------------------------------------------------------------------------------- 
/src/parquet/windows_compatibility.h: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | #pragma once 19 | 20 | #ifdef _WIN32 21 | 22 | // Windows defines min and max macros that mess up std::min/max 23 | #ifndef NOMINMAX 24 | #define NOMINMAX 25 | #endif 26 | 27 | #define WIN32_LEAN_AND_MEAN 28 | 29 | // Set Windows 7 as a conservative minimum for Apache Arrow 30 | #if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x601 31 | #undef _WIN32_WINNT 32 | #endif 33 | #ifndef _WIN32_WINNT 34 | #define _WIN32_WINNT 0x601 35 | #endif 36 | 37 | #include 38 | #include 39 | 40 | #undef OPTIONAL // WTF?! 41 | #undef Realloc 42 | 43 | #endif // _WIN32 44 | -------------------------------------------------------------------------------- /src/thrift/windows/Operators.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_WINDOWS_OPERATORS_H_ 21 | #define _THRIFT_WINDOWS_OPERATORS_H_ 22 | 23 | #if defined(_MSC_VER) && (_MSC_VER > 1200) 24 | #pragma once 25 | #endif // _MSC_VER 26 | 27 | namespace apache { 28 | namespace thrift { 29 | 30 | class TEnumIterator; 31 | 32 | inline bool operator==(const TEnumIterator&, const TEnumIterator&) { 33 | // Not entirely sure what the test should be here. It is only to enable 34 | // iterator debugging and is not used in release mode. 35 | return true; 36 | } 37 | } 38 | } // apache::thrift 39 | 40 | #endif // _THRIFT_WINDOWS_OPERATORS_H_ 41 | -------------------------------------------------------------------------------- /src/thrift/concurrency/ThreadFactory.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace concurrency { 28 | 29 | std::shared_ptr ThreadFactory::newThread(std::shared_ptr runnable) const { 30 | std::shared_ptr result = std::make_shared(isDetached(), runnable); 31 | runnable->thread(result); 32 | return result; 33 | } 34 | 35 | Thread::id_t ThreadFactory::getCurrentThreadId() const { 36 | return std::this_thread::get_id(); 37 | } 38 | } 39 | } 40 | } // apache::thrift::concurrency 41 | -------------------------------------------------------------------------------- /src/thrift/protocol/TList.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TLIST_H_ 21 | #define _THRIFT_TLIST_H_ 22 | 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace protocol { 28 | 29 | /** 30 | * Helper class that encapsulates list metadata: 31 | * the element type (elemType_) and the element count (size_). 32 | */ 33 | class TList { 34 | public: 35 | TList() : elemType_(T_STOP), 36 | size_(0) { 37 | 38 | } 39 | // No default for `t`: TList() above is the default constructor; defaulting both arguments here made `TList()` ambiguous. 40 | TList(TType t, int s = 0) 41 | : elemType_(t), 42 | size_(s) { 43 | 44 | } 45 | 46 | TType elemType_; 47 | int size_; 48 | }; 49 | } 50 | } 51 | } // apache::thrift::protocol 52 | 53 | #endif // #ifndef _THRIFT_TLIST_H_ 54 | -------------------------------------------------------------------------------- /src/thrift/windows/WinFcntl.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_WINDOWS_FCNTL_H_ 21 | #define _THRIFT_WINDOWS_FCNTL_H_ 1 22 | 23 | #if defined(_MSC_VER) && (_MSC_VER > 1200) 24 | #pragma once 25 | #endif // _MSC_VER 26 | 27 | #ifndef _WIN32 28 | #error This is a MSVC header only. 29 | #endif 30 | 31 | #ifdef _WIN32_WCE 32 | #include 33 | #endif 34 | 35 | // Win32 36 | #include 37 | #include 38 | 39 | extern "C" { 40 | int thrift_fcntl(THRIFT_SOCKET fd, int cmd, int flags); 41 | } 42 | 43 | #ifdef _WIN32_WCE 44 | std::string thrift_wstr2str(std::wstring ws); 45 | #endif 46 | 47 | #endif // _THRIFT_WINDOWS_FCNTL_H_ 48 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/decompress/zstd_ddict.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | 12 | #ifndef ZSTD_DDICT_H 13 | #define ZSTD_DDICT_H 14 | 15 | /*-******************************************************* 16 | * Dependencies 17 | *********************************************************/ 18 | #include /* size_t */ 19 | #include "zstd.h" /* ZSTD_DDict, and several public functions */ 20 | 21 | namespace zstd { 22 | /*-******************************************************* 23 | * Interface 24 | *********************************************************/ 25 | 26 | /* note: several prototypes are already published in `zstd.h` : 27 | * ZSTD_createDDict() 28 | * ZSTD_createDDict_byReference() 29 | * ZSTD_createDDict_advanced() 30 | * ZSTD_freeDDict() 31 | * ZSTD_initStaticDDict() 32 | * ZSTD_sizeof_DDict() 33 | * ZSTD_estimateDDictSize() 34 | * ZSTD_getDictID_fromDict() 35 | */ 36 | 37 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); 38 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); 39 | 40 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); 41 | 42 | } 43 | 44 | #endif /* ZSTD_DDICT_H */ 45 | -------------------------------------------------------------------------------- /src/r-nanoparquet.h: -------------------------------------------------------------------------------- 1 | #ifndef R_NANOPARQUET_H 2 | #define R_NANOPARQUET_H 3 | 4 | #include 5 | #undef TYPE_BITS 6 | 7 | #include "lib/ParquetOutFile.h" 8 | 9 | void r_to_logical_type(SEXP logical_type, parquet::SchemaElement &sel); 10 | void nanoparquet_map_to_parquet_type( 11 | SEXP x, 12 | SEXP options, 13 | parquet::SchemaElement &sel, 14 | std::string &rtype); 15 | 16 | extern "C" { 17 | 18 | static const char *type_names[] = { 19 | "NULL", 20 | "a symbol", 21 | "a pairlist", 22 | "a closure", 23 | "an environment", 24 | "a promise", 25 | "a language object", 26 | "a special function", 27 | "a builtin function", 28 | "an internal character string", 29 | "a logical vector", 30 | "", 31 | "", 32 | "an integer vector", 33 | "a double 
vector", 34 | "a complex vector", 35 | "a character vector", 36 | "a dot-dot-dot object", 37 | "an \"any\" object", 38 | "a list", 39 | "an expression", 40 | "a byte code object", 41 | "an external pointer", 42 | "a weak reference", 43 | "a raw vector", 44 | "an S4 object" 45 | }; 46 | 47 | Int96 int32_to_int96(int32_t x) noexcept; 48 | Int96 double_to_int96(double x) noexcept; 49 | uint16_t double_to_float16(double x) noexcept; 50 | 51 | // these use the R API 52 | SEXP rf_get_list_element(SEXP list, const char *str) noexcept; 53 | SEXP rf_nanoparquet_any_na(SEXP x) noexcept; 54 | SEXP rf_nanoparquet_any_null(SEXP x) noexcept; 55 | SEXP rf_convert_logical_type(parquet::LogicalType ltype) noexcept; 56 | 57 | } // extern "C" 58 | 59 | #endif // R_NANOPARQUET_H 60 | -------------------------------------------------------------------------------- /src/thrift/protocol/TMap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_TMAP_H_ 21 | #define _THRIFT_TMAP_H_ 22 | 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace protocol { 28 | 29 | /** 30 | * Helper class that encapsulates map metadata. 31 | * 32 | */ 33 | class TMap { 34 | public: 35 | TMap() 36 | : keyType_(T_STOP), 37 | valueType_(T_STOP), 38 | size_(0) { 39 | 40 | } 41 | 42 | TMap(TType k, TType v, int s) 43 | : keyType_(k), 44 | valueType_(v), 45 | size_(s) { 46 | 47 | } 48 | 49 | TType keyType_; 50 | TType valueType_; 51 | int size_; 52 | }; 53 | } 54 | } 55 | } // apache::thrift::protocol 56 | 57 | #endif // #ifndef _THRIFT_TMAP_H_ 58 | -------------------------------------------------------------------------------- /src/thrift/windows/GetTimeOfDay.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_WINDOWS_GETTIMEOFDAY_H_ 21 | #define _THRIFT_WINDOWS_GETTIMEOFDAY_H_ 22 | 23 | #if defined(_MSC_VER) && (_MSC_VER > 1200) 24 | #pragma once 25 | #endif // _MSC_VER 26 | 27 | #ifndef _WIN32 28 | #error This is a MSVC header only. 
29 | #endif 30 | 31 | #include 32 | #include 33 | 34 | struct thrift_timespec { 35 | int64_t tv_sec; 36 | int64_t tv_nsec; 37 | }; 38 | 39 | int thrift_gettimeofday(struct timeval* tv, struct timezone* tz); 40 | int thrift_sleep(unsigned int seconds); 41 | int thrift_usleep(unsigned int micro_seconds); 42 | char* thrift_ctime_r(const time_t* _clock, char* _buf); 43 | 44 | #endif // _THRIFT_WINDOWS_GETTIMEOFDAY_H_ 45 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | permissions: 23 | contents: write 24 | steps: 25 | - uses: actions/checkout@v4 26 | 27 | - uses: r-lib/actions/setup-pandoc@v2 28 | 29 | - uses: r-lib/actions/setup-r@v2 30 | with: 31 | use-public-rspm: true 32 | 33 | - uses: r-lib/actions/setup-r-dependencies@v2 34 | with: 35 | install-quarto: true 36 | extra-packages: any::pkgdown, local::. 
37 | needs: website 38 | 39 | - uses: r-hub/actions/debug-shell@v1 40 | 41 | - name: Build site 42 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 43 | shell: Rscript {0} 44 | 45 | - name: Deploy to GitHub pages 🚀 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4.5.0 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /src/flatbuffers/file_manager.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2023 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef FLATBUFFERS_FILE_MANAGER_H_ 18 | #define FLATBUFFERS_FILE_MANAGER_H_ 19 | 20 | #include 21 | #include 22 | 23 | #include "flatbuffers/util.h" 24 | 25 | namespace flatbuffers { 26 | 27 | // A File interface to write data to file by default or 28 | // save only file names 29 | class FileManager { 30 | public: 31 | FileManager() = default; 32 | virtual ~FileManager() = default; 33 | 34 | virtual bool SaveFile(const std::string &absolute_file_name, 35 | const std::string &content) = 0; 36 | 37 | virtual bool LoadFile(const std::string &absolute_file_name, 38 | std::string *buf) = 0; 39 | 40 | private: 41 | // Copying is not supported. 
42 | FileManager(const FileManager &) = delete; 43 | FileManager &operator=(const FileManager &) = delete; 44 | }; 45 | 46 | } // namespace flatbuffers 47 | 48 | #endif // FLATBUFFERS_FILE_MANAGER_H_ 49 | -------------------------------------------------------------------------------- /src/thrift/transport/SocketCommon.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | * 19 | * @author: David Suárez 20 | */ 21 | 22 | #ifndef THRIFT_SOCKETCOMMON_H 23 | #define THRIFT_SOCKETCOMMON_H 24 | 25 | #include 26 | 27 | #ifdef HAVE_UNISTD_H 28 | #include 29 | #endif 30 | #ifdef HAVE_SYS_UN_H 31 | #include 32 | #endif 33 | #ifdef HAVE_AF_UNIX_H 34 | #include 35 | #endif 36 | #ifdef HAVE_SYS_SOCKET_H 37 | #include 38 | #endif 39 | 40 | #include 41 | 42 | namespace apache { 43 | namespace thrift { 44 | namespace transport { 45 | 46 | socklen_t fillUnixSocketAddr(struct sockaddr_un& address, std::string& path); 47 | 48 | } 49 | } 50 | } // apache::thrift::transport 51 | 52 | #endif //THRIFT_SOCKETCOMMON_H 53 | -------------------------------------------------------------------------------- /src/zstd/LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For Zstandard software 4 | 5 | Copyright (c) 2016-present, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | workflow_dispatch: 9 | 10 | name: test-coverage 11 | 12 | permissions: read-all 13 | 14 | jobs: 15 | test-coverage: 16 | runs-on: ubuntu-latest 17 | env: 18 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/setup-r@v2 24 | with: 25 | use-public-rspm: true 26 | Ncpus: 4 27 | 28 | - name: Install Python packages for testing 29 | run: | 30 | dir.create("~/.pip", showWarnings = FALSE, recursive = TRUE) 31 | writeLines( 32 | c("[global]", "break-system-packages = true", "user = true"), 33 | "~/.pip/pip.conf" 34 | ) 35 | if (Sys.which("pip3") != "") { 36 | system("pip3 install pyarrow pandas polars") 37 | } else { 38 | system("pip install pyarrow pandas polars") 39 | } 40 | shell: Rscript {0} 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: gaborcsardi/uncovr 45 | needs: coverage 46 | 47 | - name: Test coverage 48 | run: | 49 | uncovr::test() 50 | 
uncovr::codecov() 51 | shell: Rscript {0} 52 | env: 53 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} 54 | -------------------------------------------------------------------------------- /src/thrift/transport/TSimpleFileTransport.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TRANSPORT_TSIMPLEFILETRANSPORT_H_ 21 | #define _THRIFT_TRANSPORT_TSIMPLEFILETRANSPORT_H_ 1 22 | 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace transport { 28 | 29 | /** 30 | * Dead-simple wrapper around a file. 
31 | * 32 | * Writeable files are opened with O_CREAT and O_APPEND 33 | */ 34 | class TSimpleFileTransport : public TFDTransport { 35 | public: 36 | TSimpleFileTransport(const std::string& path, bool read = true, bool write = false, 37 | std::shared_ptr config = nullptr); 38 | }; 39 | } 40 | } 41 | } // apache::thrift::transport 42 | 43 | #endif // _THRIFT_TRANSPORT_TSIMPLEFILETRANSPORT_H_ 44 | -------------------------------------------------------------------------------- /src/thrift/protocol/TSet.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TSET_H_ 21 | #define _THRIFT_TSET_H_ 22 | 23 | #include 24 | #include 25 | 26 | namespace apache { 27 | namespace thrift { 28 | namespace protocol { 29 | 30 | /** 31 | * Helper class that encapsulates set metadata. 
32 | * 33 | */ 34 | class TSet { 35 | public: 36 | TSet() : elemType_(T_STOP), size_(0) { 37 | 38 | } 39 | 40 | TSet(TType t, int s) 41 | : elemType_(t), 42 | size_(s) { 43 | 44 | } 45 | 46 | TSet(TList list) 47 | : elemType_(list.elemType_), 48 | size_(list.size_) { 49 | 50 | } 51 | 52 | TType elemType_; 53 | int size_; 54 | }; 55 | } 56 | } 57 | } // apache::thrift::protocol 58 | 59 | #endif // #ifndef _THRIFT_TSET_H_ 60 | -------------------------------------------------------------------------------- /src/thrift/windows/WinFcntl.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | int thrift_fcntl(THRIFT_SOCKET fd, int cmd, int flags) { 23 | if (cmd != THRIFT_F_GETFL && cmd != THRIFT_F_SETFL) { 24 | return -1; 25 | } 26 | 27 | if (flags != THRIFT_O_NONBLOCK && flags != 0) { 28 | return -1; 29 | } 30 | 31 | if (cmd == THRIFT_F_GETFL) { 32 | return 0; 33 | } 34 | 35 | int res; 36 | if (flags) { 37 | res = ioctlsocket(fd, FIONBIO, reinterpret_cast(&(flags = 1))); 38 | } else { 39 | res = ioctlsocket(fd, FIONBIO, reinterpret_cast(&(flags = 0))); 40 | } 41 | 42 | return res; 43 | } 44 | 45 | #ifdef _WIN32_WCE 46 | std::string thrift_wstr2str(std::wstring ws) { 47 | std::string s(ws.begin(), ws.end()); 48 | return s; 49 | } 50 | #endif 51 | -------------------------------------------------------------------------------- /tests/testthat/test-write-encodings-3.R: -------------------------------------------------------------------------------- 1 | test_that("RLE BP encoder", { 2 | cases <- list( 3 | # bw 1 4 | c(rep(0L, 8), rep(1L, 8)), 5 | c(rep(0L, 9), rep(1L, 9)), 6 | c(rep(0L, 7), rep(1L, 7)), 7 | # bw 2 8 | c(rep(1L, 6), rep(2L, 6)), 9 | c(rep(1L, 7), rep(2L, 7)), 10 | c(rep(1L, 5), rep(2L, 5)), 11 | # bw 3 12 | c(rep(1L, 4), rep(7L, 4)), 13 | c(rep(1L, 5), rep(7L, 5)), 14 | c(rep(1L, 3), rep(7L, 3)), 15 | # bw 4 16 | c(rep(1L, 4), rep(15L, 4)), 17 | c(rep(1L, 5), rep(15L, 5)), 18 | c(rep(1L, 3), rep(15L, 3)), 19 | # bw 5 20 | c(rep(1L, 3), rep(31L, 3)), 21 | c(rep(1L, 4), rep(31L, 4)), 22 | c(rep(1L, 2), rep(31L, 2)), 23 | # bw 6 24 | c(rep(1L, 3), rep(63L, 3)), 25 | c(rep(1L, 4), rep(63L, 4)), 26 | c(rep(1L, 2), rep(63L, 2)), 27 | # bw 7 28 | c(rep(1L, 3), rep(127L, 3)), 29 | c(rep(1L, 4), rep(127L, 4)), 30 | c(rep(1L, 2), rep(127L, 2)), 31 | # bw 8 32 | c(rep(1L, 3), rep(255L, 3)), 33 | c(rep(1L, 4), rep(255L, 4)), 34 | c(rep(1L, 2), rep(255L, 2)), 35 | # bw 9 36 | c(rep(1L, 3), rep(511L, 3)), 37 | c(rep(1L, 4), rep(511L, 4)), 38 | c(rep(1L, 2), rep(511L, 2)), 39 | # bw 10 40 | c(rep(1L, 3), rep(1023L, 
3)), 41 | c(rep(1L, 4), rep(1023L, 4)), 42 | c(rep(1L, 2), rep(1023L, 2)), 43 | # bw 11 44 | c(rep(1L, 3), rep(2047L, 3)), 45 | c(rep(1L, 4), rep(2047L, 4)), 46 | c(rep(1L, 2), rep(2047L, 2)) 47 | ) 48 | 49 | for (case in cases) { 50 | expect_equal(rle_decode_int(rle_encode_int(case)), case) 51 | case <- rep(case, 100) 52 | expect_equal(rle_decode_int(rle_encode_int(case)), case) 53 | } 54 | }) 55 | -------------------------------------------------------------------------------- /src/thrift/server/TServer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | #ifdef HAVE_SYS_TIME_H 23 | #include 24 | #endif 25 | #ifdef HAVE_SYS_RESOURCE_H 26 | #include 27 | #endif 28 | 29 | #ifdef HAVE_UNISTD_H 30 | #include 31 | #endif 32 | 33 | namespace apache { 34 | namespace thrift { 35 | namespace server { 36 | 37 | #ifdef HAVE_SYS_RESOURCE_H 38 | int increase_max_fds(int max_fds = (1 << 24)) { 39 | struct rlimit fdmaxrl; 40 | 41 | for (fdmaxrl.rlim_cur = max_fds, fdmaxrl.rlim_max = max_fds; 42 | max_fds && (setrlimit(RLIMIT_NOFILE, &fdmaxrl) < 0); 43 | fdmaxrl.rlim_cur = max_fds, fdmaxrl.rlim_max = max_fds) { 44 | max_fds /= 2; 45 | } 46 | 47 | return static_cast(fdmaxrl.rlim_cur); 48 | } 49 | #endif 50 | } 51 | } 52 | } // apache::thrift::server 53 | -------------------------------------------------------------------------------- /tests/testthat/test-rle.R: -------------------------------------------------------------------------------- 1 | test_that("rle_decode", { 2 | # arrow might be broken on CRAN 3 | skip_on_cran() 4 | skip_without("arrow") 5 | tmp <- tempfile(fileext = ".parquet") 6 | d <- data.frame( 7 | x = as.factor(sample(letters, 100, replace = TRUE)) 8 | ) 9 | arrow::write_parquet(d, tmp) 10 | pages <- read_parquet_pages(tmp) 11 | data_page <- read_parquet_page(tmp, pages$page_header_offset[2]) 12 | data <- data_page$data 13 | def_len <- readBin(data, "int", n = 1) 14 | data <- data[-(1:(def_len + 4L))] 15 | bw <- as.integer(data[1]) 16 | data <- data[-1] 17 | idx <- rle_decode_int(data, bit_width = bw, length = nrow(d)) 18 | # parquet indices are zero based, hence the -1L 19 | expect_equal(idx, as.integer(d$x) - 1L) 20 | }) 21 | 22 | test_that("rle_encode", { 23 | chk <- function(x) { 24 | r <- rle_encode_int(x) 25 | x2 <- rle_decode_int(r, attr(r, "bit_width"), length(x)) 26 | expect_equal(x2, x) 27 | } 28 | chk(c(0:16, rep(1L, 20), 0:16, rep(2L, 20))) 29 | }) 30 | 31 | test_that("edge cases", { 32 | chk <- function(x) { 33 | r <- rle_encode_int(x) 34 | x2 <- rle_decode_int(r, 
attr(r, "bit_width"), length(x)) 35 | expect_equal(x2, x) 36 | } 37 | chk(integer()) 38 | chk(0L) 39 | chk(1L) 40 | chk(7L) 41 | chk(8L) 42 | chk(100L) 43 | chk(1:2) 44 | chk(1:3) 45 | chk(1:4) 46 | chk(1:5) 47 | chk(1:6) 48 | chk(1:7) 49 | chk(1:8) 50 | chk(1:9) 51 | chk(0:1) 52 | chk(0:2) 53 | chk(0:3) 54 | chk(0:4) 55 | chk(0:5) 56 | chk(0:6) 57 | chk(0:7) 58 | chk(0:8) 59 | chk(0:9) 60 | for (l in c(2, 7, 8, 9, 16, 256, 512, 513, 100000)) { 61 | chk(rep(0L, l)) 62 | chk(rep(1L, l)) 63 | } 64 | }) 65 | -------------------------------------------------------------------------------- /src/flatbuffers/buffer_ref.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef FLATBUFFERS_BUFFER_REF_H_ 18 | #define FLATBUFFERS_BUFFER_REF_H_ 19 | 20 | #include "flatbuffers/base.h" 21 | #include "flatbuffers/verifier.h" 22 | 23 | namespace flatbuffers { 24 | 25 | // Convenient way to bundle a buffer and its length, to pass it around 26 | // typed by its root. 27 | // A BufferRef does not own its buffer. 
28 | struct BufferRefBase {}; // for std::is_base_of 29 | 30 | template struct BufferRef : BufferRefBase { 31 | BufferRef() : buf(nullptr), len(0), must_free(false) {} 32 | BufferRef(uint8_t *_buf, uoffset_t _len) 33 | : buf(_buf), len(_len), must_free(false) {} 34 | 35 | ~BufferRef() { 36 | if (must_free) free(buf); 37 | } 38 | 39 | const T *GetRoot() const { return flatbuffers::GetRoot(buf); } 40 | 41 | bool Verify() { 42 | Verifier verifier(buf, len); 43 | return verifier.VerifyBuffer(nullptr); 44 | } 45 | 46 | uint8_t *buf; 47 | uoffset_t len; 48 | bool must_free; 49 | }; 50 | 51 | } // namespace flatbuffers 52 | 53 | #endif // FLATBUFFERS_BUFFER_REF_H_ 54 | -------------------------------------------------------------------------------- /src/RParquetAppender.cpp: -------------------------------------------------------------------------------- 1 | #include "RParquetAppender.h" 2 | 3 | RParquetAppender::RParquetAppender( 4 | std::string filename, 5 | parquet::CompressionCodec::type codec, 6 | int compression_level, 7 | std::vector &row_groups, 8 | int data_page_version, 9 | bool overwrite_last_row_group) 10 | : reader(filename, true), 11 | outfile(reader.pfile, codec, compression_level, row_groups), 12 | data_page_version(data_page_version), 13 | overwrite_last_row_group(overwrite_last_row_group) { 14 | } 15 | 16 | void RParquetAppender::init_metadata( 17 | SEXP dfsxp, 18 | SEXP dim, 19 | SEXP required, 20 | SEXP options, 21 | SEXP schema, 22 | SEXP encoding 23 | ) { 24 | 25 | std::fstream &pfile = reader.pfile; 26 | 27 | // set file pointer to the place where we need to write 28 | if (overwrite_last_row_group) { 29 | uint32_t nrgs = reader.file_meta_data_.row_groups.size(); 30 | const parquet::RowGroup &last_rg = reader.file_meta_data_.row_groups[nrgs-1]; 31 | int last_rg_size = last_rg.total_byte_size; 32 | // drop last row group from existing metadata, we are overwriting it 33 | reader.file_meta_data_.row_groups.pop_back(); 34 | 
pfile.seekp(-(reader.footer_len + 8 + last_rg_size), std::ios_base::end); 35 | } else { 36 | pfile.seekp(-(reader.footer_len + 8), std::ios_base::end); 37 | } 38 | 39 | outfile.data_page_version = data_page_version; 40 | 41 | outfile.init_append_metadata( 42 | dfsxp, 43 | dim, 44 | required, 45 | options, 46 | reader.file_meta_data_.schema, 47 | encoding, 48 | reader.file_meta_data_.row_groups, 49 | reader.file_meta_data_.key_value_metadata 50 | ); 51 | } 52 | 53 | void RParquetAppender::append() { 54 | outfile.append(); 55 | } 56 | -------------------------------------------------------------------------------- /R/infer-parquet-schema.R: -------------------------------------------------------------------------------- 1 | #' Infer Parquet schema of a data frame 2 | #' 3 | #' @param df Data frame. 4 | #' @param options Return value of [parquet_options()], may modify the 5 | #' R to Parquet type mappings. 6 | #' @return Data frame, the inferred schema. It has the same columns as 7 | #' the return value of [read_parquet_schema()]: 8 | #' `r paste0("\u0060", names(infer_parquet_schema(mtcars)), "\u0060", collapse = ", ")`. 9 | #' 10 | #' @seealso [read_parquet_schema()] to read the schema of a Parquet file, 11 | #' [parquet_schema()] to create a Parquet schema from scratch. 
12 | #' @export 13 | 14 | infer_parquet_schema <- function(df, options = parquet_options()) { 15 | types <- .Call(rf_nanoparquet_map_to_parquet_types, df, options) 16 | lt <- unname(lapply(types, function(x) x[[3]])) 17 | ct <- lapply(lt, function(x) if (!is.null(x)) logical_to_converted(x)) 18 | type_tab <- data.frame( 19 | file_name = rep(NA_character_, length(df)), 20 | name = names(df), 21 | r_type = vapply(types, function(x) x[[2]], ""), 22 | type = vapply(types, function(x) x[[1]], ""), 23 | type_length = rep(NA_integer_, length(df)), 24 | repetition_type = ifelse(vapply(df, anyNA, TRUE), "OPTIONAL", "REQUIRED"), 25 | converted_type = map_chr(ct, function(x) { 26 | x[["converted_type"]] %||% NA_character_ 27 | }), 28 | logical_type = I(lt), 29 | num_children = rep(NA_integer_, length(df)), 30 | scale = map_int(ct, function(x) { 31 | x[["scale"]] %||% NA_integer_ 32 | }), 33 | precision = map_int(ct, function(x) { 34 | x[["precision"]] %||% NA_integer_ 35 | }), 36 | field_id = rep(NA_integer_, length(df)) 37 | ) 38 | 39 | rownames(type_tab) <- NULL 40 | class(type_tab) <- c("tbl", class(type_tab)) 41 | type_tab 42 | } 43 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: nanoparquet 2 | Title: Read and Write 'Parquet' Files 3 | Version: 0.4.3.9000 4 | Authors@R: c( 5 | person("Gábor", "Csárdi", role = c("aut", "cre"), email = "csardi.gabor@gmail.com"), 6 | person("Hannes", "Mühleisen", role = c("aut", "cph"), comment = c(ORCID = "0000-0001-8552-0029")), 7 | person("Google Inc.", role = "cph"), 8 | person("Apache Software Foundation", role = "cph"), 9 | person("Posit Software, PBC", role = "cph"), 10 | person("RAD Game Tools", role = "cph"), 11 | person("Valve Software", role = "cph"), 12 | person("Tenacious Software LLC", role = "cph"), 13 | person("Facebook, Inc.", role = "cph")) 14 | Description: Self-sufficient reader 
and writer for flat 'Parquet' files. 15 | Can read most 'Parquet' data types. Can write many 'R' data types, 16 | including factors and temporal types. See docs for limitations. 17 | Depends: 18 | R (>= 4.0.0) 19 | License: MIT + file LICENSE 20 | URL: https://github.com/r-lib/nanoparquet, 21 | https://nanoparquet.r-lib.org/ 22 | BugReports: https://github.com/r-lib/nanoparquet/issues 23 | Encoding: UTF-8 24 | Suggests: 25 | arrow, 26 | bit64, 27 | DBI, 28 | duckdb (>= 1.4.0), 29 | hms, 30 | mockery, 31 | pillar, 32 | processx, 33 | rprojroot, 34 | spelling, 35 | testthat, 36 | tzdb, 37 | withr 38 | Roxygen: list(markdown = TRUE) 39 | RoxygenNote: 7.3.2.9000 40 | Config/testthat/edition: 3 41 | Config/testthat/parallel: TRUE 42 | Config/Needs/website: 43 | tidyverse/tidytemplate, 44 | r-lib/pkgdown, 45 | dplyr, 46 | gt, 47 | gtExtras, 48 | knitr, 49 | nycflights13, 50 | prettyunits, 51 | quarto, 52 | rmarkdown, 53 | sessioninfo, 54 | svglite 55 | Language: en-US 56 | Biarch: true 57 | -------------------------------------------------------------------------------- /src/thrift/protocol/TMultiplexedProtocol.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace apache { 25 | namespace thrift { 26 | namespace protocol { 27 | uint32_t TMultiplexedProtocol::writeMessageBegin_virt(const std::string& _name, 28 | const TMessageType _type, 29 | const int32_t _seqid) { 30 | if (_type == T_CALL || _type == T_ONEWAY) { 31 | return TProtocolDecorator::writeMessageBegin_virt(serviceName + separator + _name, 32 | _type, 33 | _seqid); 34 | } else { 35 | return TProtocolDecorator::writeMessageBegin_virt(_name, _type, _seqid); 36 | } 37 | } 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/flatbuffers/struct.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | #ifndef FLATBUFFERS_STRUCT_H_ 18 | #define FLATBUFFERS_STRUCT_H_ 19 | 20 | #include "flatbuffers/base.h" 21 | 22 | namespace flatbuffers { 23 | 24 | // "structs" are flat structures that do not have an offset table, thus 25 | // always have all members present and do not support forwards/backwards 26 | // compatible extensions. 
27 | 28 | class Struct FLATBUFFERS_FINAL_CLASS { 29 | public: 30 | template T GetField(uoffset_t o) const { 31 | return ReadScalar(&data_[o]); 32 | } 33 | 34 | template T GetStruct(uoffset_t o) const { 35 | return reinterpret_cast(&data_[o]); 36 | } 37 | 38 | const uint8_t *GetAddressOf(uoffset_t o) const { return &data_[o]; } 39 | uint8_t *GetAddressOf(uoffset_t o) { return &data_[o]; } 40 | 41 | private: 42 | // private constructor & copy constructor: you obtain instances of this 43 | // class by pointing to existing data only 44 | Struct(); 45 | Struct(const Struct &); 46 | Struct &operator=(const Struct &); 47 | 48 | uint8_t data_[1]; 49 | }; 50 | 51 | } // namespace flatbuffers 52 | 53 | #endif // FLATBUFFERS_STRUCT_H_ 54 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://nanoparquet.r-lib.org/ 2 | template: 3 | package: tidytemplate 4 | bootstrap: 5 5 | includes: 6 | in_header: | 7 | 8 | 9 | 10 | destination: docs 11 | 12 | development: 13 | mode: auto 14 | 15 | reference: 16 | - title: Parquet schemas and type mappings 17 | desc: | 18 | How nanoparquet maps R data types to Parquet data types 19 | and vice versa. 20 | contents: 21 | - infer_parquet_schema 22 | - nanoparquet-types 23 | - parquet-encodings 24 | - parquet_schema 25 | 26 | - title: Read Parquet files 27 | contents: 28 | - read_parquet 29 | 30 | - title: Write Parquet files 31 | contents: 32 | - append_parquet 33 | - write_parquet 34 | 35 | - title: Extract Parquet metadata 36 | contents: 37 | - read_parquet_info 38 | - read_parquet_metadata 39 | - read_parquet_schema 40 | 41 | - title: Nanoparquet options 42 | contents: 43 | - parquet_options 44 | 45 | - title: Debugging Parquet files 46 | desc: | 47 | These functions are useful for debugging possibly broken 48 | Parquet files and for nanoparquet developers. 
49 | contents: 50 | - read_parquet_pages 51 | - read_parquet_page 52 | 53 | - title: internal 54 | contents: 55 | - nanoparquet-package 56 | - nanoparquet 57 | - parquet_column_types 58 | 59 | news: 60 | releases: 61 | - text: "Version 0.4.0" 62 | href: https://www.tidyverse.org/blog/2025/01/nanoparquet-0-4-0/ 63 | - text: "Version 0.3.0" 64 | href: https://www.tidyverse.org/blog/2024/06/nanoparquet-0-3-0/ 65 | -------------------------------------------------------------------------------- /src/thrift/protocol/TBase64Utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_PROTOCOL_TBASE64UTILS_H_ 21 | #define _THRIFT_PROTOCOL_TBASE64UTILS_H_ 22 | 23 | #include 24 | #include 25 | 26 | namespace apache { 27 | namespace thrift { 28 | namespace protocol { 29 | 30 | // in must be at least len bytes 31 | // len must be 1, 2, or 3 32 | // buf must be a buffer of at least 4 bytes and may not overlap in 33 | // the data is not padded with '='; the caller can do this if desired 34 | void base64_encode(const uint8_t* in, uint32_t len, uint8_t* buf); 35 | 36 | // buf must be a buffer of at least 4 bytes and contain base64 encoded values 37 | // buf will be changed to contain output bytes 38 | // len is number of bytes to consume from input (must be 2, 3, or 4) 39 | // no '=' padding should be included in the input 40 | void base64_decode(uint8_t* buf, uint32_t len); 41 | } 42 | } 43 | } // apache::thrift::protocol 44 | 45 | #endif // #define _THRIFT_PROTOCOL_TBASE64UTILS_H_ 46 | -------------------------------------------------------------------------------- /src/thrift/concurrency/Mutex.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | 22 | #include 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace concurrency { 28 | 29 | /** 30 | * Implementation of Mutex class using C++11 std::timed_mutex 31 | * 32 | * Methods throw std::system_error on error. 33 | * 34 | * @version $Id:$ 35 | */ 36 | class Mutex::impl : public std::timed_mutex {}; 37 | 38 | Mutex::Mutex() : impl_(new Mutex::impl()) { 39 | } 40 | 41 | void* Mutex::getUnderlyingImpl() const { 42 | return impl_.get(); 43 | } 44 | 45 | void Mutex::lock() const { 46 | impl_->lock(); 47 | } 48 | 49 | bool Mutex::trylock() const { 50 | return impl_->try_lock(); 51 | } 52 | 53 | bool Mutex::timedlock(int64_t ms) const { 54 | return impl_->try_lock_for(std::chrono::milliseconds(ms)); 55 | } 56 | 57 | void Mutex::unlock() const { 58 | impl_->unlock(); 59 | } 60 | 61 | } 62 | } 63 | } // apache::thrift::concurrency 64 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/read-parquet-4.md: -------------------------------------------------------------------------------- 1 | # DECIMAL converted type 2 | 3 | Code 4 | as.data.frame(read_parquet(tmp)) 5 | Output 6 | d 7 | 1 100 8 | Code 9 | as.data.frame(read_parquet_schema(tmp))[, -1] 10 | Output 11 | name r_type type type_length repetition_type converted_type logical_type 12 | 1 schema NA 13 | 2 d integer INT32 NA REQUIRED DECIMAL 14 | num_children scale precision field_id 15 | 1 1 NA NA NA 16 | 2 NA 2 5 NA 17 | Code 18 | as.data.frame(read_parquet_schema(tmp))[["logical_type"]] 19 | Output 20 | [[1]] 21 | NULL 22 | 23 | [[2]] 24 | NULL 25 | 26 | 27 | --- 28 | 29 | Code 30 | as.data.frame(read_parquet(tmp)) 31 | Output 32 | d 33 | 1 100 34 | Code 35 | as.data.frame(read_parquet_schema(tmp))[, -1] 36 | Output 37 | name r_type type type_length repetition_type converted_type 
logical_type 38 | 1 schema NA 39 | 2 d integer INT32 NA REQUIRED DECIMAL 40 | num_children scale precision field_id 41 | 1 1 NA NA NA 42 | 2 NA 2 5 NA 43 | Code 44 | as.data.frame(read_parquet_schema(tmp))[["logical_type"]] 45 | Output 46 | [[1]] 47 | NULL 48 | 49 | [[2]] 50 | NULL 51 | 52 | 53 | # DECIMAL in BA dict 54 | 55 | Code 56 | as.data.frame(read_parquet(pf)) 57 | Output 58 | l1 l2 l3 l4 59 | 1 0.1 0.1 0.1 0.1 60 | 2 -0.1 -0.1 -0.1 -0.1 61 | 62 | -------------------------------------------------------------------------------- /src/protect.cpp: -------------------------------------------------------------------------------- 1 | #include "protect.h" 2 | 3 | void throw_error(void *err, Rboolean jump) { 4 | if (jump) { 5 | struct np_error *rerr = (struct np_error*) err; 6 | throw *rerr; 7 | } 8 | } 9 | 10 | SEXP wrapped_rawsxp(void *len) { 11 | R_xlen_t *xlen = (R_xlen_t*) len; 12 | return Rf_allocVector(RAWSXP, *xlen); 13 | } 14 | 15 | SEXP wrapped_intsxp(void *len) { 16 | R_xlen_t *xlen = (R_xlen_t*) len; 17 | return Rf_allocVector(INTSXP, *xlen); 18 | } 19 | 20 | SEXP wrapped_lglsxp(void *len) { 21 | R_xlen_t *xlen = (R_xlen_t*) len; 22 | return Rf_allocVector(LGLSXP, *xlen); 23 | } 24 | 25 | SEXP wrapped_realsxp(void *len) { 26 | R_xlen_t *xlen = (R_xlen_t*) len; 27 | return Rf_allocVector(REALSXP, *xlen); 28 | } 29 | 30 | SEXP wrapped_strsxp(void *len) { 31 | R_xlen_t *xlen = (R_xlen_t*) len; 32 | return Rf_allocVector(STRSXP, *xlen); 33 | } 34 | 35 | SEXP wrapped_vecsxp(void *len) { 36 | R_xlen_t *xlen = (R_xlen_t*) len; 37 | return Rf_allocVector(VECSXP, *xlen); 38 | } 39 | 40 | SEXP wrapped_mkchar(void *data) { 41 | const char **c = (const char **) data; 42 | return Rf_mkChar(*c); 43 | } 44 | 45 | SEXP wrapped_mkstring(void *data) { 46 | const char **c = (const char **) data; 47 | return Rf_mkString(*c); 48 | } 49 | 50 | SEXP wrapped_scalarinteger(void *data) { 51 | int *n = (int*) data; 52 | return Rf_ScalarInteger(*n); 53 | } 54 | 55 | SEXP 
wrapped_scalarreal(void *data) { 56 | double *n = (double*) data; 57 | return Rf_ScalarReal(*n); 58 | } 59 | 60 | SEXP wrapped_scalarlogical(void *data) { 61 | int *n = (int*) data; 62 | return Rf_ScalarLogical(*n); 63 | } 64 | 65 | SEXP wrapped_scalarstring(void *data) { 66 | SEXP *x = (SEXP*) data; 67 | return Rf_ScalarString(*x); 68 | } 69 | 70 | SEXP wrapped_mknamed_vec(void *data) { 71 | const char ***rdata = (const char ***) data; 72 | return Rf_mkNamed(VECSXP, *rdata); 73 | } 74 | -------------------------------------------------------------------------------- /man/read_parquet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read-parquet.R 3 | \name{read_parquet} 4 | \alias{read_parquet} 5 | \title{Read a Parquet file into a data frame} 6 | \usage{ 7 | read_parquet(file, col_select = NULL, options = parquet_options()) 8 | } 9 | \arguments{ 10 | \item{file}{Path to a Parquet file. It may also be an R connection, 11 | in which case it first reads all data from the connection, writes 12 | it into a temporary file, then reads the temporary file, and 13 | deletes it. The connection might be open, it which case it must be 14 | a binary connection. If it is not open, then \code{read_parquet()} will 15 | open it and also close it in the end.} 16 | 17 | \item{col_select}{Columns to read. It can be a numeric vector of column 18 | indices, or a character vector of column names. It is an error to 19 | select the same column multiple times. The order of the columns in 20 | the result is the same as the order in \code{col_select}.} 21 | 22 | \item{options}{Nanoparquet options, see \code{\link[=parquet_options]{parquet_options()}}.} 23 | } 24 | \value{ 25 | A \code{data.frame} with the file's contents. 26 | } 27 | \description{ 28 | Converts the contents of the named Parquet file to a R data frame. 
29 | } 30 | \examples{ 31 | file_name <- system.file("extdata/userdata1.parquet", package = "nanoparquet") 32 | parquet_df <- nanoparquet::read_parquet(file_name) 33 | print(str(parquet_df)) 34 | } 35 | \seealso{ 36 | See \code{\link[=write_parquet]{write_parquet()}} to write Parquet files, 37 | \link{nanoparquet-types} for the R <-> Parquet type mapping. 38 | See \code{\link[=read_parquet_info]{read_parquet_info()}}, for general information, 39 | \code{\link[=read_parquet_schema]{read_parquet_schema()}} for information about the 40 | columns, and \code{\link[=read_parquet_metadata]{read_parquet_metadata()}} for the complete metadata. 41 | } 42 | -------------------------------------------------------------------------------- /src/thrift/async/TAsyncBufferProcessor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_TASYNC_BUFFER_PROCESSOR_H_ 21 | #define _THRIFT_TASYNC_BUFFER_PROCESSOR_H_ 1 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace apache { 28 | namespace thrift { 29 | namespace async { 30 | 31 | class TAsyncBufferProcessor { 32 | public: 33 | // Process data in "in", putting the result in "out". 34 | // Call _return(true) when done, or _return(false) to 35 | // forcefully close the connection (if applicable). 36 | // "in" and "out" should be TMemoryBuffer or similar, 37 | // not a wrapper around a socket. 38 | virtual void process(std::function _return, 39 | std::shared_ptr ibuf, 40 | std::shared_ptr obuf) = 0; 41 | virtual ~TAsyncBufferProcessor() = default; 42 | }; 43 | } 44 | } 45 | } // apache::thrift::async 46 | 47 | #endif // #ifndef _THRIFT_TASYNC_BUFFER_PROCESSOR_H_ 48 | -------------------------------------------------------------------------------- /src/thrift/transport/TWebSocketServer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | using std::string; 30 | 31 | namespace apache { 32 | namespace thrift { 33 | namespace transport { 34 | 35 | std::string base64Encode(unsigned char* data, int length) { 36 | std::unique_ptr> base64(BIO_new(BIO_f_base64()), 37 | [](BIO* b) { BIO_free_all(b); }); 38 | BIO_set_flags(base64.get(), BIO_FLAGS_BASE64_NO_NL); 39 | 40 | BIO* dest = BIO_new(BIO_s_mem()); 41 | BIO_push(base64.get(), dest); 42 | BIO_write(base64.get(), data, length); 43 | int ret = BIO_flush(base64.get()); 44 | THRIFT_UNUSED_VARIABLE(ret); 45 | 46 | char* encoded; 47 | length = BIO_get_mem_data(dest, &encoded); 48 | return std::string(encoded, length); 49 | } 50 | } // namespace transport 51 | } // namespace thrift 52 | } // namespace apache 53 | -------------------------------------------------------------------------------- /src/r-base64.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "base64.h" 6 | 7 | extern "C" { 8 | 9 | // TODO: exceptions? 
10 | 11 | SEXP nanoparquet_base64_decode(SEXP x) { 12 | const char *input; 13 | size_t len; 14 | if (TYPEOF(x) == STRSXP) { 15 | input = (const char*) CHAR(STRING_ELT(x, 0)); 16 | len = strlen(input); 17 | } else if (TYPEOF(x) == RAWSXP) { 18 | input = (const char*) RAW(x); 19 | len = XLENGTH(x); 20 | } else { 21 | Rf_error("Invalid input in base64 decoder"); 22 | } 23 | 24 | size_t olen = base64::maximal_binary_length_from_base64( 25 | input, 26 | len 27 | ); 28 | SEXP rres = PROTECT(Rf_allocVector(RAWSXP, olen)); 29 | 30 | base64::result res = base64::base64_to_binary( 31 | input, 32 | len, 33 | (char*) RAW(rres) 34 | ); 35 | if(res.error != base64::error_code::SUCCESS) { 36 | Rf_error("Base64 decoding error at position %zu", res.count); 37 | } 38 | 39 | if (res.count < olen) { 40 | rres = Rf_xlengthgets(rres, res.count); 41 | } 42 | 43 | UNPROTECT(1); 44 | return rres; 45 | } 46 | 47 | // TODO: exceptions? 48 | 49 | SEXP nanoparquet_base64_encode(SEXP x) { 50 | const char *input; 51 | size_t len; 52 | if (TYPEOF(x) == STRSXP) { 53 | input = (const char*) CHAR(STRING_ELT(x, 0)); 54 | len = strlen(input); 55 | } else if (TYPEOF(x) == RAWSXP) { 56 | input = (const char*) RAW(x); 57 | len = XLENGTH(x); 58 | } else { 59 | Rf_error("Invalid input in base64 encoder"); 60 | } 61 | 62 | size_t olen = base64::base64_length_from_binary(len); 63 | SEXP rtmp = PROTECT(Rf_allocVector(RAWSXP, olen)); 64 | size_t truelen = base64::binary_to_base64( 65 | input, 66 | len, 67 | (char*) RAW(rtmp) 68 | ); 69 | SEXP rres = PROTECT(Rf_allocVector(STRSXP, 1)); 70 | SET_STRING_ELT(rres, 0, Rf_mkCharLen((const char*) RAW(rtmp), truelen)); 71 | 72 | UNPROTECT(2); 73 | return rres; 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/common/xxhash_static.h: -------------------------------------------------------------------------------- 1 | 2 | /* 
/* State block of the 32-bit hash.
   Exposed only so that callers can allocate it statically, on the stack,
   or inside another struct. Do not use members directly. */
struct XXH32_state_s {
   unsigned total_len_32;
   unsigned large_len;
   unsigned v1;
   unsigned v2;
   unsigned v3;
   unsigned v4;
   unsigned mem32[4];   /* buffer defined as U32 for alignment */
   unsigned memsize;
   unsigned reserved;   /* never read nor write, will be removed in a future version */
};   /* typedef'd to XXH32_state_t */

/* State block of the 64-bit hash.
   Exposed only so that callers can allocate it statically, on the stack,
   or inside another struct. Do not use members directly. */
struct XXH64_state_s {
   unsigned long long total_len;
   unsigned long long v1;
   unsigned long long v2;
   unsigned long long v3;
   unsigned long long v4;
   unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
   unsigned memsize;
   unsigned reserved[2];          /* never read nor write, will be removed in a future version */
};   /* typedef'd to XXH64_state_t */
1 | // Copyright 2011 Google Inc. All Rights Reserved. 2 | // 3 | // Redistribution and use in source and binary forms, with or without 4 | // modification, are permitted provided that the following conditions are 5 | // met: 6 | // 7 | // * Redistributions of source code must retain the above copyright 8 | // notice, this list of conditions and the following disclaimer. 9 | // * Redistributions in binary form must reproduce the above 10 | // copyright notice, this list of conditions and the following disclaimer 11 | // in the documentation and/or other materials provided with the 12 | // distribution. 13 | // * Neither the name of Google Inc. nor the names of its 14 | // contributors may be used to endorse or promote products derived from 15 | // this software without specific prior written permission. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | #include 30 | #include 31 | 32 | #include "snappy-stubs-internal.h" 33 | 34 | namespace snappy { 35 | 36 | void Varint::Append32(string* s, uint32 value) { 37 | char buf[Varint::kMax32]; 38 | const char* p = Varint::Encode32(buf, value); 39 | s->append(buf, p - buf); 40 | } 41 | 42 | } // namespace snappy 43 | -------------------------------------------------------------------------------- /src/base64.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace base64 { 6 | 7 | enum error_code { 8 | SUCCESS = 0, 9 | HEADER_BITS, // Any byte must have fewer than 5 header bits. 10 | TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes, where N is the UTF-8 character length 11 | // This is also the error when the input is truncated. 12 | TOO_LONG, // We either have too many consecutive continuation bytes or the string starts with a continuation byte. 13 | OVERLONG, // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters, 14 | // and U+FFFF for four-byte characters. 15 | TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1 16 | SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR 17 | // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR 18 | // there must be no surrogate at all (Latin1) 19 | INVALID_BASE64_CHARACTER, // Found a character that cannot be part of a valid base64 string. 20 | BASE64_INPUT_REMAINDER, // The base64 input terminates with a single character, excluding padding (=). 21 | OUTPUT_BUFFER_TOO_SMALL, // The provided buffer is too small. 22 | OTHER // Not related to validation/transcoding. 
/**
 * Enumerated definition of the types that the Thrift protocol supports.
 * Take special note of the T_STOP type, which is used specifically to mark
 * the end of a sequence of fields.
 *
 * Several names are aliases that share one value (T_I08 with T_BYTE,
 * T_UTF7 with T_STRING), so the enumerators are not all distinct.
 */
enum TType {
  T_STOP       = 0,   // end-of-fields marker
  T_VOID       = 1,
  T_BOOL       = 2,
  T_BYTE       = 3,
  T_I08        = 3,   // alias of T_BYTE
  T_I16        = 6,
  T_I32        = 8,
  T_U64        = 9,
  T_I64        = 10,
  T_DOUBLE     = 4,   // listed out of numeric order; the value is 4
  T_STRING     = 11,
  T_UTF7       = 11,  // alias of T_STRING
  T_STRUCT     = 12,
  T_MAP        = 13,
  T_SET        = 14,
  T_LIST       = 15,
  T_UTF8       = 16,
  T_UTF16      = 17
};

/**
 * Enumerated definition of the message types that the Thrift protocol
 * supports.
 */
enum TMessageType {
  T_CALL       = 1,
  T_REPLY      = 2,
  T_EXCEPTION  = 3,
  T_ONEWAY     = 4
};
Arrow implementations in general are not required to implement 20 | /// this type 21 | 22 | include "Schema.fbs"; 23 | 24 | namespace org.apache.arrow.flatbuf; 25 | 26 | /// ---------------------------------------------------------------------- 27 | /// Data structures for dense tensors 28 | 29 | /// Shape data for a single axis in a tensor 30 | table TensorDim { 31 | /// Length of dimension 32 | size: long; 33 | 34 | /// Name of the dimension, optional 35 | name: string; 36 | } 37 | 38 | table Tensor { 39 | /// The type of data contained in a value cell. Currently only fixed-width 40 | /// value types are supported, no strings or nested types 41 | type: Type (required); 42 | 43 | /// The dimensions of the tensor, optionally named 44 | shape: [TensorDim] (required); 45 | 46 | /// Non-negative byte offsets to advance one value cell along each dimension 47 | /// If omitted, default to row-major order (C-like). 48 | strides: [long]; 49 | 50 | /// The location and size of the tensor's data 51 | data: Buffer (required); 52 | } 53 | 54 | root_type Tensor; 55 | -------------------------------------------------------------------------------- /src/thrift/TOutput.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
/**
 * Message sink that forwards C-string messages to a replaceable
 * output function (stored in f_).
 */
class TOutput {
public:
  TOutput();

  /** Replaces the function that receives every message. */
  inline void setOutputFunction(void (*function)(const char*)) { f_ = function; }

  /** Passes `message` to the currently installed output function. */
  inline void operator()(const char* message) { f_(message); }

  // It is important to have a const char* overload here instead of
  // just the string version, otherwise errno could be corrupted
  // if there is some problem allocating memory when constructing
  // the string.
  void perror(const char* message, int errno_copy);
  /** Convenience overload; forwards to the const char* version. */
  inline void perror(const std::string& message, int errno_copy) {
    perror(message.c_str(), errno_copy);
  }

  // NOTE(review): presumably takes a printf-style format string followed
  // by its arguments -- defined elsewhere, confirm there.
  void printf(const char* message, ...);

  // NOTE(review): has the same signature as the output function pointer,
  // so it can be installed via setOutputFunction(); behaviour is defined
  // elsewhere.
  static void errorTimeWrapper(const char* msg);

  /** Just like strerror_r but returns a C++ string object. */
  static std::string strerror_s(int errno_copy);

private:
  // Currently installed output function; invoked by operator().
  void (*f_)(const char*);
};

// Shared TOutput instance, defined elsewhere.
THRIFT_EXPORT extern TOutput GlobalOutput;
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | 25 | using std::string; 26 | 27 | namespace apache { 28 | namespace thrift { 29 | namespace transport { 30 | 31 | const char* TTransportException::what() const noexcept { 32 | if (message_.empty()) { 33 | switch (type_) { 34 | case UNKNOWN: 35 | return "TTransportException: Unknown transport exception"; 36 | case NOT_OPEN: 37 | return "TTransportException: Transport not open"; 38 | case TIMED_OUT: 39 | return "TTransportException: Timed out"; 40 | case END_OF_FILE: 41 | return "TTransportException: End of file"; 42 | case INTERRUPTED: 43 | return "TTransportException: Interrupted"; 44 | case BAD_ARGS: 45 | return "TTransportException: Invalid arguments"; 46 | case CORRUPTED_DATA: 47 | return "TTransportException: Corrupted Data"; 48 | case INTERNAL_ERROR: 49 | return "TTransportException: Internal error"; 50 | default: 51 | return "TTransportException: (Invalid exception type)"; 52 | } 53 | } else { 54 | return message_.c_str(); 55 | } 56 | } 57 | } 58 | } 59 | } // apache::thrift::transport 60 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/read-parquet-row-group.md: 
-------------------------------------------------------------------------------- 1 | # read_parquet_row_group 2 | 3 | Code 4 | as.data.frame(read_parquet_metadata(tmp)$row_groups)[-1] 5 | Output 6 | id total_byte_size num_rows file_offset total_compressed_size ordinal 7 | 1 0 937 10 NA NA NA 8 | 2 1 947 10 NA NA NA 9 | 3 2 1011 10 NA NA NA 10 | 4 3 528 2 NA NA NA 11 | 12 | --- 13 | 14 | Code 15 | as.data.frame(read_parquet_metadata(tmp)$row_groups)[-1] 16 | Output 17 | id total_byte_size num_rows file_offset total_compressed_size ordinal 18 | 1 0 992 10 NA NA NA 19 | 2 1 1029 10 NA NA NA 20 | 3 2 1091 10 NA NA NA 21 | 4 3 606 2 NA NA NA 22 | 23 | --- 24 | 25 | Code 26 | as.data.frame(read_parquet_metadata(tmp)$row_groups)[-1] 27 | Output 28 | id total_byte_size num_rows file_offset total_compressed_size ordinal 29 | 1 0 937 10 NA NA NA 30 | 2 1 947 10 NA NA NA 31 | 3 2 1011 10 NA NA NA 32 | 4 3 528 2 NA NA NA 33 | 34 | --- 35 | 36 | Code 37 | as.data.frame(read_parquet_metadata(tmp)$row_groups)[-1] 38 | Output 39 | id total_byte_size num_rows file_offset total_compressed_size ordinal 40 | 1 0 992 10 NA NA NA 41 | 2 1 1029 10 NA NA NA 42 | 3 2 1091 10 NA NA NA 43 | 4 3 606 2 NA NA NA 44 | 45 | -------------------------------------------------------------------------------- /src/thrift/windows/TWinsockSingleton.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | 22 | // boost 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace transport { 28 | 29 | TWinsockSingleton::instance_ptr TWinsockSingleton::instance_ptr_(nullptr); 30 | std::once_flag TWinsockSingleton::flags_; 31 | 32 | //------------------------------------------------------------------------------ 33 | TWinsockSingleton::TWinsockSingleton(void) { 34 | WORD version(MAKEWORD(2, 2)); 35 | WSAData data = {0}; 36 | 37 | int error(WSAStartup(version, &data)); 38 | if (error != 0) { 39 | throw std::runtime_error("Failed to initialise Winsock."); 40 | } 41 | } 42 | 43 | //------------------------------------------------------------------------------ 44 | TWinsockSingleton::~TWinsockSingleton(void) { 45 | WSACleanup(); 46 | } 47 | 48 | //------------------------------------------------------------------------------ 49 | void TWinsockSingleton::create(void) { 50 | std::call_once(flags_, init); 51 | } 52 | 53 | //------------------------------------------------------------------------------ 54 | void TWinsockSingleton::init(void) { 55 | instance_ptr_.reset(new TWinsockSingleton); 56 | } 57 | } 58 | } 59 | } // apache::thrift::transport 60 | -------------------------------------------------------------------------------- /src/thrift/windows/TWinsockSingleton.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TRANSPORT_WINDOWS_TWINSOCKSINGLETON_H_ 21 | #define _THRIFT_TRANSPORT_WINDOWS_TWINSOCKSINGLETON_H_ 1 22 | 23 | #if defined(_MSC_VER) && (_MSC_VER > 1200) 24 | #pragma once 25 | #endif // _MSC_VER 26 | 27 | #ifndef _WIN32 28 | #error This is a MSVC header only. 29 | #endif 30 | 31 | #include 32 | 33 | // boost 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | 40 | namespace apache { 41 | namespace thrift { 42 | namespace transport { 43 | 44 | /** 45 | * Winsock2 must be intialised once only in order to create sockets. This class 46 | * performs a one time initialisation when create is called. 
47 | */ 48 | class TWinsockSingleton : private apache::thrift::TNonCopyable { 49 | 50 | public: 51 | typedef std::shared_ptr instance_ptr; 52 | 53 | private: 54 | TWinsockSingleton(void); 55 | 56 | public: 57 | ~TWinsockSingleton(void); 58 | 59 | public: 60 | static void create(void); 61 | 62 | private: 63 | static void init(void); 64 | 65 | private: 66 | static instance_ptr instance_ptr_; 67 | static std::once_flag flags_; 68 | }; 69 | } 70 | } 71 | } // apache::thrift::transport 72 | 73 | #endif // _THRIFT_TRANSPORT_WINDOWS_TWINSOCKSINGLETON_H_ 74 | -------------------------------------------------------------------------------- /src/thrift/async/TAsyncProtocolProcessor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | using apache::thrift::transport::TBufferBase; 23 | using apache::thrift::protocol::TProtocol; 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace async { 28 | 29 | void TAsyncProtocolProcessor::process(std::function _return, 30 | std::shared_ptr ibuf, 31 | std::shared_ptr obuf) { 32 | std::shared_ptr iprot(pfact_->getProtocol(ibuf)); 33 | std::shared_ptr oprot(pfact_->getProtocol(obuf)); 34 | return underlying_ 35 | ->process(std::bind(&TAsyncProtocolProcessor::finish, 36 | _return, 37 | oprot, 38 | std::placeholders::_1), 39 | iprot, 40 | oprot); 41 | } 42 | 43 | /* static */ void TAsyncProtocolProcessor::finish( 44 | std::function _return, 45 | std::shared_ptr oprot, 46 | bool healthy) { 47 | (void)oprot; 48 | // This is a stub function to hold a reference to oprot. 49 | return _return(healthy); 50 | } 51 | } 52 | } 53 | } // apache::thrift::async 54 | -------------------------------------------------------------------------------- /src/thrift/transport/TSimpleFileTransport.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #include 21 | 22 | #include 23 | 24 | #include 25 | #ifdef HAVE_SYS_STAT_H 26 | #include 27 | #endif 28 | #include 29 | 30 | #ifdef _WIN32 31 | #include 32 | #endif 33 | 34 | namespace apache { 35 | namespace thrift { 36 | namespace transport { 37 | 38 | TSimpleFileTransport::TSimpleFileTransport(const std::string& path, bool read, bool write, std::shared_ptr config) 39 | : TFDTransport(-1, TFDTransport::CLOSE_ON_DESTROY, config) { 40 | int flags = 0; 41 | if (read && write) { 42 | flags = O_RDWR; 43 | } else if (read) { 44 | flags = O_RDONLY; 45 | } else if (write) { 46 | flags = O_WRONLY; 47 | } else { 48 | throw TTransportException("Neither READ nor WRITE specified"); 49 | } 50 | if (write) { 51 | flags |= O_CREAT | O_APPEND; 52 | } 53 | #ifndef _WIN32 54 | mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; 55 | #else 56 | int mode = _S_IREAD | _S_IWRITE; 57 | #endif 58 | int fd = ::THRIFT_OPEN(path.c_str(), flags, mode); 59 | if (fd < 0) { 60 | throw TTransportException("failed to open file for writing: " + path); 61 | } 62 | setFD(fd); 63 | open(); 64 | } 65 | } 66 | } 67 | } // apache::thrift::transport 68 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/compress/zstd_opt.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
#ifndef ZSTD_OPT_H
#define ZSTD_OPT_H

#include "zstd/compress/zstd_compress_internal.h"

namespace zstd {

/* used in ZSTD_loadDictionaryContent() */
void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);

/* Block compressors, one per strategy name (btopt, btultra, btultra2).
 * All of them share one signature: match state, sequence store, an array of
 * ZSTD_REP_NUM repeat offsets, and the source buffer with its size.
 * NOTE(review): the meaning of the size_t result is not visible in this
 * header - confirm against the definitions. */
size_t ZSTD_compressBlock_btopt(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra2(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);


/* Same signature, `_dictMatchState` variants of btopt and btultra. */
size_t ZSTD_compressBlock_btopt_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_dictMatchState(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);

/* Same signature, `_extDict` variants of btopt and btultra. */
size_t ZSTD_compressBlock_btopt_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);
size_t ZSTD_compressBlock_btultra_extDict(
        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
        void const* src, size_t srcSize);

        /* note : no btultra2 variant for extDict nor dictMatchState,
         * because btultra2 is not meant to work with dictionaries
         * and is only specific for the first block (no prefix) */

}

#endif /* ZSTD_OPT_H */
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TRANSPORT_THTTPSERVER_H_ 21 | #define _THRIFT_TRANSPORT_THTTPSERVER_H_ 1 22 | 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace transport { 28 | 29 | class THttpServer : public THttpTransport { 30 | public: 31 | THttpServer(std::shared_ptr transport, std::shared_ptr config = nullptr); 32 | 33 | ~THttpServer() override; 34 | 35 | void flush() override; 36 | 37 | protected: 38 | virtual std::string getHeader(uint32_t len); 39 | void readHeaders(); 40 | void parseHeader(char* header) override; 41 | bool parseStatusLine(char* status) override; 42 | std::string getTimeRFC1123(); 43 | }; 44 | 45 | /** 46 | * Wraps a transport into HTTP protocol 47 | */ 48 | class THttpServerTransportFactory : public TTransportFactory { 49 | public: 50 | THttpServerTransportFactory() = default; 51 | 52 | ~THttpServerTransportFactory() override = default; 53 | 54 | /** 55 | * Wraps the transport into a buffered one. 
56 | */ 57 | std::shared_ptr getTransport(std::shared_ptr trans) override { 58 | return std::shared_ptr(new THttpServer(trans)); 59 | } 60 | }; 61 | } 62 | } 63 | } // apache::thrift::transport 64 | 65 | #endif // #ifndef _THRIFT_TRANSPORT_THTTPSERVER_H_ 66 | -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | OBJECTS= lib/ParquetReader.o RParquetReader.o RParquetAppender.o \ 2 | RParquetOutFile.o r-utils.o rwrapper.o protect.o read2.o write2.o \ 3 | cpp-utils.o read-metadata.o read-pages.o \ 4 | arrow-schema.o base64.o r-base64.o snappy.o encodings.o \ 5 | dictionary-encoding.o test.o \ 6 | lib/ParquetOutFile.o lib/RleBpDecoder.o \ 7 | parquet/parquet_types.o \ 8 | thrift/protocol/TProtocol.o thrift/transport/TTransportException.o \ 9 | thrift/transport/TBufferTransports.o \ 10 | fastpforlib/bitpacking.o \ 11 | snappy/snappy.o snappy/snappy-sinksource.o \ 12 | miniz/miniz.o \ 13 | zstd/common/entropy_common.o zstd/common/error_private.o \ 14 | zstd/common/fse_decompress.o zstd/common/xxhash.o \ 15 | zstd/common/zstd_common.o zstd/decompress/huf_decompress.o \ 16 | zstd/decompress/zstd_ddict.o zstd/decompress/zstd_decompress.o \ 17 | zstd/decompress/zstd_decompress_block.o \ 18 | zstd/compress/fse_compress.o zstd/compress/hist.o \ 19 | zstd/compress/huf_compress.o zstd/compress/zstd_compress.o \ 20 | zstd/compress/zstd_compress_literals.o \ 21 | zstd/compress/zstd_compress_sequences.o \ 22 | zstd/compress/zstd_compress_superblock.o \ 23 | zstd/compress/zstd_double_fast.o zstd/compress/zstd_fast.o \ 24 | zstd/compress/zstd_lazy.o zstd/compress/zstd_ldm.o \ 25 | zstd/compress/zstd_opt.o 26 | 27 | PKG_CPPFLAGS = -Ithrift -I. 
-Izstd/include 28 | # PKG_CFLAGS = -DR_NO_REMAP 29 | 30 | PKG_CXXFLAGS = -DR_NO_REMAP 31 | PKG_CXX11FLAGS = -DR_NO_REMAP 32 | PKG_CXX14FLAGS = -DR_NO_REMAP 33 | PKG_CXX17FLAGS = -DR_NO_REMAP 34 | PKG_CXX20FLAGS = -DR_NO_REMAP 35 | PKG_CXX23FLAGS = -DR_NO_REMAP 36 | PKG_CXX24FLAGS = -DR_NO_REMAP 37 | PKG_CXX25FLAGS = -DR_NO_REMAP 38 | PKG_CXX26FLAGS = -DR_NO_REMAP 39 | PKG_CXX27FLAGS = -DR_NO_REMAP 40 | PKG_CXX28FLAGS = -DR_NO_REMAP 41 | PKG_CXX29FLAGS = -DR_NO_REMAP 42 | PKG_CXX30FLAGS = -DR_NO_REMAP 43 | 44 | # PKG_LIBS = -lws2_32 45 | 46 | MAKEFLAGS=@MAKEFLAGS@ 47 | 48 | all: $(SHLIB) 49 | 50 | broken: broken.cpp parquet/parquet_types.cpp \ 51 | thrift/protocol/TProtocol.cpp\ 52 | thrift/transport/TTransportException.cpp \ 53 | thrift/transport/TBufferTransports.cpp 54 | clang++ -std=gnu++11 -Ithrift -I. -Izstd/include $^ -g -O0 -o broken 55 | -------------------------------------------------------------------------------- /src/thrift/async/TAsyncProtocolProcessor.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_TNAME_ME_H_ 21 | #define _THRIFT_TNAME_ME_H_ 1 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace apache { 28 | namespace thrift { 29 | namespace async { 30 | 31 | class TAsyncProtocolProcessor : public TAsyncBufferProcessor { 32 | public: 33 | TAsyncProtocolProcessor(std::shared_ptr underlying, 34 | std::shared_ptr pfact) 35 | : underlying_(underlying), pfact_(pfact) {} 36 | 37 | void process(std::function _return, 38 | std::shared_ptr ibuf, 39 | std::shared_ptr obuf) override; 40 | 41 | ~TAsyncProtocolProcessor() override = default; 42 | 43 | private: 44 | static void finish(std::function _return, 45 | std::shared_ptr oprot, 46 | bool healthy); 47 | 48 | std::shared_ptr underlying_; 49 | std::shared_ptr pfact_; 50 | }; 51 | } 52 | } 53 | } // apache::thrift::async 54 | 55 | #endif // #ifndef _THRIFT_TNAME_ME_H_ 56 | -------------------------------------------------------------------------------- /man/read_parquet_schema.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parquet-metadata.R 3 | \name{read_parquet_schema} 4 | \alias{read_parquet_schema} 5 | \title{Read the schema of a Parquet file} 6 | \usage{ 7 | read_parquet_schema(file, options = parquet_options()) 8 | } 9 | \arguments{ 10 | \item{file}{Path to a Parquet file.} 11 | 12 | \item{options}{Return value of \code{\link[=parquet_options]{parquet_options()}}, options that 13 | potentially modify the Parquet to R type mappings.} 14 | } 15 | \value{ 16 | \if{html}{\out{
}}\preformatted{Data frame, the schema of the file. It has one row for 17 | each node (inner node or leaf node). For flat files this means one 18 | root node (inner node), always the first one, and then one row for 19 | each "real" column. For nested schemas, the rows are in depth-first 20 | search order. Most important columns are: 21 | - `file_name`: file name. 22 | - `name`: column name. 23 | - `r_type`: the R type that corresponds to the Parquet type. 24 | Might be `NA` if [read_parquet()] cannot read this column. See 25 | [nanoparquet-types] for the type mapping rules. 26 | - `type`: data type. One of the low level data types. 27 | - `type_length`: length for fixed length byte arrays. 28 | - `repetition_type`: character, one of `REQUIRED`, `OPTIONAL` or 29 | `REPEATED`. 30 | - `logical_type`: a list column, the logical types of the columns. 31 | An element has at least an entry called `type`, and potentially 32 | additional entries, e.g. `bit_width`, `is_signed`, etc. 33 | - `num_children`: number of child nodes. Should be a non-negative 34 | integer for the root node, and `NA` for a leaf node. 35 | }\if{html}{\out{
}} 36 | } 37 | \description{ 38 | This function should work on all files, even if \code{\link[=read_parquet]{read_parquet()}} is 39 | unable to read them, because of an unsupported schema, encoding, 40 | compression or other reason. 41 | } 42 | \seealso{ 43 | \code{\link[=read_parquet_metadata]{read_parquet_metadata()}} to read more metadata, 44 | \code{\link[=read_parquet_info]{read_parquet_info()}} to show only basic information. 45 | \code{\link[=read_parquet]{read_parquet()}}, \code{\link[=write_parquet]{write_parquet()}}, \link{nanoparquet-types}. 46 | } 47 | -------------------------------------------------------------------------------- /src/thrift/qt/TQIODeviceTransport.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_ASYNC_TQIODEVICE_TRANSPORT_H_ 21 | #define _THRIFT_ASYNC_TQIODEVICE_TRANSPORT_H_ 1 22 | 23 | #include 24 | 25 | #include 26 | 27 | class QIODevice; 28 | 29 | namespace apache { 30 | namespace thrift { 31 | namespace transport { 32 | 33 | /** 34 | * Transport that operates on a QIODevice (socket, file, etc). 
35 | */ 36 | class TQIODeviceTransport 37 | : public apache::thrift::transport::TVirtualTransport { 38 | public: 39 | explicit TQIODeviceTransport(std::shared_ptr dev); 40 | ~TQIODeviceTransport() override; 41 | 42 | void open() override; 43 | bool isOpen() const override; 44 | bool peek() override; 45 | void close() override; 46 | 47 | uint32_t readAll(uint8_t* buf, uint32_t len); 48 | uint32_t read(uint8_t* buf, uint32_t len); 49 | 50 | void write(const uint8_t* buf, uint32_t len); 51 | uint32_t write_partial(const uint8_t* buf, uint32_t len); 52 | 53 | void flush() override; 54 | 55 | uint8_t* borrow(uint8_t* buf, uint32_t* len); 56 | void consume(uint32_t len); 57 | 58 | private: 59 | TQIODeviceTransport(const TQIODeviceTransport&); 60 | TQIODeviceTransport& operator=(const TQIODeviceTransport&); 61 | 62 | std::shared_ptr dev_; 63 | }; 64 | } 65 | } 66 | } // apache::thrift::transport 67 | 68 | #endif // #ifndef _THRIFT_ASYNC_TQIODEVICE_TRANSPORT_H_ 69 | -------------------------------------------------------------------------------- /src/thrift/transport/TSSLServerSocket.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. 
See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace apache { 25 | namespace thrift { 26 | namespace transport { 27 | 28 | /** 29 | * SSL server socket implementation. 30 | */ 31 | TSSLServerSocket::TSSLServerSocket(int port, std::shared_ptr factory) 32 | : TServerSocket(port), factory_(factory) { 33 | factory_->server(true); 34 | } 35 | 36 | TSSLServerSocket::TSSLServerSocket(const std::string& address, 37 | int port, 38 | std::shared_ptr factory) 39 | : TServerSocket(address, port), factory_(factory) { 40 | factory_->server(true); 41 | } 42 | 43 | TSSLServerSocket::TSSLServerSocket(int port, 44 | int sendTimeout, 45 | int recvTimeout, 46 | std::shared_ptr factory) 47 | : TServerSocket(port, sendTimeout, recvTimeout), factory_(factory) { 48 | factory_->server(true); 49 | } 50 | 51 | std::shared_ptr TSSLServerSocket::createSocket(THRIFT_SOCKET client) { 52 | if (interruptableChildren_) { 53 | return factory_->createSocket(client, pChildInterruptSockReader_); 54 | 55 | } else { 56 | return factory_->createSocket(client); 57 | } 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/parquet_column_types.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parquet-column-types.R 3 | \name{parquet_column_types} 4 | \alias{parquet_column_types} 5 | \title{Map between R and Parquet data types} 6 | \usage{ 7 | parquet_column_types(x, options = parquet_options()) 8 | } 9 | \arguments{ 10 | \item{x}{Path to a Parquet file, or a data frame.} 11 | 12 | \item{options}{Nanoparquet options, see \code{\link[=parquet_options]{parquet_options()}}.} 13 | } 14 | \value{ 15 | Data frame with columns: 16 | \itemize{ 17 | \item \code{file_name}: file name. 
18 | \item \code{name}: column name. 19 | \item \code{type}: (low level) Parquet data type. 20 | \item \code{r_type}: the R type that corresponds to the Parquet type. 21 | Might be \code{NA} if \code{\link[=read_parquet]{read_parquet()}} cannot read this column. See 22 | \link{nanoparquet-types} for the type mapping rules. 23 | \item \code{repetition_type}: whether the column is \code{REQUIRED} (cannot be 24 | \code{NA}) or \code{OPTIONAL} (may be \code{NA}). \code{REPEATED} columns are not 25 | currently supported by nanoparquet. 26 | \item \code{logical_type}: Parquet logical type in a list column. 27 | An element has at least an entry called \code{type}, and potentially 28 | additional entries, e.g. \code{bit_width}, \code{is_signed}, etc. 29 | } 30 | } 31 | \description{ 32 | Note that this function is now deprecated. Please use 33 | \code{\link[=read_parquet_schema]{read_parquet_schema()}} for files, and \code{\link[=infer_parquet_schema]{infer_parquet_schema()}} for 34 | data frames. 35 | } 36 | \details{ 37 | This function works two ways. It can map the R types of a data frame to 38 | Parquet types, to see how \code{\link[=write_parquet]{write_parquet()}} would write out the data 39 | frame. It can also map the types of a Parquet file to R types, to see 40 | how \code{\link[=read_parquet]{read_parquet()}} would read the file into R. 41 | } 42 | \seealso{ 43 | \code{\link[=read_parquet_metadata]{read_parquet_metadata()}} to read more metadata, 44 | \code{\link[=read_parquet_info]{read_parquet_info()}} for a very short summary. 45 | \code{\link[=read_parquet_schema]{read_parquet_schema()}} for the complete Parquet schema. 46 | \code{\link[=read_parquet]{read_parquet()}}, \code{\link[=write_parquet]{write_parquet()}}, \link{nanoparquet-types}.
47 | } 48 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/decompress/zstd_decompress_block.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 9 | */ 10 | 11 | 12 | #ifndef ZSTD_DEC_BLOCK_H 13 | #define ZSTD_DEC_BLOCK_H 14 | 15 | /*-******************************************************* 16 | * Dependencies 17 | *********************************************************/ 18 | #include /* size_t */ 19 | #include "zstd.h" /* DCtx, and some public functions */ 20 | #include "zstd/common/zstd_internal.h" /* blockProperties_t, and some public functions */ 21 | #include "zstd/decompress/zstd_decompress_internal.h" /* ZSTD_seqSymbol */ 22 | 23 | namespace zstd { 24 | 25 | /* === Prototypes === */ 26 | 27 | /* note: prototypes already published within `zstd.h` : 28 | * ZSTD_decompressBlock() 29 | */ 30 | 31 | /* note: prototypes already published within `zstd_internal.h` : 32 | * ZSTD_getcBlockSize() 33 | * ZSTD_decodeSeqHeaders() 34 | */ 35 | 36 | 37 | /* ZSTD_decompressBlock_internal() : 38 | * decompress block, starting at `src`, 39 | * into destination buffer `dst`. 
/* ZSTD_decompressBlock_internal() :
 * decompress block, starting at `src`,
 * into destination buffer `dst`.
 * @return : decompressed block size,
 *           or an error code (which can be tested using ZSTD_isError())
 * NOTE(review): the role of the `frame` flag is not described in this
 * header - confirm against the definition.
 */
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize, const int frame);

/* ZSTD_buildFSETable() :
 * generate FSE decoding table for one symbol (ll, ml or off)
 * this function must be called with valid parameters only
 * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
 * in which case it cannot fail.
 * Internal use only.
 */
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
             const U32* baseValue, const U32* nbAdditionalBits,
                   unsigned tableLog);
18 | */ 19 | 20 | #include 21 | #include 22 | 23 | namespace apache { 24 | namespace thrift { 25 | namespace transport { 26 | 27 | /** 28 | * Nonblocking SSL server socket implementation. 29 | */ 30 | TNonblockingSSLServerSocket::TNonblockingSSLServerSocket(int port, std::shared_ptr factory) 31 | : TNonblockingServerSocket(port), factory_(factory) { 32 | factory_->server(true); 33 | } 34 | 35 | TNonblockingSSLServerSocket::TNonblockingSSLServerSocket(const std::string& address, 36 | int port, 37 | std::shared_ptr factory) 38 | : TNonblockingServerSocket(address, port), factory_(factory) { 39 | factory_->server(true); 40 | } 41 | 42 | TNonblockingSSLServerSocket::TNonblockingSSLServerSocket(int port, 43 | int sendTimeout, 44 | int recvTimeout, 45 | std::shared_ptr factory) 46 | : TNonblockingServerSocket(port, sendTimeout, recvTimeout), factory_(factory) { 47 | factory_->server(true); 48 | } 49 | 50 | std::shared_ptr TNonblockingSSLServerSocket::createSocket(THRIFT_SOCKET client) { 51 | std::shared_ptr tSSLSocket; 52 | tSSLSocket = factory_->createSocket(client); 53 | tSSLSocket->setLibeventSafe(); 54 | return tSSLSocket; 55 | } 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/thrift/TConfiguration.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
#ifndef THRIFT_TCONFIGURATION_H
#define THRIFT_TCONFIGURATION_H

namespace apache {
namespace thrift {

/**
 * Holds three integer limits: maximum message size, maximum frame size and
 * recursion limit. Each one has a compile-time default and a getter/setter
 * pair.
 */
class TConfiguration
{
public:
  /**
   * @param maxMessageSize  defaults to DEFAULT_MAX_MESSAGE_SIZE
   * @param maxFrameSize    defaults to DEFAULT_MAX_FRAME_SIZE
   * @param recursionLimit  defaults to DEFAULT_RECURSION_DEPTH
   */
  TConfiguration(int maxMessageSize = DEFAULT_MAX_MESSAGE_SIZE,
                int maxFrameSize = DEFAULT_MAX_FRAME_SIZE, int recursionLimit = DEFAULT_RECURSION_DEPTH)
    : maxMessageSize_(maxMessageSize), maxFrameSize_(maxFrameSize), recursionLimit_(recursionLimit) {}

  const static int DEFAULT_MAX_MESSAGE_SIZE = 100 * 1024 * 1024;
  const static int DEFAULT_MAX_FRAME_SIZE = 16384000;      // this value is used consistently across all Thrift libraries
  const static int DEFAULT_RECURSION_DEPTH = 64;

  // The getters are const so they can be called through a const reference or
  // a pointer-to-const; existing non-const callers are unaffected.
  inline int  getMaxMessageSize() const { return maxMessageSize_; }
  inline void setMaxMessageSize(int maxMessageSize) { maxMessageSize_ = maxMessageSize; }
  inline int  getMaxFrameSize() const { return maxFrameSize_; }
  inline void setMaxFrameSize(int maxFrameSize) { maxFrameSize_ = maxFrameSize; }
  inline int  getRecursionLimit() const { return recursionLimit_; }
  inline void setRecursionLimit(int recursionLimit) { recursionLimit_ = recursionLimit; }

private:
  int maxMessageSize_ = DEFAULT_MAX_MESSAGE_SIZE;
  int maxFrameSize_ = DEFAULT_MAX_FRAME_SIZE;
  int recursionLimit_ = DEFAULT_RECURSION_DEPTH;

  // TODO(someone_smart): add connection and i/o timeouts
};
}
} // apache::thrift

#endif /* THRIFT_TCONFIGURATION_H */
-------------------------------------------------------------------------------- /src/thrift/numeric_cast.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef THRIFT_NUMERIC_CAST_H 21 | #define THRIFT_NUMERIC_CAST_H 22 | 23 | #include 24 | #include 25 | 26 | #if defined(_MSC_VER) 27 | // avoid compiler warnings and errors in MSVC if max is defined as a macro 28 | #undef max 29 | #endif 30 | 31 | namespace apache { 32 | namespace thrift { 33 | 34 | /** 35 | * @brief Perform a safe numeric cast 36 | * 37 | * Previously this was provided by `boost::numeric_cast`. This 38 | * implementation reduces the dependency on `boost`. 
/**
 * @brief Perform a safe numeric cast
 *
 * Previously this was provided by `boost::numeric_cast`. This
 * implementation reduces the dependency on `boost`.
 *
 * Conversions between a signed and an unsigned integer type are range-checked
 * explicitly. Comparing such values directly would convert the signed side to
 * unsigned first, so for example `numeric_cast<unsigned>(-1)` used to wrap
 * around silently instead of throwing.
 *
 * @tparam Dst The destination type
 * @tparam Src The source type
 * @param value The value to be converted
 * @return Dst The converted value
 * @throws std::bad_cast if `value` cannot be represented in `Dst`
 *
 * @see SA49658182
 */
template <typename Dst, typename Src>
inline Dst numeric_cast(Src value) {
  typedef std::numeric_limits<Dst> DstLim;
  typedef std::numeric_limits<Src> SrcLim;

  if (DstLim::is_integer && SrcLim::is_integer && DstLim::is_signed != SrcLim::is_signed) {
    // Mixed signedness: a negative source never fits an unsigned destination.
    if (SrcLim::is_signed && value < Src()) {
      throw std::bad_cast();
    }
    // Both operands are non-negative here, so comparing them as unsigned
    // magnitudes is exact (integer types up to unsigned long long).
    if (static_cast<unsigned long long>(value) > static_cast<unsigned long long>(DstLim::max())) {
      throw std::bad_cast();
    }
    return static_cast<Dst>(value);
  }

  // Same signedness, or a floating-point type is involved: the usual
  // arithmetic conversions keep these comparisons meaningful.
  const bool positive_overflow_possible = DstLim::max() < SrcLim::max();
  const bool negative_overflow_possible = DstLim::lowest() > SrcLim::lowest();

  if (positive_overflow_possible && value > DstLim::max()) {
    throw std::bad_cast();
  }

  if (negative_overflow_possible && (value < DstLim::lowest())) {
    throw std::bad_cast();
  }

  // limits have been checked, therefore safe to cast
  return static_cast<Dst>(value);
}
// Cursor over a byte range; decoders consume bytes from the front by
// advancing `start` and decrementing `len`.
struct buffer {
  uint8_t *start;  // next unread byte
  uint32_t len;    // number of bytes still available at `start`
};

/**
 * Decode one unsigned LEB128 (varint) value from the front of `buf`.
 *
 * Each input byte contributes its low 7 bits, least significant group first;
 * the high bit marks that another byte follows. `buf` is advanced past the
 * bytes that were consumed.
 *
 * @tparam T result type; presumably an unsigned integer type, since the
 *           accumulated value is shifted up to the top bits of T (TODO
 *           confirm against callers).
 * @param buf cursor to read from; updated in place.
 * @return the decoded value.
 * @throws std::runtime_error if the buffer ends before the final byte of the
 *         varint, or if the varint has more 7-bit groups than fit into T.
 */
template <typename T>
T uleb_decode(buffer *buf) {
  T result = 0;
  uint8_t shift = 0;
  while (true) {
    if (buf->len == 0) {
      throw std::runtime_error("Buffer ended while varint decoding");
    }
    const uint8_t byte = *buf->start++;
    buf->len--;
    // Widen to T *before* shifting. `(byte & 127) << shift` would be
    // evaluated as `int`, which is undefined once the shift reaches the
    // width of int and breaks every varint that needs more than 4 bytes.
    result |= static_cast<T>(byte & 127) << shift;
    if ((byte & 128) == 0) break;
    shift += 7;
    // The next group would be shifted by `shift`; refuse it if that is not a
    // valid shift count for T.
    if (shift >= sizeof(T) * 8) {
      throw std::runtime_error("Varint decoding found too large number");
    }
  }

  return result;
}
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_CONCURRENCY_EXCEPTION_H_ 21 | #define _THRIFT_CONCURRENCY_EXCEPTION_H_ 1 22 | 23 | #include 24 | #include 25 | 26 | namespace apache { 27 | namespace thrift { 28 | namespace concurrency { 29 | 30 | class NoSuchTaskException : public apache::thrift::TException {}; 31 | 32 | class UncancellableTaskException : public apache::thrift::TException {}; 33 | 34 | class InvalidArgumentException : public apache::thrift::TException {}; 35 | 36 | class IllegalStateException : public apache::thrift::TException { 37 | public: 38 | IllegalStateException() = default; 39 | IllegalStateException(const std::string& message) : TException(message) {} 40 | }; 41 | 42 | class TimedOutException : public apache::thrift::TException { 43 | public: 44 | TimedOutException() : TException("TimedOutException"){}; 45 | TimedOutException(const std::string& message) : TException(message) {} 46 | }; 47 | 48 | class TooManyPendingTasksException : public apache::thrift::TException { 49 | public: 50 | TooManyPendingTasksException() : TException("TooManyPendingTasksException"){}; 51 | TooManyPendingTasksException(const std::string& message) : TException(message) {} 52 | }; 53 | 54 | class SystemResourceException : public 
apache::thrift::TException { 55 | public: 56 | SystemResourceException() = default; 57 | 58 | SystemResourceException(const std::string& message) : TException(message) {} 59 | }; 60 | } 61 | } 62 | } // apache::thrift::concurrency 63 | 64 | #endif // #ifndef _THRIFT_CONCURRENCY_EXCEPTION_H_ 65 | -------------------------------------------------------------------------------- /src/thrift/async/TEvhttpServer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TEVHTTP_SERVER_H_ 21 | #define _THRIFT_TEVHTTP_SERVER_H_ 1 22 | 23 | #include 24 | 25 | struct event_base; 26 | struct evhttp; 27 | struct evhttp_request; 28 | 29 | namespace apache { 30 | namespace thrift { 31 | namespace async { 32 | 33 | class TAsyncBufferProcessor; 34 | 35 | class TEvhttpServer { 36 | public: 37 | /** 38 | * Create a TEvhttpServer for use with an external evhttp instance. 39 | * Must be manually installed with evhttp_set_cb, using 40 | * TEvhttpServer::request as the callback and the 41 | * address of the server as the extra arg. 42 | * Do not call "serve" on this server. 
43 | */ 44 | TEvhttpServer(std::shared_ptr processor); 45 | 46 | /** 47 | * Create a TEvhttpServer with an embedded event_base and evhttp, 48 | * listening on port and responding on the endpoint "/". 49 | * Call "serve" on this server to serve forever. 50 | */ 51 | TEvhttpServer(std::shared_ptr processor, int port); 52 | 53 | ~TEvhttpServer(); 54 | 55 | static void request(struct evhttp_request* req, void* self); 56 | int serve(); 57 | 58 | struct event_base* getEventBase(); 59 | 60 | private: 61 | struct RequestContext; 62 | 63 | void process(struct evhttp_request* req); 64 | void complete(RequestContext* ctx, bool success); 65 | 66 | std::shared_ptr processor_; 67 | struct event_base* eb_; 68 | struct evhttp* eh_; 69 | }; 70 | } 71 | } 72 | } // apache::thrift::async 73 | 74 | #endif // #ifndef _THRIFT_TEVHTTP_SERVER_H_ 75 | -------------------------------------------------------------------------------- /src/zstd/include/zstd/compress/zstd_compress_sequences.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. 3 | * All rights reserved. 4 | * 5 | * This source code is licensed under both the BSD-style license (found in the 6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 | * in the COPYING file in the root directory of this source tree). 8 | * You may select, at your option, one of the above-listed licenses. 
9 | */ 10 | 11 | #ifndef ZSTD_COMPRESS_SEQUENCES_H 12 | #define ZSTD_COMPRESS_SEQUENCES_H 13 | 14 | #include "zstd/common/fse.h" /* FSE_repeat, FSE_CTable */ 15 | #include "zstd/common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ 16 | 17 | namespace zstd { 18 | 19 | typedef enum { 20 | ZSTD_defaultDisallowed = 0, 21 | ZSTD_defaultAllowed = 1 22 | } ZSTD_defaultPolicy_e; 23 | 24 | symbolEncodingType_e 25 | ZSTD_selectEncodingType( 26 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, 27 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, 28 | FSE_CTable const* prevCTable, 29 | short const* defaultNorm, U32 defaultNormLog, 30 | ZSTD_defaultPolicy_e const isDefaultAllowed, 31 | ZSTD_strategy const strategy); 32 | 33 | size_t 34 | ZSTD_buildCTable(void* dst, size_t dstCapacity, 35 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, 36 | unsigned* count, U32 max, 37 | const BYTE* codeTable, size_t nbSeq, 38 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, 39 | const FSE_CTable* prevCTable, size_t prevCTableSize, 40 | void* entropyWorkspace, size_t entropyWorkspaceSize); 41 | 42 | size_t ZSTD_encodeSequences( 43 | void* dst, size_t dstCapacity, 44 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, 45 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, 46 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, 47 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); 48 | 49 | size_t ZSTD_fseBitCost( 50 | FSE_CTable const* ctable, 51 | unsigned const* count, 52 | unsigned const max); 53 | 54 | size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, 55 | unsigned const* count, unsigned const max); 56 | 57 | } 58 | 59 | #endif /* ZSTD_COMPRESS_SEQUENCES_H */ 60 | -------------------------------------------------------------------------------- /tests/testthat/test-utils.R: 
-------------------------------------------------------------------------------- 1 | test_that("is_rcmd_check", { 2 | withr::local_envvar(NOT_CRAN = "true") 3 | expect_false(is_rcmd_check()) 4 | 5 | withr::local_envvar( 6 | NOT_CRAN = NA_character_, 7 | "_R_CHECK_PACKAGE_NAME_" = NA_character_ 8 | ) 9 | expect_false(is_rcmd_check()) 10 | 11 | withr::local_envvar( 12 | NOT_CRAN = NA_character_, 13 | "_R_CHECK_PACKAGE_NAME_" = "foo" 14 | ) 15 | expect_true(is_rcmd_check()) 16 | }) 17 | 18 | test_that("is_flag", { 19 | expect_true(is_flag(TRUE)) 20 | expect_true(is_flag(FALSE)) 21 | 22 | expect_false(is_flag(c(TRUE, TRUE))) 23 | expect_false(is_flag(1)) 24 | expect_false(is_flag(NA)) 25 | }) 26 | 27 | test_that("is_string", { 28 | expect_true(is_string("a")) 29 | 30 | expect_false(is_string(c("a", "b"))) 31 | expect_false(is_string(NA_character_)) 32 | expect_false(is_string(1)) 33 | expect_false(is_string(NULL)) 34 | }) 35 | 36 | test_that("is_uint32", { 37 | expect_true(is_uint32(0)) 38 | expect_true(is_uint32(0L)) 39 | expect_true(is_uint32(100)) 40 | expect_true(is_uint32(100L)) 41 | expect_true(is_uint32(4294967295)) 42 | expect_true(is_uint32(4000000000)) 43 | expect_true(is_uint32(2147483647)) 44 | expect_true(is_uint32(2147483647L)) 45 | 46 | expect_false(is_uint32(1 / 2)) 47 | expect_false(is_uint32(-1)) 48 | expect_false(is_uint32(4294967296)) 49 | expect_false(is_uint32("a")) 50 | expect_false(is_uint32(NA_integer_)) 51 | expect_false(is_uint32(NA_real_)) 52 | expect_false(is_uint32("foo")) 53 | }) 54 | 55 | test_that("is_icount", { 56 | expect_true(is_icount(1L)) 57 | expect_true(is_icount(100L)) 58 | expect_true(is_icount(2147483647L)) 59 | 60 | expect_false(is_icount(NA_integer_)) 61 | expect_false(is_icount(1:2)) 62 | expect_false(is_icount(1)) 63 | expect_false(is_icount(0L)) 64 | expect_false(is_icount(-100L)) 65 | }) 66 | 67 | test_that("is_dcount", { 68 | expect_true(is_dcount(1)) 69 | expect_true(is_dcount(100)) 70 | 
expect_true(is_dcount(2147483647)) 71 | 72 | expect_false(is_dcount(NA_real_)) 73 | expect_false(is_dcount(1:2)) 74 | expect_false(is_dcount(1L)) 75 | expect_false(is_dcount(0)) 76 | expect_false(is_dcount(-100)) 77 | }) 78 | 79 | test_that("as_count", { 80 | expect_equal(as_count(1), 1L) 81 | expect_equal(as_count(100), 100L) 82 | expect_snapshot(error = TRUE, { 83 | as_count(1:2) 84 | as_count(0) 85 | as_count(NA_real_) 86 | as_count(-100) 87 | as_count(-100L) 88 | }) 89 | }) 90 | -------------------------------------------------------------------------------- /src/flatbuffers/string.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2021 Google Inc. All rights reserved. 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | 17 | #ifndef FLATBUFFERS_STRING_H_ 18 | #define FLATBUFFERS_STRING_H_ 19 | 20 | #include "flatbuffers/base.h" 21 | #include "flatbuffers/vector.h" 22 | 23 | namespace flatbuffers { 24 | 25 | struct String : public Vector { 26 | const char *c_str() const { return reinterpret_cast(Data()); } 27 | std::string str() const { return std::string(c_str(), size()); } 28 | 29 | // clang-format off 30 | #ifdef FLATBUFFERS_HAS_STRING_VIEW 31 | flatbuffers::string_view string_view() const { 32 | return flatbuffers::string_view(c_str(), size()); 33 | } 34 | 35 | /* implicit */ 36 | operator flatbuffers::string_view() const { 37 | return flatbuffers::string_view(c_str(), size()); 38 | } 39 | #endif // FLATBUFFERS_HAS_STRING_VIEW 40 | // clang-format on 41 | 42 | bool operator<(const String &o) const { 43 | return StringLessThan(this->data(), this->size(), o.data(), o.size()); 44 | } 45 | }; 46 | 47 | // Convenience function to get std::string from a String returning an empty 48 | // string on null pointer. 49 | static inline std::string GetString(const String *str) { 50 | return str ? str->str() : ""; 51 | } 52 | 53 | // Convenience function to get char* from a String returning an empty string on 54 | // null pointer. 55 | static inline const char *GetCstring(const String *str) { 56 | return str ? str->c_str() : ""; 57 | } 58 | 59 | #ifdef FLATBUFFERS_HAS_STRING_VIEW 60 | // Convenience function to get string_view from a String returning an empty 61 | // string_view on null pointer. 62 | static inline flatbuffers::string_view GetStringView(const String *str) { 63 | return str ? 
str->string_view() : flatbuffers::string_view(); 64 | } 65 | #endif // FLATBUFFERS_HAS_STRING_VIEW 66 | 67 | } // namespace flatbuffers 68 | 69 | #endif // FLATBUFFERS_STRING_H_ 70 | -------------------------------------------------------------------------------- /src/thrift/async/TAsyncChannel.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_ASYNC_TASYNCCHANNEL_H_ 21 | #define _THRIFT_ASYNC_TASYNCCHANNEL_H_ 1 22 | 23 | #include 24 | #include 25 | #include 26 | 27 | namespace apache { 28 | namespace thrift { 29 | namespace transport { 30 | class TMemoryBuffer; 31 | } 32 | } 33 | } 34 | 35 | namespace apache { 36 | namespace thrift { 37 | namespace async { 38 | using apache::thrift::transport::TMemoryBuffer; 39 | 40 | class TAsyncChannel { 41 | public: 42 | typedef std::function VoidCallback; 43 | 44 | virtual ~TAsyncChannel() = default; 45 | 46 | // is the channel in a good state? 47 | virtual bool good() const = 0; 48 | virtual bool error() const = 0; 49 | virtual bool timedOut() const = 0; 50 | 51 | /** 52 | * Send a message over the channel. 
53 | */ 54 | virtual void sendMessage(const VoidCallback& cob, 55 | apache::thrift::transport::TMemoryBuffer* message) = 0; 56 | 57 | /** 58 | * Receive a message from the channel. 59 | */ 60 | virtual void recvMessage(const VoidCallback& cob, 61 | apache::thrift::transport::TMemoryBuffer* message) = 0; 62 | 63 | /** 64 | * Send a message over the channel and receive a response. 65 | */ 66 | virtual void sendAndRecvMessage(const VoidCallback& cob, 67 | apache::thrift::transport::TMemoryBuffer* sendBuf, 68 | apache::thrift::transport::TMemoryBuffer* recvBuf); 69 | }; 70 | } 71 | } 72 | } // apache::thrift::async 73 | 74 | #endif // #ifndef _THRIFT_ASYNC_TASYNCCHANNEL_H_ 75 | -------------------------------------------------------------------------------- /src/thrift/concurrency/Mutex.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 
18 | */ 19 | 20 | #ifndef _THRIFT_CONCURRENCY_MUTEX_H_ 21 | #define _THRIFT_CONCURRENCY_MUTEX_H_ 1 22 | 23 | #include 24 | #include 25 | 26 | namespace apache { 27 | namespace thrift { 28 | namespace concurrency { 29 | 30 | /** 31 | * NOTE: All mutex implementations throw an exception on failure. See each 32 | * specific implementation to understand the exception type(s) used. 33 | */ 34 | 35 | /** 36 | * A simple mutex class 37 | * 38 | * @version $Id:$ 39 | */ 40 | class Mutex { 41 | public: 42 | Mutex(); 43 | virtual ~Mutex() = default; 44 | 45 | virtual void lock() const; 46 | virtual bool trylock() const; 47 | virtual bool timedlock(int64_t milliseconds) const; 48 | virtual void unlock() const; 49 | 50 | void* getUnderlyingImpl() const; 51 | 52 | private: 53 | class impl; 54 | std::shared_ptr impl_; 55 | }; 56 | 57 | 58 | class Guard : apache::thrift::TNonCopyable { 59 | public: 60 | Guard(const Mutex& value, int64_t timeout = 0) : mutex_(&value) { 61 | if (timeout == 0) { 62 | value.lock(); 63 | } else if (timeout < 0) { 64 | if (!value.trylock()) { 65 | mutex_ = nullptr; 66 | } 67 | } else { 68 | if (!value.timedlock(timeout)) { 69 | mutex_ = nullptr; 70 | } 71 | } 72 | } 73 | ~Guard() { 74 | if (mutex_) { 75 | mutex_->unlock(); 76 | } 77 | } 78 | 79 | operator bool() const { return (mutex_ != nullptr); } 80 | 81 | private: 82 | const Mutex* mutex_; 83 | }; 84 | 85 | } 86 | } 87 | } // apache::thrift::concurrency 88 | 89 | #endif // #ifndef _THRIFT_CONCURRENCY_MUTEX_H_ 90 | -------------------------------------------------------------------------------- /src/thrift/concurrency/ThreadFactory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_CONCURRENCY_THREADFACTORY_H_ 21 | #define _THRIFT_CONCURRENCY_THREADFACTORY_H_ 1 22 | 23 | #include 24 | 25 | #include 26 | namespace apache { 27 | namespace thrift { 28 | namespace concurrency { 29 | 30 | /** 31 | * Factory to create thread object and bind them to Runnable 32 | * object for execution 33 | */ 34 | class ThreadFactory { 35 | public: 36 | /** 37 | * All threads created by a factory are reference-counted 38 | * via std::shared_ptr. The factory guarantees that threads and the Runnable tasks 39 | * they host will be properly cleaned up once the last strong reference 40 | * to both is given up. 41 | * 42 | * By default threads are not joinable. 43 | */ 44 | ThreadFactory(bool detached = true) : detached_(detached) { } 45 | 46 | virtual ~ThreadFactory() = default; 47 | 48 | /** 49 | * Gets current detached mode 50 | */ 51 | bool isDetached() const { return detached_; } 52 | 53 | /** 54 | * Sets the detached disposition of newly created threads. 55 | */ 56 | void setDetached(bool detached) { detached_ = detached; } 57 | 58 | /** 59 | * Create a new thread. 
60 | */ 61 | virtual std::shared_ptr newThread(std::shared_ptr runnable) const; 62 | 63 | /** 64 | * Gets the current thread id or unknown_thread_id if the current thread is not a thrift thread 65 | */ 66 | Thread::id_t getCurrentThreadId() const; 67 | 68 | private: 69 | bool detached_; 70 | }; 71 | 72 | } 73 | } 74 | } // apache::thrift::concurrency 75 | 76 | #endif // #ifndef _THRIFT_CONCURRENCY_THREADFACTORY_H_ 77 | -------------------------------------------------------------------------------- /man/read_parquet_page.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/porcelain.R 3 | \name{read_parquet_page} 4 | \alias{read_parquet_page} 5 | \title{Read a page from a Parquet file} 6 | \usage{ 7 | read_parquet_page(file, offset) 8 | } 9 | \arguments{ 10 | \item{file}{Path to a Parquet file.} 11 | 12 | \item{offset}{Integer offset of the start of the page in the file. 13 | See \code{\link[=read_parquet_pages]{read_parquet_pages()}} for a list of all pages and their offsets.} 14 | } 15 | \value{ 16 | Named list. Many entries correspond to the columns of 17 | the result of \code{\link[=read_parquet_pages]{read_parquet_pages()}}. Additional entries are: 18 | \itemize{ 19 | \item \code{codec}: compression codec. Possible values: 20 | \item \code{has_repetition_levels}: whether the page has repetition levels. 21 | \item \code{has_definition_levels}: whether the page has definition levels. 22 | \item \code{schema_column}: which schema column the page corresponds to. Note 23 | that only leaf columns have pages. 24 | \item \code{data_type}: low level Parquet data type. Possible values: 25 | \item \code{repetition_type}: whether the column the page belongs to is 26 | \code{REQUIRED}, \code{OPTIONAL} or \code{REPEATED}. 27 | \item \code{page_header}: the bytes of the page header in a raw vector. 
28 | \item \code{num_null}: number of missing (\code{NA}) values. Only set in V2 data 29 | pages. 30 | \item \code{num_rows}: this is the same as \code{num_values} for flat tables, i.e. 31 | files without repetition levels. 32 | \item \code{compressed_data}: the data of the page in a raw vector. It includes 33 | repetition and definition levels, if any. 34 | \item \code{data}: the uncompressed data, if nanoparquet supports the 35 | compression codec of the file (GZIP and SNAPPY at the time of 36 | writing), or if the file is not compressed. In the latter case it 37 | is the same as \code{compressed_data}. 38 | } 39 | } 40 | \description{ 41 | Read a page from a Parquet file 42 | } 43 | \examples{ 44 | \dontshow{if (Sys.getenv("IN_PKGDOWN") == "true") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 45 | file_name <- system.file("extdata/userdata1.parquet", package = "nanoparquet") 46 | nanoparquet:::read_parquet_pages(file_name) 47 | options(max.print = 100) # otherwise long raw vector 48 | nanoparquet:::read_parquet_page(file_name, 4L) 49 | \dontshow{\}) # examplesIf} 50 | } 51 | \seealso{ 52 | \code{\link[=read_parquet_pages]{read_parquet_pages()}} for a summary of all pages. 53 | } 54 | \keyword{internal} 55 | -------------------------------------------------------------------------------- /src/thrift/transport/TFDTransport.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TRANSPORT_TFDTRANSPORT_H_ 21 | #define _THRIFT_TRANSPORT_TFDTRANSPORT_H_ 1 22 | 23 | #include 24 | #ifdef HAVE_SYS_TIME_H 25 | #include 26 | #endif 27 | 28 | #include 29 | #include 30 | 31 | namespace apache { 32 | namespace thrift { 33 | namespace transport { 34 | 35 | /** 36 | * Dead-simple wrapper around a file descriptor. 37 | * 38 | */ 39 | class TFDTransport : public TVirtualTransport { 40 | public: 41 | enum ClosePolicy { NO_CLOSE_ON_DESTROY = 0, CLOSE_ON_DESTROY = 1 }; 42 | 43 | TFDTransport(int fd, ClosePolicy close_policy = NO_CLOSE_ON_DESTROY, 44 | std::shared_ptr config = nullptr) 45 | : TVirtualTransport(config), fd_(fd), close_policy_(close_policy) { 46 | } 47 | 48 | ~TFDTransport() override { 49 | if (close_policy_ == CLOSE_ON_DESTROY) { 50 | try { 51 | close(); 52 | } catch (TTransportException& ex) { 53 | GlobalOutput.printf("~TFDTransport TTransportException: '%s'", ex.what()); 54 | } 55 | } 56 | } 57 | 58 | bool isOpen() const override { return fd_ >= 0; } 59 | 60 | void open() override {} 61 | 62 | void close() override; 63 | 64 | uint32_t read(uint8_t* buf, uint32_t len); 65 | 66 | void write(const uint8_t* buf, uint32_t len); 67 | 68 | void setFD(int fd) { fd_ = fd; } 69 | int getFD() { return fd_; } 70 | 71 | protected: 72 | int fd_; 73 | ClosePolicy close_policy_; 74 | }; 75 | } 76 | } 77 | } // apache::thrift::transport 78 | 79 | #endif // #ifndef _THRIFT_TRANSPORT_TFDTRANSPORT_H_ 80 | -------------------------------------------------------------------------------- 
/src/thrift/transport/THttpClient.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TRANSPORT_THTTPCLIENT_H_ 21 | #define _THRIFT_TRANSPORT_THTTPCLIENT_H_ 1 22 | 23 | #include 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace transport { 28 | 29 | /** 30 | * @brief Client transport using HTTP. The path is an optional field that is 31 | * not required by Thrift HTTP server or client. It can be used i.e. with HTTP 32 | * redirection, load balancing or forwarding on the server. 33 | */ 34 | class THttpClient : public THttpTransport { 35 | public: 36 | /** 37 | * @brief Constructor that wraps an existing transport, but also sets the 38 | * host and path. The host and path are not used for the connection but are 39 | * set in the HTTP header of the transport. 40 | */ 41 | THttpClient(std::shared_ptr transport, 42 | std::string host = "localhost", 43 | std::string path = "/service", 44 | std::shared_ptr config = nullptr); 45 | 46 | /** 47 | * @brief Constructor that will create a new socket transport using the host 48 | * and port. 
49 | */ 50 | THttpClient(std::string host, int port, 51 | std::string path = "", 52 | std::shared_ptr config = nullptr); 53 | 54 | ~THttpClient() override; 55 | 56 | void flush() override; 57 | 58 | void setPath(std::string path); 59 | 60 | protected: 61 | std::string host_; 62 | std::string path_; 63 | 64 | void parseHeader(char* header) override; 65 | bool parseStatusLine(char* status) override; 66 | }; 67 | } 68 | } 69 | } // apache::thrift::transport 70 | 71 | #endif // #ifndef _THRIFT_TRANSPORT_THTTPCLIENT_H_ 72 | -------------------------------------------------------------------------------- /man/read_parquet_pages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/porcelain.R 3 | \name{read_parquet_pages} 4 | \alias{read_parquet_pages} 5 | \title{Metadata of all pages of a Parquet file} 6 | \usage{ 7 | read_parquet_pages(file) 8 | } 9 | \arguments{ 10 | \item{file}{Path to a Parquet file.} 11 | } 12 | \value{ 13 | Data frame with columns: 14 | \itemize{ 15 | \item \code{file_name}: file name. 16 | \item \code{row_group}: id of the row group the page belongs to, 17 | an integer between 0 and the number of row groups 18 | minus one. 19 | \item \code{column}: id of the column. An integer between 0 and the 20 | number of leaf columns minus one. Note that only leaf 21 | columns are considered, as non-leaf columns do not 22 | have any pages. 23 | \item \code{page_type}: \code{DATA_PAGE}, \code{INDEX_PAGE}, \code{DICTIONARY_PAGE} or 24 | \code{DATA_PAGE_V2}. 25 | \item \code{page_header_offset}: offset of the data page (its header) in the 26 | file. 27 | \item \code{uncompressed_page_size}: does not include the page header, as per 28 | Parquet spec. 29 | \item \code{compressed_page_size}: without the page header. 30 | \item \code{crc}: integer, checksum, if present in the file, can be \code{NA}. 
31 | \item \code{num_values}: number of data values in this page, including 32 | \code{NULL} (\code{NA} in R) values. 33 | \item \code{encoding}: encoding of the page, currently possible encodings: 34 | "PLAIN", "GROUP_VAR_INT", "PLAIN_DICTIONARY", "RLE", "BIT_PACKED", "DELTA_BINARY_PACKED", "DELTA_LENGTH_BYTE_ARRAY", "DELTA_BYTE_ARRAY", "RLE_DICTIONARY", "BYTE_STREAM_SPLIT". 35 | \item \code{definition_level_encoding}: encoding of the definition levels, 36 | see \code{encoding} for possible values. This can be missing in V2 data 37 | pages, where they are always RLE encoded. 38 | \item \code{repetition_level_encoding}: encoding of the repetition levels, 39 | see \code{encoding} for possible values. This can be missing in V2 data 40 | pages, where they are always RLE encoded. 41 | \item \code{data_offset}: offset of the actual data in the file. 42 | \item \code{page_header_length}: size of the page header, in bytes. 43 | } 44 | } 45 | \description{ 46 | Metadata of all pages of a Parquet file 47 | } 48 | \details{ 49 | Reading all the page headers might be slow for large files, especially 50 | if the file has many small pages. 51 | } 52 | \examples{ 53 | file_name <- system.file("extdata/userdata1.parquet", package = "nanoparquet") 54 | nanoparquet:::read_parquet_pages(file_name) 55 | } 56 | \seealso{ 57 | \code{\link[=read_parquet_page]{read_parquet_page()}} to read a page. 58 | } 59 | \keyword{internal} 60 | -------------------------------------------------------------------------------- /src/thrift/transport/TSSLServerSocket.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, 13 | * software distributed under the License is distributed on an 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | * KIND, either express or implied. See the License for the 16 | * specific language governing permissions and limitations 17 | * under the License. 18 | */ 19 | 20 | #ifndef _THRIFT_TRANSPORT_TSSLSERVERSOCKET_H_ 21 | #define _THRIFT_TRANSPORT_TSSLSERVERSOCKET_H_ 1 22 | 23 | #include <thrift/transport/TServerSocket.h> 24 | 25 | namespace apache { 26 | namespace thrift { 27 | namespace transport { 28 | 29 | class TSSLSocketFactory; 30 | 31 | /** 32 | * Server socket that accepts SSL connections. 33 | */ 34 | class TSSLServerSocket : public TServerSocket { 35 | public: 36 | /** 37 | * Constructor. Binds to all interfaces. 38 | * 39 | * @param port Listening port 40 | * @param factory SSL socket factory implementation 41 | */ 42 | TSSLServerSocket(int port, std::shared_ptr<TSSLSocketFactory> factory); 43 | 44 | /** 45 | * Constructor. Binds to the specified address. 46 | * 47 | * @param address Address to bind to 48 | * @param port Listening port 49 | * @param factory SSL socket factory implementation 50 | */ 51 | TSSLServerSocket(const std::string& address, 52 | int port, 53 | std::shared_ptr<TSSLSocketFactory> factory); 54 | 55 | /** 56 | * Constructor. Binds to all interfaces. 
57 | * 58 | * @param port Listening port 59 | * @param sendTimeout Socket send timeout 60 | * @param recvTimeout Socket receive timeout 61 | * @param factory SSL socket factory implementation 62 | */ 63 | TSSLServerSocket(int port, 64 | int sendTimeout, 65 | int recvTimeout, 66 | std::shared_ptr<TSSLSocketFactory> factory); 67 | 68 | protected: 69 | /* Overrides the inherited socket-creation hook for a raw THRIFT_SOCKET handle; presumably builds the SSL socket through factory_ -- confirm in TSSLServerSocket.cpp. */ std::shared_ptr<TSocket> createSocket(THRIFT_SOCKET socket) override; 70 | /* Shared handle to an SSL socket factory; presumably the one passed to the constructor -- confirm in TSSLServerSocket.cpp. */ std::shared_ptr<TSSLSocketFactory> factory_; 71 | }; 72 | } 73 | } 74 | } 75 | 76 | #endif 77 | --------------------------------------------------------------------------------