├── .github
│   ├── .gitignore
│   └── workflows
│       ├── test-coverage.yaml
│       ├── pkgdown.yaml
│       ├── pr-commands.yaml
│       └── R-CMD-check.yaml
├── tests
│   ├── testthat
│   │   ├── teardown.R
│   │   ├── setup.R
│   │   ├── test-bm-table-to-df.R
│   │   ├── test-bm-array-to-vector.R
│   │   ├── test-bm-array-altrep-materialization.R
│   │   ├── test-bm-write-csv.R
│   │   ├── test-bm-write-file.R
│   │   ├── test-bm-row-group-size.R
│   │   ├── test-bm-read-csv.R
│   │   ├── test-bm-read-json.R
│   │   ├── test-custom-duckdb.R
│   │   ├── test-ensure-lib.R
│   │   ├── test-bm-dataset-taxi-2013.R
│   │   ├── test-ensure-source.R
│   │   ├── test-measure.R
│   │   ├── test-external-dependencies.R
│   │   ├── test-bm-read-file.R
│   │   ├── test-ensure-tpch-source.R
│   │   ├── test-params.R
│   │   ├── test-benchmark-dataframe.R
│   │   ├── helper.R
│   │   ├── test-util.R
│   │   ├── test-result.R
│   │   ├── test-ensure-format.R
│   │   └── test-publish.R
│   └── testthat.R
├── LICENSE
├── inst
│   ├── test_data
│   │   ├── chi_traffic_sample.parquet
│   │   └── datasets
│   │       └── taxi_2013
│   │           └── taxi_2013_{1..12}.csv.gz  (12 files)
│   ├── tpch
│   │   ├── answers
│   │   │   ├── scale-factor-1
│   │   │   │   └── tpch-q{01..22}-sf1.parquet  (22 files)
│   │   │   ├── scale-factor-10
│   │   │   │   └── tpch-q{01..22}-sf10.parquet  (22 files)
│   │   │   ├── scale-factor-0.1
│   │   │   │   └── tpch-q{01..22}-sf0.1.parquet  (22 files)
│   │   │   └── scale-factor-0.01
│   │   │       └── tpch-q{01..22}-sf0.01.parquet  (22 files)
│   │   ├── answers_duckdb_data
│   │   │   ├── scale-factor-1
│   │   │   │   └── tpch-q{01..22}-sf1.parquet  (22 files)
│   │   │   ├── scale-factor-10
│   │   │   │   └── tpch-q{01..22}-sf10.parquet  (22 files)
│   │   │   ├── scale-factor-0.1
│   │   │   │   └── tpch-q{01..22}-sf0.1.parquet  (22 files)
│   │   │   └── scale-factor-0.01
│   │   │       └── tpch-q{01..22}-sf0.01.parquet  (22 files)
│   │   └── README.md
│   ├── regenerate-benchmarks-json.R
│   ├── benchmarks.json
│   └── tpch-answer-gen.R
├── .gitignore
├── .Rbuildignore
├── codecov.yml
├── man
│   ├── all_sources.Rd
│   ├── get_csv_writer.Rd
│   ├── known_sources.Rd
│   ├── get_read_function.Rd
│   ├── get_json_reader.Rd
│   ├── read_source.Rd
│   ├── tpch_tables.Rd
│   ├── remote_dataset.Rd
│   ├── get_csv_reader.Rd
│   ├── get_source_attr.Rd
│   ├── get_dataset_attr.Rd
│   ├── install_benchconnect.Rd
│   ├── BenchEnvironment.Rd
│   ├── file_with_ext.Rd
│   ├── null-default.Rd
│   ├── install_datalogistik.Rd
│   ├── sync_and_drop_caches.Rd
│   ├── confirm_mem_alloc.Rd
│   ├── install_pipx.Rd
│   ├── tables_refed.Rd
│   ├── tpc_h_queries.Rd
│   ├── get_package_benchmarks.Rd
│   ├── get_write_function.Rd
│   ├── ensure_source.Rd
│   ├── get_query_func.Rd
│   ├── get_params_summary.Rd
│   ├── table_to_df.Rd
│   ├── dataset_taxi_parquet.Rd
│   ├── df_to_table.Rd
│   ├── write_csv.Rd
│   ├── knowns.Rd
│   ├── data_file.Rd
│   ├── placebo.Rd
│   ├── dataset_taxi_2013.Rd
│   ├── read_csv.Rd
│   ├── ensure_format.Rd
│   ├── ensure_dataset.Rd
│   ├── read_file.Rd
│   ├── row_group_size.Rd
│   ├── write_file.Rd
│   ├── read_json.Rd
│   ├── validate_format.Rd
│   ├── BenchmarkDataFrame.Rd
│   ├── assemble_metadata.Rd
│   ├── tpch_answer.Rd
│   ├── array_altrep_materialization.Rd
│   ├── array_to_vector.Rd
│   ├── get_sql_query_func.Rd
│   ├── generate_tpch.Rd
│   ├── measure.Rd
│   ├── get_input_func.Rd
│   ├── as.data.frame.BenchmarkResults.Rd
│   ├── tpc_h.Rd
│   ├── get_default_parameters.Rd
│   ├── run_bm.Rd
│   ├── run_one.Rd
│   ├── run_benchmark.Rd
│   ├── R6Point1Class.Rd
│   └── run.Rd
├── arrowbench.Rproj
├── R
│   ├── setup.R
│   ├── bm-remote-dataset.R
│   ├── bm-table-to-df.R
│   ├── publish.R
│   ├── bm-placebo.R
│   ├── bm-df-to-table.R
│   ├── benchmark-dataframe.R
│   ├── ensure-tpch-source.R
│   ├── custom-duckdb.R
│   ├── measure.R
│   ├── bm-row-group-size.R
│   ├── params.R
│   ├── bm-write-csv.R
│   ├── bm-array-to-vector.R
│   ├── bm-write-file.R
│   ├── bm-read-file.R
│   ├── bm-dataset-taxi-parquet.R
│   ├── external-dependencies.R
│   ├── bm-dataset-taxi-2013.R
│   └── bm-read-csv.R
├── LICENSE.md
├── DESCRIPTION
└── NAMESPACE

--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
*.html

--------------------------------------------------------------------------------
/tests/testthat/teardown.R:
--------------------------------------------------------------------------------
wipe_results()

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
YEAR: 2021
COPYRIGHT HOLDER: 2021 Ursa Computing

--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
library(testthat)
library(arrowbench)

test_check("arrowbench")
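The man/ index above shows the package's entry points for running benchmarks (run_benchmark.Rd, run_one.Rd, get_package_benchmarks.Rd, placebo.Rd). For orientation, here is a minimal sketch of how those exports fit together; the names are taken from the listing, but the exact signatures and return types are assumptions, not verified against the sources:

```r
library(arrowbench)

# Enumerate the benchmarks the package ships. The name comes from man/ above;
# that it returns something like a BenchmarkDataFrame is an assumption based
# on man/BenchmarkDataFrame.Rd.
benchmarks <- get_package_benchmarks()

# Run the no-op "placebo" benchmark (man/placebo.Rd) to exercise the
# measurement machinery without doing real work. Assumed signature.
res <- run_benchmark(placebo)

# man/as.data.frame.BenchmarkResults.Rd suggests results coerce to a data.frame.
as.data.frame(res)
```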
--------------------------------------------------------------------------------
Binary assets (parquet and csv.gz files) are not inlined in this dump. Each is
mirrored at https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/<path>:

/inst/test_data/chi_traffic_sample.parquet
/inst/test_data/datasets/taxi_2013/taxi_2013_{1..12}.csv.gz
/inst/tpch/answers/scale-factor-1/tpch-q{01..22}-sf1.parquet
/inst/tpch/answers/scale-factor-10/tpch-q{01..22}-sf10.parquet
/inst/tpch/answers/scale-factor-0.1/tpch-q{01..22}-sf0.1.parquet
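Because the TPC-H answer files ship under inst/, an installed copy of arrowbench exposes them through system.file(). A minimal sketch for inspecting one answer, assuming the arrow package is installed (system.file() and arrow::read_parquet() are standard APIs; only the intent to read answers this way is inferred from the layout):

```r
library(arrow)

# inst/ is dropped at install time, so the bundled answer files live directly
# under the installed package root and can be located with system.file().
answer_path <- system.file(
  "tpch/answers/scale-factor-1/tpch-q01-sf1.parquet",
  package = "arrowbench"
)
q01_answer <- read_parquet(answer_path)
head(q01_answer)
```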
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Rproj.user
.Rhistory
.RData
.Ruserdata
tests/testthat/results
.DS_Store
results/
data/
tests/testthat/data/
.vscode/

--------------------------------------------------------------------------------
Remaining binary assets, continuing the list above:

/inst/tpch/answers/scale-factor-0.01/tpch-q{01..22}-sf0.01.parquet
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
^.*\.Rproj$
^\.Rproj\.user$
^r_libs
^data
^source_data
^results
^plots
^.*\.prof$
^codecov\.yml$
^\.github$
^LICENSE\.md$

--------------------------------------------------------------------------------
/inst/tpch/answers_duckdb_data/scale-factor-1/tpch-q{01..22}-sf1.parquet
/inst/tpch/answers_duckdb_data/scale-factor-10/tpch-q{01..22}-sf10.parquet
/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q{01..14}-sf0.1.parquet  (dump ends mid-directory here)
-------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q15-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q15-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q16-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q16-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q17-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q17-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q18-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q18-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q19-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q19-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q20-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q20-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q21-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q21-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q22-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q22-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q01-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q01-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q02-sf0.01.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q02-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q03-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q03-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q04-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q04-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q05-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q05-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q06-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q06-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q07-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q07-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q08-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q08-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q09-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q09-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q10-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q10-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q11-sf0.01.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q11-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q12-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q12-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q13-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q13-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q14-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q14-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q15-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q15-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q16-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q16-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q17-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q17-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q18-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q18-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q19-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q19-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q20-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q20-sf0.01.parquet 
-------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q21-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q21-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q22-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q22-sf0.01.parquet -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | if (!pipx_available()) { 2 | install_pipx() 3 | } 4 | 5 | if (!benchconnect_available()) { 6 | install_benchconnect() 7 | } 8 | 9 | if (!datalogistik_available()) { 10 | install_datalogistik() 11 | } 12 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-table-to-df.R: -------------------------------------------------------------------------------- 1 | test_that("table_to_df benchmark works", { 2 | expect_benchmark_run( 3 | run_benchmark( 4 | table_to_df, 5 | source = "nyctaxi_sample", 6 | cpu_count = arrow::cpu_count() 7 | ) 8 | ) 9 | }) 10 | 11 | wipe_results() -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-array-to-vector.R: -------------------------------------------------------------------------------- 1 | test_that("array_to_vector benchmark runs", { 2 | 3 | expect_benchmark_run( 4 | run_benchmark( 5 | array_to_vector, 6 | source = "nyctaxi_sample", 7 | cpu_count = arrow::cpu_count(), 8 | alt_rep = FALSE 9 | ) 10 | ) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-array-altrep-materialization.R: -------------------------------------------------------------------------------- 1 | test_that("array_altrep_materialization benchmark runs", { 2 | 3 | expect_benchmark_run( 4 | run_benchmark( 5 | array_altrep_materialization, 6 | source = "fanniemae_sample", 7 | altrep = TRUE, 8 | cpu_count = arrow::cpu_count() 9 | ) 10 | ) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-write-csv.R: -------------------------------------------------------------------------------- 1 | test_that("write_csv benchmark works", { 2 | expect_benchmark_run( 3 | run_benchmark( 4 | write_csv, 5 | source = "nyctaxi_sample", 6 | writer = c("arrow", "data.table", "vroom", "readr", "base"), 7 | cpu_count = arrow::cpu_count() 8 | ) 9 | ) 10 | }) 11 | 12 | wipe_results() 13 | -------------------------------------------------------------------------------- /man/all_sources.Rd: -------------------------------------------------------------------------------- 1 
| % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/known-sources.R 3 | \docType{data} 4 | \name{all_sources} 5 | \alias{all_sources} 6 | \title{Known data files} 7 | \format{ 8 | An object of class \code{list} of length 13. 9 | } 10 | \usage{ 11 | all_sources 12 | } 13 | \description{ 14 | Known data files 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/get_csv_writer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-write-csv.R 3 | \name{get_csv_writer} 4 | \alias{get_csv_writer} 5 | \title{Get a CSV writer} 6 | \usage{ 7 | get_csv_writer(writer) 8 | } 9 | \arguments{ 10 | \item{writer}{the writer to use} 11 | } 12 | \value{ 13 | the csv writer 14 | } 15 | \description{ 16 | Get a CSV writer 17 | } 18 | -------------------------------------------------------------------------------- /man/known_sources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/known-sources.R 3 | \docType{data} 4 | \name{known_sources} 5 | \alias{known_sources} 6 | \title{Known data files} 7 | \format{ 8 | An object of class \code{list} of length 10. 9 | } 10 | \usage{ 11 | known_sources 12 | } 13 | \description{ 14 | Known data files 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/get_read_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-file.R 3 | \name{get_read_function} 4 | \alias{get_read_function} 5 | \title{Get a reader} 6 | \usage{ 7 | get_read_function(file_type) 8 | } 9 | \arguments{ 10 | \item{file_type}{what file_type to read} 11 | } 12 | \value{ 13 | the read function to use 14 | } 15 | \description{ 16 | Get a reader 17 | } 18 | -------------------------------------------------------------------------------- /man/get_json_reader.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-json.R 3 | \name{get_json_reader} 4 | \alias{get_json_reader} 5 | \title{Get a JSON reader} 6 | \usage{ 7 | get_json_reader(reader) 8 | } 9 | \arguments{ 10 | \item{reader}{string of the reader package to use} 11 | } 12 | \value{ 13 | the JSON function 14 | } 15 | \description{ 16 | Get a JSON reader 17 | } 18 | -------------------------------------------------------------------------------- /man/read_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{read_source} 4 | \alias{read_source} 5 | \title{Read a known source} 6 | \usage{ 7 | read_source(file, ...) 
8 | } 9 | \arguments{ 10 | \item{file}{file to read} 11 | 12 | \item{...}{extra arguments to pass} 13 | } 14 | \value{ 15 | the source 16 | } 17 | \description{ 18 | Read a known source 19 | } 20 | -------------------------------------------------------------------------------- /man/tpch_tables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-tpch-source.R 3 | \docType{data} 4 | \name{tpch_tables} 5 | \alias{tpch_tables} 6 | \title{Table names for TPC-H benchmarks} 7 | \format{ 8 | An object of class \code{character} of length 8. 9 | } 10 | \usage{ 11 | tpch_tables 12 | } 13 | \description{ 14 | Table names for TPC-H benchmarks 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/remote_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-remote-dataset.R 3 | \docType{data} 4 | \name{remote_dataset} 5 | \alias{remote_dataset} 6 | \title{Remote (S3) dataset reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | remote_dataset 12 | } 13 | \description{ 14 | Remote (S3) dataset reading 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/get_csv_reader.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-csv.R 3 | \name{get_csv_reader} 4 | \alias{get_csv_reader} 5 | \title{Get a CSV reader} 6 | \usage{ 7 | get_csv_reader(reader, delim) 8 | } 9 | \arguments{ 10 | \item{reader}{the reader to use} 11 | 12 | \item{delim}{the delimiter to use} 13 | } 14 | \value{ 15 | the csv reader 16 | } 17 | \description{ 18 | Get a CSV reader 19 | } 20 | -------------------------------------------------------------------------------- /man/get_source_attr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{get_source_attr} 4 | \alias{get_source_attr} 5 | \title{Get source attributes} 6 | \usage{ 7 | get_source_attr(file, attr) 8 | } 9 | \arguments{ 10 | \item{file}{the file to get attributes for} 11 | 12 | \item{attr}{the attribute to get} 13 | } 14 | \description{ 15 | Get source attributes 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-write-file.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_that("write_file benchmark works", { 4 | expect_benchmark_run( 5 | run_benchmark( 6 | write_file, 7 | source = "nyctaxi_sample", 8 | file_type = c("parquet", "feather"), 9 | compression = c("uncompressed", "snappy", "lz4"), 10 | input_type = c("arrow_table", "data_frame"), 11 | cpu_count = arrow::cpu_count() 12 | ) 13 | ) 14 | }) 15 | 16 | wipe_results() 17 | -------------------------------------------------------------------------------- /man/get_dataset_attr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{get_dataset_attr} 4 | 
\alias{get_dataset_attr} 5 | \title{Get dataset attributes} 6 | \usage{ 7 | get_dataset_attr(name, attr) 8 | } 9 | \arguments{ 10 | \item{name}{the dataset to get attributes for} 11 | 12 | \item{attr}{the attribute to get} 13 | } 14 | \description{ 15 | Get dataset attributes 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/install_benchconnect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external-dependencies.R 3 | \name{install_benchconnect} 4 | \alias{install_benchconnect} 5 | \title{Install benchconnect} 6 | \usage{ 7 | install_benchconnect() 8 | } 9 | \description{ 10 | Install \href{https://github.com/conbench/conbench/tree/main/benchconnect}{benchconnect}, 11 | a utility for sending benchmark results to a Conbench server 12 | } 13 | -------------------------------------------------------------------------------- /arrowbench.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageRoxygenize: rd,collate,namespace 21 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-row-group-size.R: -------------------------------------------------------------------------------- 1 | test_that("row_group_size benchmark runs", { 2 | 3 | params <- get_default_parameters(row_group_size, chunk_size = list(NULL, 10000L, 100000L, 1000000L)) 4 | 5 | expect_benchmark_run( 6 | run_benchmark( 7 | row_group_size, 8 | source = "fanniemae_sample", 9 | queries = "everything", 10 | chunk_size = list(1000L), 11 | cpu_count = arrow::cpu_count() 12 | ) 13 | ) 14 | }) 15 | 16 | wipe_results() 17 | -------------------------------------------------------------------------------- /man/BenchEnvironment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark.R 3 | \name{BenchEnvironment} 4 | \alias{BenchEnvironment} 5 | \title{Create a test environment to run benchmarks in} 6 | \usage{ 7 | BenchEnvironment(...)
8 | } 9 | \arguments{ 10 | \item{...}{named list of parameters to set in the environment} 11 | } 12 | \value{ 13 | An environment 14 | } 15 | \description{ 16 | Create a test environment to run benchmarks in 17 | } 18 | -------------------------------------------------------------------------------- /man/file_with_ext.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{file_with_ext} 4 | \alias{file_with_ext} 5 | \title{Get a file with an extension} 6 | \usage{ 7 | file_with_ext(file, new_ext) 8 | } 9 | \arguments{ 10 | \item{file}{the file} 11 | 12 | \item{new_ext}{the new extension} 13 | } 14 | \value{ 15 | the file with the new extension 16 | } 17 | \description{ 18 | Get a file with an extension 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/null-default.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{null-default} 4 | \alias{null-default} 5 | \alias{\%||\%} 6 | \title{Default value for NULL} 7 | \usage{ 8 | a \%||\% b 9 | } 10 | \arguments{ 11 | \item{a}{Thing to test for \code{NULL}-ness} 12 | 13 | \item{b}{Thing to use if \code{a} is \code{NULL}} 14 | } 15 | \value{ 16 | \code{a} unless it's \code{NULL}, then \code{b} 17 | } 18 | \description{ 19 | Default value for NULL 20 | } 21 | -------------------------------------------------------------------------------- /man/install_datalogistik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external-dependencies.R 3 | \name{install_datalogistik} 4 | \alias{install_datalogistik} 5 | \title{Install datalogistik} 6 | \usage{ 7 | install_datalogistik() 8 | } 9 | \description{ 10 | Install \href{https://github.com/conbench/datalogistik}{datalogistik}, a utility 11 | for generating, downloading, and converting datasets for benchmarking. 12 | } 13 | \details{ 14 | Only for interactive use. 15 | } 16 | -------------------------------------------------------------------------------- /man/sync_and_drop_caches.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{sync_and_drop_caches} 4 | \alias{sync_and_drop_caches} 5 | \title{Attempt to drop disk caches} 6 | \usage{ 7 | sync_and_drop_caches() 8 | } 9 | \value{ 10 | Logical; were caches cleared? 11 | } 12 | \description{ 13 | Attempts to drop disk caches. Currently only works on Linux. If clearing 14 | fails, it sets an option so it will not reattempt on future calls.
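% (A hypothetical calling pattern, not from the package sources: a benchmark runner might call sync_and_drop_caches() before each timed iteration and simply proceed when it returns FALSE, e.g. on non-Linux systems.)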
15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/confirm_mem_alloc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{confirm_mem_alloc} 4 | \alias{confirm_mem_alloc} 5 | \title{Confirm that the memory allocator is enabled} 6 | \usage{ 7 | confirm_mem_alloc(mem_alloc) 8 | } 9 | \arguments{ 10 | \item{mem_alloc}{the memory allocator to be tested (one of: "jemalloc", "mimalloc", "system")} 11 | } 12 | \value{ 13 | nothing 14 | } 15 | \description{ 16 | Confirm that the memory allocator is enabled 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-read-csv.R: -------------------------------------------------------------------------------- 1 | test_that("read_csv setup", { 2 | defaults <- get_default_args(read_csv$setup) 3 | expect_named(defaults, c("source", "reader", "compression", "output_format"), ignore.order = TRUE) 4 | }) 5 | 6 | 7 | test_that("read_csv benchmark works", { 8 | expect_benchmark_run( 9 | run_benchmark( 10 | read_csv, 11 | source = c("nyctaxi_sample", "fanniemae_sample"), 12 | compression = "uncompressed", 13 | cpu_count = arrow::cpu_count() 14 | ) 15 | ) 16 | }) 17 | 18 | wipe_results() 19 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-read-json.R: -------------------------------------------------------------------------------- 1 | test_that("read_json setup", { 2 | defaults <- get_default_args(read_json$setup) 3 | expect_named(defaults, c("source", "reader", "compression", "output_format", "rbinder"), ignore.order = TRUE) 4 | }) 5 | 6 | test_that("read_json benchmark works", { 7 | expect_benchmark_run( 8 | run_benchmark( 9 | read_json, 10 | reader = "arrow", 11 | source = "fanniemae_sample", 12 | compression = "uncompressed", 13 | cpu_count = arrow::cpu_count() 14 | ) 15 | ) 16 | }) 17 | 18 | wipe_results() -------------------------------------------------------------------------------- /man/install_pipx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external-dependencies.R 3 | \name{install_pipx} 4 | \alias{install_pipx} 5 | \title{Install pipx} 6 | \usage{ 7 | install_pipx() 8 | } 9 | \description{ 10 | Install \href{https://pypa.github.io/pipx/}{pipx}, a version of pip that installs 11 | Python packages in isolated environments where they will always be available 12 | regardless of which version of Python is presently on \verb{$PATH}. Especially 13 | useful for installing packages designed to be used via CLIs.
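% (Hypothetical usage, mirroring tests/testthat/setup.R above: if (!pipx_available()) install_pipx(); afterwards pipx_available() should return TRUE.)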
14 | } 15 | -------------------------------------------------------------------------------- /man/tables_refed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{tables_refed} 4 | \alias{tables_refed} 5 | \title{For extracting table names from TPC-H queries} 6 | \usage{ 7 | tables_refed(query_func) 8 | } 9 | \arguments{ 10 | \item{query_func}{a function containing a dplyr pipeline} 11 | } 12 | \value{ 13 | all references inside of \code{input_func(...)}, collapsed 14 | } 15 | \description{ 16 | This searches a function for all references of \code{input_func(...)} and returns 17 | the contents of \code{...} 18 | } 19 | -------------------------------------------------------------------------------- /man/tpc_h_queries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tpch-queries.R 3 | \docType{data} 4 | \name{tpc_h_queries} 5 | \alias{tpc_h_queries} 6 | \title{All queries take an \code{input_func}, a function that will return a dplyr tbl 7 | referencing the table needed} 8 | \format{ 9 | An object of class \code{list} of length 22. 10 | } 11 | \usage{ 12 | tpc_h_queries 13 | } 14 | \description{ 15 | All queries take an \code{input_func}, a function that will return a dplyr tbl 16 | referencing the table needed. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/get_package_benchmarks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark-dataframe.R 3 | \name{get_package_benchmarks} 4 | \alias{get_package_benchmarks} 5 | \title{Get a list of benchmarks in a package} 6 | \usage{ 7 | get_package_benchmarks(package = "arrowbench") 8 | } 9 | \arguments{ 10 | \item{package}{String of package name in which to find benchmarks} 11 | } 12 | \value{ 13 | An instance of \link{BenchmarkDataFrame} with all the benchmarks contained 14 | by a package 15 | } 16 | \description{ 17 | Get a list of benchmarks in a package 18 | } 19 | -------------------------------------------------------------------------------- /man/get_write_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \name{get_write_function} 4 | \alias{get_write_function} 5 | \title{Get a writer} 6 | \usage{ 7 | get_write_function(format, compression, chunk_size = NULL) 8 | } 9 | \arguments{ 10 | \item{format}{format to write} 11 | 12 | \item{compression}{compression to use} 13 | 14 | \item{chunk_size}{the size of chunks to write (default: NULL, the default for 15 | the format)} 16 | } 17 | \value{ 18 | the write function to use 19 | } 20 | \description{ 21 | Get a writer 22 | } 23 | -------------------------------------------------------------------------------- /man/ensure_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{ensure_source} 4 | \alias{ensure_source} 5 | \title{Make sure a data file exists} 6 | \usage{ 7 | ensure_source(name, ...)
8 | } 9 | \arguments{ 10 | \item{name}{A known-source id, a file path, or a URL} 11 | 12 | \item{...}{arguments to pass on to a custom locator} 13 | } 14 | \value{ 15 | A valid path to a source file. If a known source but not present, 16 | it will be downloaded and possibly decompressed. 17 | } 18 | \description{ 19 | Make sure a data file exists 20 | } 21 | -------------------------------------------------------------------------------- /man/get_query_func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{get_query_func} 4 | \alias{get_query_func} 5 | \title{Get a query function that will run a specific TPC-H query} 6 | \usage{ 7 | get_query_func(query_id, engine = NULL) 8 | } 9 | \arguments{ 10 | \item{query_id}{which query to get?} 11 | 12 | \item{engine}{which engine to use (all options return a dplyr-based query, 13 | with the exception of \code{"duckdb_sql"} which returns a SQL-based query)} 14 | } 15 | \description{ 16 | Get a query function that will run a specific TPC-H query 17 | } 18 | -------------------------------------------------------------------------------- /man/get_params_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark.R 3 | \name{get_params_summary} 4 | \alias{get_params_summary} 5 | \title{Extract the parameter summary as a data.frame} 6 | \usage{ 7 | get_params_summary(run) 8 | } 9 | \arguments{ 10 | \item{run}{An instance of \code{BenchmarkResults} as returned by \code{run_benchmark} 11 | or \code{BenchmarkResult} as returned by \code{run_one} and \code{run_bm}} 12 | } 13 | \value{ 14 | a tibble 15 | } 16 | \description{ 17 | Extract a data.frame that provides the parameters used in a run and the 18 | error status 19 | } 20 | -------------------------------------------------------------------------------- /man/table_to_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-table-to-df.R 3 | \docType{data} 4 | \name{table_to_df} 5 | \alias{table_to_df} 6 | \title{Benchmark for reading an Arrow table to a data.frame} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | table_to_df 12 | } 13 | \description{ 14 | This flexes the conversion from Arrow data structures to R data structures.
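% (Hypothetical invocation, mirroring tests/testthat/test-bm-table-to-df.R above: run_benchmark(table_to_df, source = "nyctaxi_sample", cpu_count = arrow::cpu_count()).)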
15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read into a data.frame first) 20 | } 21 | } 22 | 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /tests/testthat/test-custom-duckdb.R: -------------------------------------------------------------------------------- 1 | test_that("custom DuckDB can be installed to and used from a custom lib", { 2 | # ...and can execute SQL 3 | expect_equal( 4 | query_custom_duckdb("SELECT 'thing' as col_name"), 5 | data.frame(col_name = 'thing') 6 | ) 7 | 8 | # ...and write parquet files 9 | temp_parquet <- tempfile() 10 | expect_identical( 11 | export_custom_duckdb("SELECT 'thing' as col_name", temp_parquet), 12 | temp_parquet 13 | ) 14 | 15 | expect_equal( 16 | as.data.frame(arrow::read_parquet(temp_parquet)), 17 | data.frame(col_name = 'thing', stringsAsFactors = FALSE) 18 | ) 19 | }) 20 | -------------------------------------------------------------------------------- /man/dataset_taxi_parquet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-dataset-taxi-parquet.R 3 | \docType{data} 4 | \name{dataset_taxi_parquet} 5 | \alias{dataset_taxi_parquet} 6 | \title{Benchmark Taxi dataset (Parquet) reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 12. 9 | } 10 | \usage{ 11 | dataset_taxi_parquet 12 | } 13 | \description{ 14 | Benchmark Taxi dataset (Parquet) reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{query} Name of a known query to run; see \code{dataset_taxi_parquet$cases} 20 | } 21 | } 22 | 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /man/df_to_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-df-to-table.R 3 | \docType{data} 4 | \name{df_to_table} 5 | \alias{df_to_table} 6 | \title{Benchmark for reading a data.frame into an Arrow table} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | df_to_table 12 | } 13 | \description{ 14 | This flexes the conversion from R data structures to Arrow data structures. 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read into a data.frame first) 20 | } 21 | } 22 | 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /man/write_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-write-csv.R 3 | \docType{data} 4 | \name{write_csv} 5 | \alias{write_csv} 6 | \title{Benchmark CSV writing} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11.
9 | } 10 | \usage{ 11 | write_csv 12 | } 13 | \description{ 14 | Benchmark CSV writing 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A CSV file path to write to 20 | \item \code{writer} One of \code{c("arrow", "data.table", "vroom", "readr", "base")} 21 | \item \code{input} One of \code{c("arrow_table", "data_frame")} 22 | } 23 | } 24 | 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /tests/testthat/test-ensure-lib.R: -------------------------------------------------------------------------------- 1 | test_that("lib_dir()", { 2 | expect_identical( 3 | lib_dir("foo"), 4 | file.path(getwd(), "r_libs", paste0("R-", paste0(c(getRversion()$major, getRversion()$minor), collapse = ".")), "foo") 5 | ) 6 | 7 | expect_identical( 8 | lib_dir("remote-user/arrow@branch/with/slashes"), 9 | file.path(getwd(), "r_libs", paste0("R-", paste0(c(getRversion()$major, getRversion()$minor), collapse = ".")), "remote-user_arrow@branch_with_slashes") 10 | ) 11 | }) 12 | 13 | test_that("identify_repo_ref()", { 14 | expect_identical( 15 | identify_repo_ref("remote-name/repo@ref"), 16 | list(repo = "name/repo", ref = "ref") 17 | ) 18 | }) -------------------------------------------------------------------------------- /man/knowns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \docType{data} 4 | \name{knowns} 5 | \alias{knowns} 6 | \alias{known_compressions} 7 | \alias{known_formats} 8 | \title{Known formats and compressions} 9 | \format{ 10 | An object of class \code{character} of length 8. 11 | 12 | An object of class \code{character} of length 5. 13 | } 14 | \usage{ 15 | known_compressions 16 | 17 | known_formats 18 | } 19 | \description{ 20 | These formats and compression algorithms are known to {arrowbench}. Not every 21 | compression will work with every format (in fact, parquet is the only format 22 | that supports all of them). 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/data_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{data_file} 4 | \alias{data_file} 5 | \title{Find a data file} 6 | \usage{ 7 | data_file(...) 8 | } 9 | \arguments{ 10 | \item{...}{file path to look for} 11 | } 12 | \value{ 13 | path to the file (or NULL if the file doesn't exist) 14 | } 15 | \description{ 16 | This looks in the locations in the following order and returns the first 17 | path that exists: 18 | } 19 | \details{ 20 | \itemize{ 21 | \item the source dir ("data") 22 | \item the temp directory ("data/temp") 23 | } 24 | 25 | If there is not a file present in either of those, it returns NULL 26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /man/placebo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-placebo.R 3 | \docType{data} 4 | \name{placebo} 5 | \alias{placebo} 6 | \title{Placebo benchmark for testing} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11.
9 | } 10 | \usage{ 11 | placebo 12 | } 13 | \description{ 14 | Placebo benchmark for testing 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{duration} the duration for the benchmark to take 20 | \item \code{error_type} \code{NULL} to cause no error; \code{"rlang::abort"} to use rlang's 21 | \code{abort}; any other string (including \code{"base"}) will use base's \code{stop} 22 | } 23 | } 24 | 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-dataset-taxi-2013.R: -------------------------------------------------------------------------------- 1 | test_that("dataset_taxi_2013 exists", { 2 | defaults <- get_default_args(dataset_taxi_2013$setup) 3 | 4 | expect_named(defaults, c("dataset", "query")) 5 | expect_equal( 6 | defaults$query, 7 | c("basic", "payment_type_crd", "small_no_files", "dims") 8 | ) 9 | }) 10 | 11 | test_that("dataset_taxi_2013 runs on sample data", { 12 | expect_benchmark_run( 13 | res <- run_benchmark(dataset_taxi_2013, dataset = "taxi_2013_sample", cpu_count = arrow::cpu_count()) 14 | ) 15 | 16 | lapply(res$optional_benchmark_info$results, function(result) { 17 | expect_s3_class(result, "BenchmarkResult") 18 | expect_gte(result$result$real, 0) 19 | }) 20 | }) 21 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | branches: 8 | - main 9 | - master 10 | 11 | name: test-coverage 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: covr 28 | 29 | - name: Test coverage 30 | run: covr::codecov() 31 | shell: Rscript {0} 32 | -------------------------------------------------------------------------------- /man/dataset_taxi_2013.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-dataset-taxi-2013.R 3 | \docType{data} 4 | \name{dataset_taxi_2013} 5 | \alias{dataset_taxi_2013} 6 | \title{Benchmark Taxi 2013 dataset reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 12. 9 | } 10 | \usage{ 11 | dataset_taxi_2013 12 | } 13 | \description{ 14 | Benchmark Taxi 2013 dataset reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{dataset} Name of dataset to use, either \code{taxi_2013} or \code{taxi_2013_sample} (for testing) 20 | \item \code{query} Name of a known query to run; see \code{dataset_taxi_2013$cases} 21 | } 22 | } 23 | 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/read_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-csv.R 3 | \docType{data} 4 | \name{read_csv} 5 | \alias{read_csv} 6 | \title{Benchmark CSV reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11.
9 | } 10 | \usage{ 11 | read_csv 12 | } 13 | \description{ 14 | Benchmark CSV reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A CSV file path to read in 20 | \item \code{reader} One of \code{c("arrow", "data.table", "vroom", "readr")} 21 | \item \code{compression} One of \code{c("uncompressed", "gzip")} 22 | \item \code{output_format} One of \code{c("arrow_table", "data_frame")} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /inst/regenerate-benchmarks-json.R: -------------------------------------------------------------------------------- 1 | " 2 | This script regenerates inst/benchmarks.json with all current benchmarks. That 3 | file is used by arrow-benchmarks-ci here: 4 | https://github.com/voltrondata-labs/arrow-benchmarks-ci/blob/main/buildkite/benchmark/run.py 5 | to keep track of benchmarks available in a repository. 6 | " 7 | 8 | arrowbench::get_package_benchmarks()$name |> 9 | lapply(function(name) { 10 | list( 11 | command = name, 12 | name = paste0("arrowbench/", name), 13 | runner = "arrowbench", 14 | flags = list(language = "R") 15 | ) 16 | }) |> 17 | unname() |> 18 | jsonlite::write_json( 19 | path = "inst/benchmarks.json", 20 | pretty = TRUE, 21 | auto_unbox = TRUE 22 | ) 23 | -------------------------------------------------------------------------------- /man/ensure_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \name{ensure_format} 4 | \alias{ensure_format} 5 | \title{Ensure that a source has a specific format} 6 | \usage{ 7 | ensure_format( 8 | name, 9 | format = known_formats, 10 | compression = known_compressions, 11 | chunk_size = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{name}{name of the known source} 16 | 17 | \item{format}{format to be ensured} 18 | 19 | \item{compression}{compression to be ensured} 20 | 21 | \item{chunk_size}{the number of rows to write in each chunk} 22 | } 23 | \value{ 24 | the file that was ensured to exist 25 | } 26 | \description{ 27 | Ensure that a source has a specific format 28 | } 29 | -------------------------------------------------------------------------------- /man/ensure_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{ensure_dataset} 4 | \alias{ensure_dataset} 5 | \title{Make sure a multi-file dataset exists} 6 | \usage{ 7 | ensure_dataset(name, download = TRUE) 8 | } 9 | \arguments{ 10 | \item{name}{A known-dataset id. See \code{known_datasets}.} 11 | 12 | \item{download}{logical: should the dataset be synced to the local disk 13 | or queried from its remote URL. 
Default is \code{TRUE}; files are cached 14 | and not downloaded if they're already found locally.} 15 | } 16 | \value{ 17 | An \code{arrow::Dataset}, validated to have the correct number of rows 18 | } 19 | \description{ 20 | Make sure a multi-file dataset exists 21 | } 22 | -------------------------------------------------------------------------------- /R/setup.R: -------------------------------------------------------------------------------- 1 | # The ensure_* functions will make sure everything is downloaded lazily, 2 | # but you can run this to eagerly set up everything up front 3 | 4 | setup_all <- function() { 5 | setup_sources() 6 | setup_datasets() 7 | setup_packages() 8 | } 9 | 10 | setup_sources <- function() { 11 | for (x in names(known_sources)) { 12 | message("Downloading source ", x) 13 | ensure_source(x) 14 | } 15 | } 16 | 17 | setup_datasets <- function() { 18 | for (x in names(known_datasets)) { 19 | message("Downloading dataset ", x) 20 | ensure_dataset(x) 21 | } 22 | } 23 | 24 | setup_packages <- function() { 25 | for (x in names(arrow_version_to_date)) { 26 | message("Installing libs for ", x) 27 | ensure_lib(x) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /man/read_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-file.R 3 | \docType{data} 4 | \name{read_file} 5 | \alias{read_file} 6 | \title{Benchmark file reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | read_file 12 | } 13 | \description{ 14 | Benchmark file reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id, or a CSV(?) file path to read in 20 | \item \code{file_type} One of \code{c("parquet", "feather", "fst")} 21 | \item \code{compression} One of the values: uncompressed, snappy, zstd, gzip, lz4, brotli, lzo, bz2 22 | \item \code{output_type} One of \code{c("arrow_table", "data_frame")} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/row_group_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-row-group-size.R 3 | \docType{data} 4 | \name{row_group_size} 5 | \alias{row_group_size} 6 | \title{Benchmark effect of parquet row group size} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | row_group_size 12 | } 13 | \description{ 14 | Benchmark effect of parquet row group size 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id, or a file path to read in 20 | \item \code{queries} What queries to run 21 | \item \code{chunk_size} Number of rows to write in each row group. Suggested sizes: 22 | \code{chunk_size = list(NULL, 10000L, 100000L, 1000000L)} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/write_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-write-file.R 3 | \docType{data} 4 | \name{write_file} 5 | \alias{write_file} 6 | \title{Benchmark file writing} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 
9 | } 10 | \usage{ 11 | write_file 12 | } 13 | \description{ 14 | Benchmark file writing 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id, or a path to a CSV file to read in 20 | \item \code{file_type} One of \code{c("parquet", "feather", "fst")} 21 | \item \code{compression} One of the values: uncompressed, snappy, zstd, gzip, lz4, brotli, lzo, bz2 22 | \item \code{input_type} One of \code{c("arrow_table", "data_frame")} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /tests/testthat/test-ensure-source.R: -------------------------------------------------------------------------------- 1 | test_that("get_source_attr()", { 2 | # can get known_source attrs 3 | expect_identical(get_source_attr("fanniemae_2016Q4", "dim"), c(22180168L, 31L)) 4 | 5 | # and can get test_source attrs 6 | expect_identical(get_source_attr("nyctaxi_sample", "dim"), c(998L, 18L)) 7 | }) 8 | 9 | test_that("get_dataset_attr()", { 10 | # can get known_dataset attrs 11 | expect_identical(get_dataset_attr("taxi_parquet", "dim"), c(1547741381L, 20L)) 12 | }) 13 | 14 | test_that("ensure_source error handling", { 15 | expect_error( 16 | ensure_source("not_a_source"), 17 | "not_a_source is not a known source" 18 | ) 19 | }) 20 | 21 | test_that("source_filename()", { 22 | expect_identical( 23 | source_filename("fanniemae_2016Q4"), 24 | "fanniemae_2016Q4.csv.gz" 25 | ) 26 | }) 27 | -------------------------------------------------------------------------------- /tests/testthat/test-measure.R: -------------------------------------------------------------------------------- 1 | test_that("with_gc_info + errors", { 2 | # this tests with_gc_info + errors behavior, but we can't test it quite 3 | # directly because of how testthat alters how errors work.
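# A rough equivalent outside testthat (a sketch, assuming an interactive
# session with this package loaded) would look like:
#   base_error <- run_one(placebo, error_type = "base")
#   base_error$error$error  # matches "Error.*something went wrong (but I knew that)"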
4 | 5 | suppress_deparse_warning( 6 | capture.output( 7 | base_error <- run_one(placebo, error_type = "base"), type = "message" 8 | ) 9 | ) 10 | expect_false(is.null(base_error$error)) 11 | expect_match(base_error$error$error, "Error.*something went wrong \\(but I knew that\\)") 12 | 13 | suppress_deparse_warning( 14 | capture.output( 15 | rlang_error <- run_one(placebo, error_type = "rlang::abort"), type = "message" 16 | ) 17 | ) 18 | expect_false(is.null(rlang_error$error)) 19 | expect_match(rlang_error$error$error, "Error.*something went wrong \\(but I knew that\\)") 20 | }) 21 | -------------------------------------------------------------------------------- /R/bm-remote-dataset.R: -------------------------------------------------------------------------------- 1 | #' Remote (S3) dataset reading 2 | #' 3 | #' @export 4 | remote_dataset <- Benchmark("remote_dataset", 5 | setup = function(source = c("taxi_file_list_parquet", "taxi_file_list_feather")) { 6 | library("dplyr") 7 | dataset <- ensure_dataset(source, download = FALSE) 8 | result_dim <- get_dataset_attr(source, "dim") 9 | 10 | BenchEnvironment( 11 | dataset = dataset, 12 | expected_dim = result_dim 13 | ) 14 | }, 15 | before_each = { 16 | options("arrow.use_async" = TRUE) 17 | result <- NULL 18 | }, 19 | run = { 20 | result <- collect(dataset) 21 | }, 22 | after_each = { 23 | stopifnot( 24 | "The dimensions do not match" = all.equal(dim(result), expected_dim) 25 | ) 26 | }, 27 | packages_used = function(params) { 28 | c("arrow") 29 | } 30 | ) 31 | -------------------------------------------------------------------------------- /man/read_json.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-json.R 3 | \docType{data} 4 | \name{read_json} 5 | \alias{read_json} 6 | \title{Benchmark JSON reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | read_json 12 | } 13 | \description{ 14 | Benchmark JSON reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A JSON file path to read in 20 | \item \code{reader} One of \code{c("arrow", "jsonlite", "ndjson", "RcppSimdJson")} 21 | \item \code{compression} One of \code{c("uncompressed", "gzip")} 22 | \item \code{output_format} One of \code{c("arrow_table", "data_frame")} 23 | \item \code{rbinder} Method for simplifying to dataframe. Not relevant for {arrow} and {ndjson}. 24 | } 25 | } 26 | 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/validate_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \name{validate_format} 4 | \alias{validate_format} 5 | \alias{stop_if_not_valid_format} 6 | \title{Validate format and compression combinations} 7 | \usage{ 8 | validate_format(format, compression) 9 | 10 | stop_if_not_valid_format(format, compression) 11 | } 12 | \arguments{ 13 | \item{format}{the format of the file} 14 | 15 | \item{compression}{the compression codec} 16 | } 17 | \value{ 18 | \code{TRUE} invisibly 19 | } 20 | \description{ 21 | For a given format + compression, determine if the combination is valid. 22 | \code{validate_format()} returns a logical vector indicating whether each format + 23 | compression combination is valid.
\code{stop_if_not_valid_format()} will stop if any of the format + compressions 24 | are not valid. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /tests/testthat/test-external-dependencies.R: -------------------------------------------------------------------------------- 1 | test_that("external_cli_available() works", { 2 | fake_uninstalled_cli <- basename(tempfile()) 3 | expect_warning( 4 | expect_false( 5 | external_cli_available(fake_uninstalled_cli) 6 | ), 7 | regexp = paste(fake_uninstalled_cli, "not installed or on $PATH"), 8 | fixed = TRUE 9 | ) 10 | 11 | expect_true(external_cli_available("which")) 12 | }) 13 | 14 | test_that("pipx_available() works", { 15 | expect_equal(pipx_available(), processx::run("which", "pipx")$status == 0L) 16 | }) 17 | 18 | test_that("benchconnect_available() works", { 19 | expect_equal(benchconnect_available(), processx::run("which", "benchconnect")$status == 0L) 20 | }) 21 | 22 | test_that("datalogistik_available() works", { 23 | expect_equal(datalogistik_available(), processx::run("which", "datalogistik")$status == 0L) 24 | }) 25 | -------------------------------------------------------------------------------- /man/BenchmarkDataFrame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark-dataframe.R 3 | \name{BenchmarkDataFrame} 4 | \alias{BenchmarkDataFrame} 5 | \title{A classed dataframe of benchmarks for running} 6 | \usage{ 7 | BenchmarkDataFrame(benchmarks, parameters) 8 | } 9 | \arguments{ 10 | \item{benchmarks}{A list with elements of class \code{Benchmark}} 11 | 12 | \item{parameters}{Optional. A list of dataframes of parameter combinations to 13 | run as generated by \code{\link[=get_default_parameters]{get_default_parameters()}}. If null, defaults will be generated 14 | when \code{\link[=run]{run()}} is called.} 15 | } 16 | \value{ 17 | A classed dataframe with \code{name} (benchmark attribute, not object name), 18 | \code{benchmark}, and \code{params} columns 19 | } 20 | \description{ 21 | A classed dataframe of benchmarks for running 22 | } 23 | -------------------------------------------------------------------------------- /man/assemble_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{assemble_metadata} 4 | \alias{assemble_metadata} 5 | \title{Assemble metadata for a benchmark run} 6 | \usage{ 7 | assemble_metadata(name, params, cpu_count, drop_caches, n_iter) 8 | } 9 | \arguments{ 10 | \item{name}{Benchmark name, i.e. \code{bm$name}} 11 | 12 | \item{params}{Named list of parameters for the individual run, i.e. the case} 13 | 14 | \item{cpu_count}{Number of CPUs allocated} 15 | 16 | \item{drop_caches}{Attempt to drop the disk cache before each case or iteration. 17 | Currently only works on linux. Permissible values are \code{"case"}, \code{"iteration"}, 18 | and \code{NULL}. Defaults to \code{NULL}, i.e. 
don't drop caches.} 19 | 20 | \item{n_iter}{Number of iterations} 21 | } 22 | \description{ 23 | Assemble metadata for a benchmark run 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/tpch_answer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{tpch_answer} 4 | \alias{tpch_answer} 5 | \title{Get a TPC-H answer} 6 | \usage{ 7 | tpch_answer( 8 | scale_factor, 9 | query_id, 10 | source = c("arrowbench", "duckdb"), 11 | data_source = c("duckdb", "dbgen") 12 | ) 13 | } 14 | \arguments{ 15 | \item{scale_factor}{scale factor (possible values: \code{c(0.01, 0.1, 1, 10)})} 16 | 17 | \item{query_id}{Id of the query (possible values: 1-22)} 18 | 19 | \item{source}{source of the answer (default: "arrowbench"); "duckdb" can 20 | return answers for scale_factor 1.} 21 | 22 | \item{data_source}{which source of data should we construct answers for? "duckdb" 23 | (the default) has a slightly different set of data in the *_address columns 24 | compared to "dbgen".} 25 | } 26 | \value{ 27 | the answer, as a data.frame 28 | } 29 | \description{ 30 | Get a TPC-H answer 31 | } 32 | -------------------------------------------------------------------------------- /inst/tpch/README.md: -------------------------------------------------------------------------------- 1 | # Why is there a separate duckdb data directory here? 2 | 3 | The DuckDB data generator actually produces data that is _slightly_ out of spec. Specifically, the `_address` columns generate slightly different data. Generally speaking, this isn't a big deal since the queries don't use pattern matches on those columns, but they do show up in some of the answers. 4 | 5 | But it's plain to see if you look at the official answers and the duckdb ones for a query that includes a `_address` column: 6 | 7 | https://github.com/databricks/tpch-dbgen/blob/6985da461c641fd0d255b214f2d693f1bf08bc33/answers/q2.out 8 | https://github.com/duckdb/duckdb/blob/c0a4ab96c626426961c207f49c19aa81448e91da/extension/tpch/dbgen/answers/sf1/q02.csv 9 | 10 | Additionally, DuckDB >= 0.8 also slightly changed some of the `s_comment` columns. This doesn't impact the queries themselves, but [the answers changed](https://github.com/duckdb/duckdb/pull/6535). Note that the stored answers differed before PR #136. -------------------------------------------------------------------------------- /man/array_altrep_materialization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-array-altrep-materialization.R 3 | \docType{data} 4 | \name{array_altrep_materialization} 5 | \alias{array_altrep_materialization} 6 | \title{Benchmark for materializing an altrep Arrow array} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | array_altrep_materialization 12 | } 13 | \description{ 14 | This flexes a lower-level conversion to R data structures from Arrow data structures. 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read in to a data.frame first) 20 | \item \code{exclude_nulls} Logical. Remove any columns with any \code{NULL}s or \code{NA}s in them? 21 | \item \code{altrep} Logical. Use altrep storage for vectors?
22 | \item \code{subset_indices} Length-one list containing a vector of indices used to subset rows of the source. 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/array_to_vector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-array-to-vector.R 3 | \docType{data} 4 | \name{array_to_vector} 5 | \alias{array_to_vector} 6 | \title{Benchmark for reading an Arrow array to a vector} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | array_to_vector 12 | } 13 | \description{ 14 | This flexes a lower-level conversion to R data structures from Arrow data structures. 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read in to a data.frame first) 20 | \item \code{chunked_arrays} logical, should the arrays converted be \code{ChunkedArrays} or \code{Arrays}? 21 | \item \code{exclude_nulls} logical, should any columns with any \code{NULL}s or \code{NA}s in them be removed? 22 | \item \code{alt_rep} logical, should the altrep option be set? (\code{TRUE} to enable it, \code{FALSE} to disable) 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/get_sql_query_func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{get_sql_query_func} 4 | \alias{get_sql_query_func} 5 | \title{Get a SQL query} 6 | \usage{ 7 | get_sql_query_func(query_num) 8 | } 9 | \arguments{ 10 | \item{query_num}{the query number to fetch the result for} 11 | } 12 | \value{ 13 | a function that accepts a connection argument \code{con}, against which it will run 14 | \code{DBI::dbGetQuery()}. 15 | } 16 | \description{ 17 | Produces a function that can run a query against any DBI backend (e.g. DuckDB) 18 | } 19 | \details{ 20 | The function that is returned takes the following arguments. The first two are 21 | supplied to match the signature of those in \code{tpc_h_queries}: 22 | \itemize{ 23 | \item \code{input_func} set to default \code{NULL}, will have no effect if supplied 24 | \item \code{collect_func} set to default \code{NULL}, will have no effect if supplied 25 | \item \code{con} a (DBI) connection to query against 26 | } 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man/generate_tpch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-tpch-source.R 3 | \name{generate_tpch} 4 | \alias{generate_tpch} 5 | \title{Generate tpch data} 6 | \usage{ 7 | generate_tpch(scale_factor = 1) 8 | } 9 | \arguments{ 10 | \item{scale_factor}{a relative measure of the size of data in gigabytes.} 11 | } 12 | \description{ 13 | Generate tpch data at a given scale factor. By default, 14 | data is output relative to the current working directory. However, 15 | you can set the environment variable \code{ARROWBENCH_DATA_DIR} to 16 | point to another directory. Setting this environment variable has 17 | the advantage of being a central location for general usage.
Running 18 | this function will install a custom version of duckdb in an \code{r_libs} 19 | directory, relative to the directory specified by the environment 20 | variable \code{ARROWBENCH_LOCAL_DIR}. When running this function for the first time you will 21 | see significant output from that installation process. This is normal. 22 | } 23 | -------------------------------------------------------------------------------- /man/measure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/measure.R 3 | \name{measure} 4 | \alias{measure} 5 | \title{Measure times and memory usage} 6 | \usage{ 7 | measure(..., profiling = FALSE, drop_caches = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{An expression to evaluate and measure} 11 | 12 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 13 | contain a \code{prof_file} field, which you can read in with 14 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 15 | 16 | \item{drop_caches}{Attempt to drop the disk cache before each case or iteration. 17 | Currently only works on linux. Permissible values are \code{"case"}, \code{"iteration"}, 18 | and \code{NULL}. Defaults to \code{NULL}, i.e. don't drop caches. As \code{measure()} is run 19 | once per iteration, here \code{"iteration"} results in dropping caches once and 20 | \code{NULL} and \code{"case"} result in no cache dropping.} 21 | } 22 | \value{ 23 | A tibble of timings and memory usage 24 | } 25 | \description{ 26 | Measure times and memory usage 27 | } 28 | -------------------------------------------------------------------------------- /man/get_input_func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{get_input_func} 4 | \alias{get_input_func} 5 | \title{Get an input function for a table} 6 | \usage{ 7 | get_input_func( 8 | engine, 9 | scale_factor, 10 | query_id, 11 | format, 12 | compression = "uncompressed", 13 | con = NULL, 14 | memory_map = FALSE, 15 | chunk_size = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{engine}{which engine to use} 20 | 21 | \item{scale_factor}{what scale factor to reference} 22 | 23 | \item{query_id}{which query is being used} 24 | 25 | \item{format}{which format} 26 | 27 | \item{compression}{which compression to use (default: "uncompressed")} 28 | 29 | \item{con}{a connection} 30 | 31 | \item{memory_map}{should the file be memory mapped? (only relevant for the "native" format with Arrow)} 32 | 33 | \item{chunk_size}{what chunk_size should be used with the source files? (default: NULL, the default for the file format)} 34 | } 35 | \description{ 36 | This returns a function which will return a table reference with the specified 37 | parameters. 38 | } 39 | -------------------------------------------------------------------------------- /R/bm-table-to-df.R: -------------------------------------------------------------------------------- 1 | #' Benchmark for reading an Arrow table to a data.frame 2 | #' 3 | #' This flexes conversion to R data structures from Arrow data structures.
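#'
#' A minimal usage sketch (illustrative only; `nyctaxi_sample` is the small
#' test source used in this package's tests):
#'
#'     res <- run_benchmark(table_to_df, source = "nyctaxi_sample")
#'     as.data.frame(res)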
4 | #' 5 | #' @section Parameters: 6 | #' * `source` A known-file id to use (it will be read in to a data.frame first) 7 | #' 8 | #' @export 9 | table_to_df <- Benchmark("table_to_df", 10 | setup = function(source = names(known_sources)) { 11 | source <- ensure_source(source) 12 | result_dim <- get_source_attr(source, "dim") 13 | table <- read_source(source, as_data_frame = FALSE) 14 | 15 | transfer_func <- function(table) as.data.frame(table) 16 | 17 | BenchEnvironment( 18 | transfer_func = transfer_func, 19 | result_dim = result_dim, 20 | table = table 21 | ) 22 | }, 23 | before_each = { 24 | result <- NULL 25 | }, 26 | run = { 27 | result <- transfer_func(table) 28 | }, 29 | after_each = { 30 | stopifnot("The dimensions do not match" = all.equal(dim(result), result_dim)) 31 | result <- NULL 32 | }, 33 | valid_params = function(params) params, 34 | packages_used = function(params) "arrow" 35 | ) 36 | 37 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021 Ursa Computing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/as.data.frame.BenchmarkResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result.R 3 | \name{as.data.frame.BenchmarkResults} 4 | \alias{as.data.frame.BenchmarkResults} 5 | \alias{as.data.frame.BenchmarkResult} 6 | \title{Convert benchmark result object to a tidy data frame} 7 | \usage{ 8 | \method{as.data.frame}{BenchmarkResults}(x, row.names = NULL, optional = FALSE, ...) 9 | 10 | \method{as.data.frame}{BenchmarkResult}(x, row.names = NULL, optional = FALSE, packages = "arrow", ...) 11 | } 12 | \arguments{ 13 | \item{x}{a benchmark result object or list of them as returned by \code{\link[=run_one]{run_one()}} or \code{\link[=run_benchmark]{run_benchmark()}}} 14 | 15 | \item{row.names}{for generic consistency} 16 | 17 | \item{optional}{for generic consistency} 18 | 19 | \item{...}{additional arguments passed on to methods for individual results. 
20 | \code{packages} is the only currently supported argument.} 21 | 22 | \item{packages}{Packages for which to extract versions} 23 | } 24 | \value{ 25 | A data.frame suitable for analysis in R 26 | } 27 | \description{ 28 | Convert benchmark result object to a tidy data frame 29 | } 30 | -------------------------------------------------------------------------------- /man/tpc_h.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \docType{data} 4 | \name{tpc_h} 5 | \alias{tpc_h} 6 | \title{Benchmark TPC-H queries} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | tpc_h 12 | } 13 | \description{ 14 | Benchmark TPC-H queries 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{engine} One of \code{c("arrow", "duckdb", "dplyr")} 20 | \item \code{query_id} integer, 1-22 21 | \item \code{format} One of \code{c("parquet", "feather", "native")} 22 | \item \code{scale_factor} Scale factor to use for data generation (e.g. 0.1, 1, 10, 100) 23 | \item \code{memory_map} Should memory mapping be used when reading a file in? (only 24 | applicable to the "arrow" engine with the "native" format; \code{FALSE} will result in the file being explicitly 25 | read into memory before the benchmark) 26 | \item \code{output} the format of the output (either \code{"data_frame"} (default) or \code{"arrow_table"}) 27 | \item \code{chunk_size} the row-group size to aim for in parquet or feather files (default: 28 | \code{NULL}, which uses the default for \code{arrow::write_parquet()} or \code{arrow::write_feather()}) 29 | } 30 | } 31 | 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-read-file.R: -------------------------------------------------------------------------------- 1 | test_that("read_file validation", { 2 | # read_file has a few combinations in its default arguments that aren't valid 3 | read_file_no_validate <- read_file 4 | read_file_no_validate$valid_params <- NULL 5 | 6 | params_no_validate <- get_default_parameters(read_file_no_validate) 7 | 8 | params <- get_default_parameters(read_file) 9 | 10 | expect_lt(nrow(params), nrow(params_no_validate)) 11 | 12 | # specifically feather+snappy is not a possibility 13 | expect_identical( 14 | nrow(params[params$file_type == "feather" & params$compression == "snappy", ]), 15 | 0L 16 | ) 17 | }) 18 | 19 | for (file_type in c("parquet", "feather")) { 20 | if (file_type == "parquet") { 21 | compression <- c("uncompressed", "snappy", "lz4") 22 | } else { 23 | compression <- "uncompressed" 24 | } 25 | 26 | test_that(paste0("read_file benchmark works for ", file_type), { 27 | expect_benchmark_run( 28 | run_benchmark( 29 | read_file, 30 | source = "nyctaxi_sample", 31 | file_type = file_type, 32 | compression = compression, 33 | output_type = c("arrow_table", "data_frame"), 34 | cpu_count = arrow::cpu_count() 35 | ) 36 | ) 37 | }) 38 | } 39 | 40 | 41 | wipe_results() 42 | -------------------------------------------------------------------------------- /R/publish.R: -------------------------------------------------------------------------------- 1 | # Call benchconnect 2 | # 3 | # @param args A character vector of arguments to pass to the benchconnect binary 4 | # 5 | # @returns A string of stdout returned by the call 6 | call_benchconnect <- function(args) { 7 | stopifnot(benchconnect_available()) 8 | res <- processx::run(command = 
"benchconnect", args = args, echo_cmd = TRUE, echo = TRUE) 9 | message(res$stderr) 10 | res$stdout 11 | } 12 | 13 | 14 | augment_run <- function(run) { 15 | stdout <- call_benchconnect(c("augment", "run", "--json", run$json)) 16 | BenchmarkRun$from_json(stdout) 17 | } 18 | 19 | augment_result <- function(result) { 20 | stdout <- call_benchconnect(c("augment", "result", "--json", result$json)) 21 | BenchmarkResult$from_json(stdout) 22 | } 23 | 24 | 25 | start_run <- function(run) { 26 | call_benchconnect(c("start", "run", "--json", run$json)) 27 | } 28 | 29 | submit_result <- function(result) { 30 | call_benchconnect(c("submit", "result", "--json", result$json)) 31 | } 32 | 33 | finish_run <- function(run) { 34 | # Ed note: `run` is not used right now, but there are some things we can pass 35 | # here in the future, so I put it here for parallelism for now. Since it is 36 | # not evaluated, it doesn't need to be specified for now. 37 | call_benchconnect(c("finish", "run", "--json", "{}")) 38 | } 39 | -------------------------------------------------------------------------------- /R/bm-placebo.R: -------------------------------------------------------------------------------- 1 | #' Placebo benchmark for testing 2 | #' 3 | #' @section Parameters: 4 | #' * `duration` the duration for the benchmark to take 5 | #' * `error_type` `NULL` to cause no error, `"rlang::abort"` to use rlang's 6 | #' `abort` and any other string (including `"base"`) will use base's `stop` 7 | #' 8 | #' @keywords internal 9 | placebo <- Benchmark("placebo", 10 | setup = function(duration = 0.01, error_type = NULL, output_type = NULL, grid = TRUE) { 11 | BenchEnvironment(placebo_func = function() { 12 | if (!is.null(output_type)) { 13 | msg <- "here's some output" 14 | if (output_type == "message") { 15 | message("A message: ", msg) 16 | } else if (output_type == "warning") { 17 | warning("A warning:", msg) 18 | } else if (output_type == "cat") { 19 | cat("A cat:", msg) 20 | } 21 | } 22 | 23 | if (!is.null(error_type)) { 24 | msg <- "something went wrong (but I knew that)" 25 | if (error_type == "rlang::abort") { 26 | rlang::abort(msg) 27 | } 28 | stop(msg) 29 | } 30 | Sys.sleep(duration) 31 | }) 32 | }, 33 | before_each = TRUE, 34 | run = { 35 | placebo_func() 36 | }, 37 | after_each = TRUE, 38 | valid_params = function(params) { 39 | params 40 | }, 41 | packages_used = function(params) { 42 | "base" 43 | } 44 | ) 45 | -------------------------------------------------------------------------------- /man/get_default_parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/params.R 3 | \name{get_default_parameters} 4 | \alias{get_default_parameters} 5 | \alias{get_default_parameters.default} 6 | \alias{get_default_parameters.Benchmark} 7 | \alias{get_default_parameters.BenchmarkDataFrame} 8 | \title{Generate a dataframe of default parameters for a benchmark} 9 | \usage{ 10 | get_default_parameters(x, ...) 11 | 12 | \method{get_default_parameters}{default}(x, ...) 13 | 14 | \method{get_default_parameters}{Benchmark}(x, ...) 15 | 16 | \method{get_default_parameters}{BenchmarkDataFrame}(x, ...) 17 | } 18 | \arguments{ 19 | \item{x}{An object for which to generate parameters} 20 | 21 | \item{...}{Named arguments corresponding to the parameters of \code{bm}'s \code{setup} 22 | function. May also contain global params \code{cpu_count}, \code{lib_path}, \code{mem_alloc}, 23 | and \code{drop_caches}. 
See the "Parameterizing benchmarks" section of \code{\link[=Benchmark]{Benchmark()}} 24 | for more details.} 25 | } 26 | \value{ 27 | For \code{get_default_parameters.Benchmark}, a dataframe of parameter combinations 28 | to try with a column for each parameter and a row for each combination. 29 | } 30 | \description{ 31 | Generates a dataframe of parameter combinations for a benchmark to try based 32 | on the parameter defaults of its \code{setup} function and supplied parameters. 33 | } 34 | -------------------------------------------------------------------------------- /tests/testthat/test-ensure-tpch-source.R: -------------------------------------------------------------------------------- 1 | # This test (might) include installing a custom version of DuckDB that has the 2 | # tpc-h extension built. This doesn't work well when coverage is running, so 3 | # skip these tests when generating coverage. 4 | skip_on_covr() 5 | skip_if(Sys.getenv("ARROWBENCH_TEST_CUSTOM_DUCKDB", "") == "") 6 | 7 | temp_dir <- tempfile() 8 | dir.create(temp_dir) 9 | 10 | expected_filenames <- as.list(set_names( 11 | file.path(temp_dir, paste0(tpch_tables, "_0.0001.parquet")), 12 | nm = tpch_tables 13 | )) 14 | 15 | withr::with_envvar( 16 | list(ARROWBENCH_DATA_DIR = temp_dir), 17 | { 18 | 19 | test_that("can generate a small dataset", { 20 | tpch_files <- ensure_tpch(0.0001) 21 | expect_identical( 22 | tpch_files, 23 | expected_filenames 24 | ) 25 | }) 26 | 27 | test_that("can read that same small dataset if it is in the data folder already", { 28 | mockery::stub(ensure_tpch, 'generate_tpch', function(scale_factor) stop("this should not be called")) 29 | tpch_files <- ensure_tpch(0.0001) 30 | expect_identical( 31 | tpch_files, 32 | expected_filenames 33 | ) 34 | }) 35 | 36 | test_that("and ensure gets the same thing", { 37 | tpch_files <- ensure_source("tpch", scale_factor = 0.0001) 38 | expect_identical( 39 | tpch_files, 40 | expected_filenames 41 | ) 42 | }) 43 | } 44 | ) 45 | -------------------------------------------------------------------------------- /R/bm-df-to-table.R: -------------------------------------------------------------------------------- 1 | #' Benchmark for reading a data.frame into an Arrow table 2 | #' 3 | #' This flexes that conversion from R data structures to Arrow data structures. 4 | #' 5 | #' @section Parameters: 6 | #' * `source` A known-file id to use (it will be read in to a data.frame first) 7 | #' 8 | #' @export 9 | df_to_table <- Benchmark("dataframe-to-table", 10 | setup = function( 11 | source = c( 12 | "chi_traffic_2020_Q1", 13 | "type_strings", 14 | "type_dict", 15 | "type_integers", 16 | "type_floats", 17 | "type_nested" 18 | ) 19 | ) { 20 | source <- ensure_source(source) 21 | result_dim <- get_source_attr(source, "dim") 22 | # Make sure that we're not (accidentally) creating altrep vectors which will 23 | # make the benchmark measure both arrow->R and then also R->arrow when we 24 | # really want to just measure R->arrow. 
25 | df <- read_source(source, as_data_frame = TRUE) 26 | 27 | transfer_func <- function(df) arrow::Table$create(df) 28 | 29 | BenchEnvironment( 30 | transfer_func = transfer_func, 31 | result_dim = result_dim, 32 | df = df 33 | ) 34 | }, 35 | before_each = { 36 | result <- NULL 37 | }, 38 | run = { 39 | result <- transfer_func(df) 40 | }, 41 | after_each = { 42 | stopifnot("The dimensions do not match" = all.equal(dim(result), result_dim)) 43 | result <- NULL 44 | }, 45 | valid_params = function(params) params, 46 | packages_used = function(params) "arrow" 47 | ) 48 | 49 | -------------------------------------------------------------------------------- /man/run_bm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run_bm} 4 | \alias{run_bm} 5 | \title{Execute a benchmark run} 6 | \usage{ 7 | run_bm( 8 | bm, 9 | ..., 10 | n_iter = 1, 11 | batch_id = NULL, 12 | profiling = FALSE, 13 | global_params = list(), 14 | run_id = NULL, 15 | run_name = NULL, 16 | run_reason = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{bm}{\code{\link[=Benchmark]{Benchmark()}} object} 21 | 22 | \item{...}{parameters passed to \code{bm$setup()} or global parameters; see the 23 | "Parameterizing benchmarks" section of \code{\link[=Benchmark]{Benchmark()}}} 24 | 25 | \item{n_iter}{Integer number of iterations to replicate each benchmark} 26 | 27 | \item{batch_id}{a length 1 character vector to identify the batch} 28 | 29 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 30 | contain a \code{prof_file} field, which you can read in with 31 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 32 | 33 | \item{global_params}{the global parameters that have been set} 34 | 35 | \item{run_id}{Unique ID for the run} 36 | 37 | \item{run_name}{Name for the run} 38 | 39 | \item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} 40 | } 41 | \description{ 42 | This is the function that gets called in the script that \code{\link[=run_one]{run_one()}} prepares. 43 | You may call this function interactively, but you won't get the isolation 44 | in a fresh R process that \code{run_one()} provides. 
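For example, a hedged sketch using the package's internal \code{placebo} test
benchmark: \code{run_bm(placebo, duration = 0.01, n_iter = 2)}.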
45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | 6 | name: pkgdown 7 | 8 | jobs: 9 | pkgdown: 10 | runs-on: macOS-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - uses: r-lib/actions/setup-r@v2 17 | 18 | - uses: r-lib/actions/setup-pandoc@v2 19 | 20 | - name: Query dependencies 21 | run: | 22 | install.packages('remotes') 23 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 24 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 25 | shell: Rscript {0} 26 | 27 | - name: Cache R packages 28 | uses: actions/cache@v2 29 | with: 30 | path: ${{ env.R_LIBS_USER }} 31 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 32 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 33 | 34 | - name: Install dependencies 35 | run: | 36 | remotes::install_deps(dependencies = TRUE) 37 | install.packages("pkgdown", type = "binary") 38 | shell: Rscript {0} 39 | 40 | - name: Install package 41 | run: R CMD INSTALL . 42 | 43 | - name: Deploy package 44 | run: | 45 | git config --local user.email "actions@github.com" 46 | git config --local user.name "GitHub Actions" 47 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 48 | -------------------------------------------------------------------------------- /tests/testthat/test-params.R: -------------------------------------------------------------------------------- 1 | test_that("get_default_parameters.BenchmarkDataFrame() can fill in params col", { 2 | bm_list <- list(placebo, placebo) 3 | bm_df <- BenchmarkDataFrame(bm_list) 4 | assert_benchmark_dataframe(bm_df, bm_list) 5 | 6 | bm_df_augmented <- get_default_parameters(bm_df) 7 | assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters)) 8 | lapply(bm_df_augmented$parameters, function(param_df) { 9 | expect_s3_class(param_df, "data.frame") 10 | expect_equal(param_df, get_default_parameters(placebo)) 11 | expect_gt(nrow(param_df), 0L) 12 | }) 13 | 14 | # handle keyword args 15 | bm_df_augmented <- get_default_parameters(bm_df, duration = 1) 16 | assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters, duration = 1)) 17 | lapply(bm_df_augmented$parameters, function(param_df) { 18 | expect_s3_class(param_df, "data.frame") 19 | expect_equal(param_df, get_default_parameters(placebo, duration = 1)) 20 | expect_gt(nrow(param_df), 0L) 21 | }) 22 | 23 | # handle partially-specified param lists 24 | bm_df <- BenchmarkDataFrame(bm_list, parameters = list(get_default_parameters(placebo, duration = 1), NULL)) 25 | bm_df_augmented <- get_default_parameters(bm_df, duration = 1) 26 | assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters, duration = 1)) 27 | lapply(bm_df_augmented$parameters, function(param_df) { 28 | expect_s3_class(param_df, "data.frame") 29 | expect_equal(param_df, get_default_parameters(placebo, duration = 1)) 30 | expect_gt(nrow(param_df), 0L) 31 | }) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-benchmark-dataframe.R: 
-------------------------------------------------------------------------------- 1 | test_that("BenchmarkDataFrame can be instantiated", { 2 | for (bm_list in list( 3 | list(placebo), 4 | list(placebo, placebo), 5 | list(a = placebo, b = placebo) 6 | )) { 7 | bm_df <- BenchmarkDataFrame(benchmarks = bm_list) 8 | assert_benchmark_dataframe(bm_df, benchmarks = bm_list) 9 | } 10 | 11 | bm_list <- list(placebo, placebo) 12 | param_list <- list(get_default_parameters(placebo), NULL) 13 | bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) 14 | assert_benchmark_dataframe(bm_df, benchmarks = bm_list, parameters = param_list) 15 | 16 | expect_error( 17 | BenchmarkDataFrame(1), 18 | "All elements of `benchmarks` are not of class `Benchmark`!" 19 | ) 20 | }) 21 | 22 | 23 | test_that("format.BenchmarkDataFrame() works", { 24 | bm_df <- BenchmarkDataFrame(benchmarks = list(placebo)) 25 | expect_output(print(bm_df), "# ") 26 | }) 27 | 28 | 29 | # A vector of benchmark attribute names run on `ursa-i9-9960x` 30 | URSA_I9_9960X_R_BENCHMARK_NAMES <- c( 31 | "dataframe-to-table", # `df_to_table` 32 | "file-read", 33 | "file-write", 34 | "partitioned-dataset-filter", # `dataset_taxi_parquet` 35 | "wide-dataframe", # not actually an R benchmark 36 | "tpch" # `tpc_h` 37 | ) 38 | 39 | test_that("`get_package_benchmarks()` works", { 40 | bm_df <- get_package_benchmarks() 41 | assert_benchmark_dataframe(bm_df = bm_df, benchmarks = bm_df$benchmark) 42 | expect_gt(nrow(bm_df), 0L) 43 | # currently `any()` because `wide-dataframe` is actually a Python benchmark, 44 | # but is still listed in arrow-benchmarks-ci in R. If removed, change to `all()`. 45 | expect_true(any(URSA_I9_9960X_R_BENCHMARK_NAMES %in% bm_df$name)) 46 | }) 47 | -------------------------------------------------------------------------------- /R/benchmark-dataframe.R: -------------------------------------------------------------------------------- 1 | #' A classed dataframe of benchmarks for running 2 | #' 3 | #' @param benchmarks A list with elements of class `Benchmark` 4 | #' @param parameters Optional. A list of dataframes of parameter combinations to 5 | #' run as generated by [get_default_parameters()]. If null, defaults will be generated 6 | #' when [run()] is called. 7 | #' 8 | #' @return A classed dataframe with `name` (benchmark attribute, not object name), 9 | #' `benchmark`, and `params` columns 10 | #' 11 | #' @export 12 | BenchmarkDataFrame <- function(benchmarks, parameters) { 13 | lapply(benchmarks, function(bm) stopifnot( 14 | "All elements of `benchmarks` are not of class `Benchmark`!" = inherits(bm, "Benchmark") 15 | )) 16 | 17 | bm_names <- vapply(benchmarks, function(bm) bm$name, character(1)) 18 | if (missing(parameters)) { 19 | parameters <- rep(list(NULL), length = length(benchmarks)) 20 | } 21 | 22 | structure( 23 | tibble::tibble( 24 | name = bm_names, 25 | benchmark = benchmarks, 26 | parameters = parameters 27 | ), 28 | class = c("BenchmarkDataFrame", "tbl_df", "tbl", "data.frame") 29 | ) 30 | } 31 | 32 | 33 | #' @export 34 | format.BenchmarkDataFrame <- function(x, ...) 
{ 35 | c("# ", NextMethod()) 36 | } 37 | 38 | 39 | #' Get a list of benchmarks in a package 40 | #' 41 | #' @param package String of package name in which to find benchmarks 42 | #' 43 | #' @return An instance of [BenchmarkDataFrame] with all the benchmarks contained 44 | #' by a package 45 | #' 46 | #' @export 47 | get_package_benchmarks <- function(package = "arrowbench") { 48 | nms <- getNamespaceExports(package) 49 | objs <- mget(nms, envir = getNamespace(package)) 50 | bms <- Filter(function(x) inherits(x, "Benchmark"), objs) 51 | BenchmarkDataFrame(benchmarks = bms) 52 | } 53 | -------------------------------------------------------------------------------- /man/run_one.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run_one} 4 | \alias{run_one} 5 | \title{Run a Benchmark with a single set of parameters} 6 | \usage{ 7 | run_one( 8 | bm, 9 | ..., 10 | n_iter = 1, 11 | batch_id = NULL, 12 | dry_run = FALSE, 13 | profiling = FALSE, 14 | progress_bar = NULL, 15 | read_only = FALSE, 16 | run_id = NULL, 17 | run_name = NULL, 18 | run_reason = NULL, 19 | test_packages = NULL 20 | ) 21 | } 22 | \arguments{ 23 | \item{bm}{\code{\link[=Benchmark]{Benchmark()}} object} 24 | 25 | \item{...}{parameters passed to \code{bm$setup()} or global parameters; see the 26 | "Parameterizing benchmarks" section of \code{\link[=Benchmark]{Benchmark()}}} 27 | 28 | \item{n_iter}{Integer number of iterations to replicate each benchmark} 29 | 30 | \item{batch_id}{a length 1 character vector to identify the batch} 31 | 32 | \item{dry_run}{logical: just return the R source code that would be run in 33 | a subprocess? Default is \code{FALSE}, meaning that the benchmarks will be run.} 34 | 35 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 36 | contain a \code{prof_file} field, which you can read in with 37 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 38 | 39 | \item{progress_bar}{a \code{progress} object to update progress to (default \code{NULL})} 40 | 41 | \item{read_only}{this will only attempt to read benchmark files and will not 42 | run any that it cannot find.} 43 | 44 | \item{run_id}{Unique ID for the run} 45 | 46 | \item{run_name}{Name for the run} 47 | 48 | \item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} 49 | 50 | \item{test_packages}{a character vector of packages that the benchmarks test (default \code{NULL})} 51 | } 52 | \value{ 53 | An instance of \code{BenchmarkResult}: an R6 object containing either 54 | "stats" or "error". 
55 | } 56 | \description{ 57 | Run a Benchmark with a single set of parameters 58 | } 59 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | runs-on: macOS-latest 10 | env: 11 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: r-lib/actions/pr-fetch@v2 15 | with: 16 | repo-token: ${{ secrets.GITHUB_TOKEN }} 17 | - uses: r-lib/actions/setup-r@v2 18 | - name: Install dependencies 19 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 20 | - name: Document 21 | run: Rscript -e 'roxygen2::roxygenise()' 22 | - name: commit 23 | run: | 24 | git config --local user.email "actions@github.com" 25 | git config --local user.name "GitHub Actions" 26 | git add man/\* NAMESPACE 27 | git commit -m 'Document' 28 | - uses: r-lib/actions/pr-push@v2 29 | with: 30 | repo-token: ${{ secrets.GITHUB_TOKEN }} 31 | style: 32 | if: startsWith(github.event.comment.body, '/style') 33 | name: style 34 | runs-on: macOS-latest 35 | env: 36 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 37 | steps: 38 | - uses: actions/checkout@v3 39 | - uses: r-lib/actions/pr-fetch@v2 40 | with: 41 | repo-token: ${{ secrets.GITHUB_TOKEN }} 42 | - uses: r-lib/actions/setup-r@v2 43 | - name: Install dependencies 44 | run: Rscript -e 'install.packages("styler")' 45 | - name: Style 46 | run: Rscript -e 'styler::style_pkg()' 47 | - name: commit 48 | run: | 49 | git config --local user.email "actions@github.com" 50 | git config --local user.name "GitHub Actions" 51 | git add \*.R 52 | git commit -m 'Style' 53 | - uses: r-lib/actions/pr-push@v2 54 | with: 55 | repo-token: ${{ secrets.GITHUB_TOKEN }} 56 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | wipe_results <- function() unlink(test_path("results/"), recursive = TRUE) 2 | 3 | expect_benchmark_run <- function(..., success = TRUE) { 4 | suppress_deparse_warning( 5 | # Capture the messages 6 | output <- capture.output( 7 | # Expect some console output 8 | expect_output( 9 | result <- eval(...) 10 | ), 11 | type = "message" 12 | ) 13 | ) 14 | 15 | expect_s3_class(result, "BenchmarkResults") 16 | 17 | # If we require success, then we should confirm that the `error` attribute of 18 | # each result is empty 19 | if (success) { 20 | # the calling handler, etc is all so that we can send _one_ instance of the 21 | # message output and not a bunch 22 | messaged <- FALSE 23 | withCallingHandlers( 24 | for (res in result$results) { 25 | expect_null(res$error) 26 | }, 27 | error = function(e) { 28 | if (!messaged) { 29 | message(paste0(output, collapse = "\n")) 30 | messaged <<- TRUE 31 | } 32 | e 33 | } 34 | ) 35 | } 36 | 37 | } 38 | 39 | suppress_deparse_warning <- function(...) 
{ 40 | # suppress the "deparse may be incomplete" warnings, which are a side-effect of 41 | # load_all + testing 42 | withCallingHandlers( 43 | ..., 44 | warning = function(w) { 45 | if (startsWith(conditionMessage(w), "deparse may be incomplete")) 46 | invokeRestart("muffleWarning") 47 | }) 48 | } 49 | 50 | 51 | assert_benchmark_dataframe <- function(bm_df, benchmarks, parameters) { 52 | if (missing(parameters)) { 53 | parameters <- rep(list(NULL), length(benchmarks)) 54 | } 55 | 56 | expect_s3_class(bm_df, c("BenchmarkDataFrame", "tbl", "tbl_df", "data.frame")) 57 | expect_true(all(c("name", "benchmark", "parameters") %in% names(bm_df))) 58 | expect_equal(nrow(bm_df), length(benchmarks)) 59 | expect_equal(bm_df$name, vapply(benchmarks, function(x) x$name, character(1))) 60 | expect_equal(bm_df$benchmark, benchmarks) 61 | expect_equal(bm_df$parameters, parameters) 62 | } 63 | 64 | -------------------------------------------------------------------------------- /man/run_benchmark.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run_benchmark} 4 | \alias{run_benchmark} 5 | \title{Run a Benchmark across a range of parameters} 6 | \usage{ 7 | run_benchmark( 8 | bm, 9 | ..., 10 | params = get_default_parameters(bm, ...), 11 | n_iter = 1, 12 | dry_run = FALSE, 13 | profiling = FALSE, 14 | read_only = FALSE, 15 | run_id = NULL, 16 | run_name = NULL, 17 | run_reason = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{bm}{\code{\link[=Benchmark]{Benchmark()}} object} 22 | 23 | \item{...}{Optional benchmark parameters to run across} 24 | 25 | \item{params}{\code{data.frame} of parameter combinations. By default, this will 26 | be constructed from the expansion of the \code{...} arguments, the declared 27 | parameter options in \code{bm$setup}, and any restrictions potentially defined in 28 | \code{bm$valid_params()}.} 29 | 30 | \item{n_iter}{Integer number of iterations to replicate each benchmark. If 31 | \code{n_iter} is also supplied in \code{params}, that takes precedence.} 32 | 33 | \item{dry_run}{logical: just return the R source code that would be run in 34 | a subprocess? Default is \code{FALSE}, meaning that the benchmarks will be run.} 35 | 36 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 37 | contain a \code{prof_file} field, which you can read in with 38 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 39 | 40 | \item{read_only}{this will only attempt to read benchmark files and will not 41 | run any that it cannot find.} 42 | 43 | \item{run_id}{Unique ID for the run} 44 | 45 | \item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}} 46 | 47 | \item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} 48 | } 49 | \value{ 50 | A \code{BenchmarkResults} object whose \code{results} attribute is a list 51 | of length \code{nrow(params)}, each element a \code{BenchmarkResult} object. 52 | For a simpler view of results, call \code{as.data.frame()} on it.
53 | } 54 | \description{ 55 | Run a Benchmark across a range of parameters 56 | } 57 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: arrowbench 2 | Type: Package 3 | Title: Tools for Continuous and Interactive Benchmarking 4 | Version: 0.2.0 5 | Authors@R: c( 6 | person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = "aut"), 7 | person("Jonathan", "Keane", email = "jkeane@gmail.com", role = c("aut", "cre")), 8 | person("Edward", "Visel", email = "edward.visel@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-2811-6254")) 9 | ) 10 | Description: Tools for defining benchmarks, running them across a 11 | range of parameters, and reporting their results in a standardized form. 12 | License: MIT + file LICENSE 13 | Encoding: UTF-8 14 | Depends: R (>= 3.5.0) 15 | Imports: 16 | arrow, 17 | bench, 18 | dplyr, 19 | duckdb (>= 0.8.0), 20 | distro, 21 | glue, 22 | jsonlite, 23 | processx, 24 | progress, 25 | purrr, 26 | R6, 27 | remotes, 28 | rlang, 29 | R.utils, 30 | sessioninfo, 31 | tibble, 32 | utils, 33 | uuid, 34 | waldo, 35 | withr 36 | Suggests: 37 | testthat (>= 3.0.0), 38 | archive, 39 | data.table, 40 | DBI, 41 | dbplyr, 42 | fst, 43 | jsonify, 44 | lubridate, 45 | mockery, 46 | ndjson, 47 | RcppSimdJson, 48 | readr, 49 | vroom 50 | RoxygenNote: 7.2.3 51 | Roxygen: list(markdown = TRUE, load = "source") 52 | Collate: 53 | 'benchmark-dataframe.R' 54 | 'benchmark.R' 55 | 'bm-array-altrep-materialization.R' 56 | 'bm-array-to-vector.R' 57 | 'bm-dataset-taxi-2013.R' 58 | 'bm-dataset-taxi-parquet.R' 59 | 'bm-df-to-table.R' 60 | 'bm-placebo.R' 61 | 'bm-read-csv.R' 62 | 'bm-read-file.R' 63 | 'bm-read-json.R' 64 | 'bm-remote-dataset.R' 65 | 'bm-row-group-size.R' 66 | 'bm-table-to-df.R' 67 | 'bm-tpc-h.R' 68 | 'bm-write-csv.R' 69 | 'bm-write-file.R' 70 | 'custom-duckdb.R' 71 | 'ensure-format.R' 72 | 'ensure-lib.R' 73 | 'known-sources.R' 74 | 'ensure-source.R' 75 | 'ensure-tpch-source.R' 76 | 'external-dependencies.R' 77 | 'measure.R' 78 | 'params.R' 79 | 'publish.R' 80 | 'util.R' 81 | 'result.R' 82 | 'run.R' 83 | 'setup.R' 84 | 'tpch-queries.R' 85 | Config/testthat/edition: 3 86 | -------------------------------------------------------------------------------- /R/ensure-tpch-source.R: -------------------------------------------------------------------------------- 1 | #' Table names for TPC-H benchmarks 2 | #' 3 | #' @keywords internal 4 | #' @export 5 | tpch_tables <- c("customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier") 6 | 7 | #' Generate tpch data 8 | #' 9 | #' Generate tpch data at a given scale factor. By default, 10 | #' data is output relative to the current working directory. However, 11 | #' you can set the environment variable `ARROWBENCH_DATA_DIR` to 12 | #' point to another directory. Setting this environment variable has 13 | #' the advantage of being a central location for general usage. Running 14 | #' this function will install a custom version of duckdb in an `r_libs` 15 | #' directory, relative to the directory specified by the environment 16 | #' variable `ARROWBENCH_LOCAL_DIR`. When running this function for the first time you will 17 | #' see significant output from that installation process. This is normal. 18 | #' 19 | #' @param scale_factor a relative measure of the size of data in gigabytes. 
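#'
#' A minimal usage sketch (hedged; a scale factor of 0.01 keeps the output
#' small, and the return value is a named list of parquet file paths):
#'
#'     files <- generate_tpch(scale_factor = 0.01)
#'     files$lineitem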
20 | #' 21 | #' @export 22 | generate_tpch <- function(scale_factor = 1) { 23 | duckdb_file <- tempfile() 24 | on.exit(unlink(duckdb_file, recursive = TRUE)) 25 | 26 | # generate the tables 27 | query_custom_duckdb( 28 | paste0("CALL dbgen(sf=", scale_factor, ");"), 29 | dbdir = duckdb_file 30 | ) 31 | 32 | # write each table to parquet 33 | out <- lapply(tpch_tables, function(name) { 34 | filename <- source_data_file(paste0(name, "_", format(scale_factor, scientific = FALSE), ".parquet")) 35 | query <- paste0("SELECT * FROM ", name, ";") 36 | export_custom_duckdb(query, filename, dbdir = duckdb_file) 37 | 38 | filename 39 | }) 40 | 41 | set_names(out, tpch_tables) 42 | } 43 | 44 | #' @importFrom rlang set_names 45 | ensure_tpch <- function(scale_factor = 1) { 46 | ensure_source_dirs_exist() 47 | 48 | filenames <- paste0(paste(tpch_tables, format(scale_factor, scientific = FALSE), sep="_"), ".parquet") 49 | 50 | # Check for places these files might already be and return those. 51 | cached_files <- map(filenames, data_file) 52 | if (all(!map_lgl(cached_files, is.null))) { 53 | # if the files are in our temp storage or source storage, use them from there. 54 | return(set_names(cached_files, nm = tpch_tables)) 55 | } 56 | 57 | # generate it 58 | generate_tpch(scale_factor) 59 | } -------------------------------------------------------------------------------- /R/custom-duckdb.R: -------------------------------------------------------------------------------- 1 | ensure_custom_duckdb <- function() { 2 | result <- tryCatch({ 3 | con <- DBI::dbConnect(duckdb::duckdb()) 4 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 5 | DBI::dbExecute(con, "LOAD tpch;") 6 | DBI::dbGetQuery(con, "select scale_factor, query_nr from tpch_answers() LIMIT 1;") 7 | }, 8 | error = function(e) { 9 | error_is_from_us <- grepl( 10 | paste0(c( 11 | "(name tpch_answers is not on the catalog)", 12 | "(name tpch_answers does not exist)", 13 | "(tpch.duckdb_extension\" not found)" 14 | ), 15 | collapse = "|" 16 | ), 17 | conditionMessage(e) 18 | ) 19 | 20 | if (error_is_from_us) { 21 | NULL 22 | } else { 23 | rlang::abort( 24 | "An unexpected error occurred whilst querying TPC-H-enabled duckdb", 25 | parent = e 26 | ) 27 | } 28 | } 29 | ) 30 | 31 | # Check that the result has a query in it 32 | if (identical(result$query_nr, 1L)) { 33 | return(invisible(NULL)) 34 | } 35 | 36 | 37 | install_duckdb_tpch() 38 | result <- try( 39 | ensure_custom_duckdb(), 40 | silent = FALSE 41 | ) 42 | 43 | if (!inherits(result, "try-error")) { 44 | return(invisible(NULL)) 45 | } 46 | 47 | stop("Could not load the DuckDB TPC-H extension.") 48 | } 49 | 50 | query_custom_duckdb <- function(sql, dbdir = ":memory:") { 51 | ensure_custom_duckdb() 52 | 53 | con <- DBI::dbConnect(duckdb::duckdb(dbdir = dbdir)) 54 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 55 | DBI::dbExecute(con, "LOAD tpch;") 56 | DBI::dbGetQuery(con, sql) 57 | } 58 | 59 | export_custom_duckdb <- function(sql, sink, dbdir = ":memory:") { 60 | ensure_custom_duckdb() 61 | 62 | con <- DBI::dbConnect(duckdb::duckdb(dbdir = dbdir)) 63 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 64 | DBI::dbExecute(con, "LOAD tpch;") 65 | res <- DBI::dbSendQuery(con, sql, arrow = TRUE) 66 | 67 | # this could be streamed in the future when the parquet writer 68 | # in R supports streaming 69 | reader <- duckdb::duckdb_fetch_record_batch(res) 70 | table <- reader$read_table() 71 | arrow::write_parquet(table, sink) 72 | 73 | sink 74 | } 75 | 76 | install_duckdb_tpch <- function() { 77 | con <- 
DBI::dbConnect(duckdb::duckdb()) 78 | on.exit(DBI::dbDisconnect(con)) 79 | DBI::dbExecute(con, "INSTALL tpch; LOAD tpch;") 80 | } 81 | -------------------------------------------------------------------------------- /R/measure.R: -------------------------------------------------------------------------------- 1 | #' Measure times and memory usage 2 | #' 3 | #' @param ... An expression to 4 | #' @param drop_caches Attempt to drop the disk cache before each case or iteration. 5 | #' Currently only works on linux. Permissible values are `"case"`, `"iteration"`, 6 | #' and `NULL`. Defaults to `NULL`, i.e. don't drop caches. As `measure()` is run 7 | #' once per iteration, here `"iteration"` results in dropping caches once and 8 | #' `NULL` and `"case"` result in no cache dropping. 9 | #' @inheritParams run_benchmark 10 | #' 11 | #' @return A tibble of timings and memory usage 12 | #' @export 13 | measure <- function(..., profiling = FALSE, drop_caches = NULL) { 14 | start_mem <- bench::bench_process_memory() 15 | if (!is.null(drop_caches) && drop_caches == "iteration") { 16 | sync_and_drop_caches() 17 | } 18 | gc_info <- with_gc_info({ 19 | prof_file <- with_profiling(profiling, { 20 | timings <- bench::bench_time(eval.parent(...)) 21 | }) 22 | }) 23 | end_mem <- bench::bench_process_memory() 24 | 25 | timings <- as.data.frame(as.list(timings)) 26 | 27 | timings$start_mem_bytes <- as.numeric(start_mem["current"]) 28 | timings$end_mem_bytes <- as.numeric(end_mem["current"]) 29 | timings$max_mem_bytes <- as.numeric(end_mem["max"]) 30 | timings$prof_file <- prof_file 31 | 32 | cbind(timings, gc_info) 33 | } 34 | 35 | with_profiling <- function(profiling_on, expr) { 36 | if (profiling_on) { 37 | prof_file <- basename(tempfile(fileext = ".prof")) 38 | utils::Rprof(filename = prof_file, memory.profiling = TRUE, gc.profiling = TRUE, line.profiling = TRUE) 39 | on.exit(utils::Rprof(NULL)) 40 | } else { 41 | prof_file <- NULL 42 | } 43 | eval.parent(expr) 44 | prof_file 45 | } 46 | 47 | with_gc_info <- function(expr) { 48 | force(expr) 49 | with_gcinfo <- "bench" %:::% "with_gcinfo" 50 | gc_output <- with_gcinfo(eval.parent(expr)) 51 | # This will swallow errors, so check for error output and re-raise 52 | if (length(gc_output) > 0 && any(startsWith(gc_output, "Error")) ) { 53 | stop(paste(gc_output, collapse = "\n"), call. = FALSE) 54 | } 55 | parse_gc <- "bench" %:::% "parse_gc" 56 | gc <- parse_gc(gc_output) 57 | names(gc) <- paste0("gc_", names(gc)) 58 | if (nrow(gc) == 0) { 59 | # Means there was no garbage collection, so let's fill this in with 0s 60 | gc[1, ] <- list(0L, 0L, 0L) 61 | } 62 | # Cat out any messages so that we don't swallow them. 63 | # TODO: filter out what has been parsed? 
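  # (gc_output is the raw gcinfo stream: lines parse_gc() already consumed
  # are echoed again below, which is noisy but loses no information)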
64 | cat(gc_output) 65 | gc 66 | } 67 | 68 | # work around checks looking for`:::` 69 | `%:::%` = function(pkg, fun) get(fun, envir = asNamespace(pkg), inherits = FALSE) 70 | -------------------------------------------------------------------------------- /tests/testthat/test-util.R: -------------------------------------------------------------------------------- 1 | test_that("cache key", { 2 | expect_identical(bm_run_cache_key("foo", alpha = "one", beta = 2), "foo/one-2") 3 | expect_identical(bm_run_cache_key("foo", beta = 2, alpha = "one"), "foo/one-2") 4 | }) 5 | 6 | test_that("find_r()", { 7 | out <- system(paste(find_r(), "--no-save -s 2>&1"), intern = TRUE, input = "print('output')\n") 8 | expect_match(out, "output") 9 | 10 | # when system fails, there's also a warning 11 | expect_warning(error_out <- system(paste(find_r(), "--no-save -s 2>&1"), intern = TRUE, input = "stop('this is an error')\n")) 12 | expect_match(error_out[[1]], "this is an error") 13 | }) 14 | 15 | 16 | test_that("get_default_args", { 17 | func <- function( 18 | one = 1, 19 | a_few = c(1, 2, 3), 20 | null = NULL, 21 | # we need to use something in the package here for environment scoping + 22 | # testthat reasons 23 | a_vector = known_sources, 24 | none 25 | ) NULL 26 | 27 | expect_identical( 28 | get_default_args(func), 29 | list(one = 1, a_few = c(1, 2, 3), a_vector = known_sources) 30 | ) 31 | }) 32 | 33 | 34 | test_that("sync_and_drop_caches() works", { 35 | # @param ... named values where names are values for `args` and values are 36 | # whether to fail 37 | make_mock_run_function <- function(...) { 38 | dots <- list(...) 39 | function(command, args, error_on_status) { 40 | list(status = as.integer(dots[[args]])) 41 | } 42 | } 43 | 44 | cases = suppressWarnings(purrr::cross(list( 45 | "sync; echo 3 | sudo tee /proc/sys/vm/drop_caches" = c(TRUE, FALSE), 46 | "sync; sudo purge" = c(TRUE, FALSE) 47 | ))) 48 | 49 | for (case in cases) { 50 | options( 51 | "arrowbench.drop_caches_failed" = NULL, 52 | "arrowbench.purge_failed" = NULL 53 | ) 54 | 55 | mockery::stub( 56 | where = sync_and_drop_caches, 57 | what = "processx::run", 58 | how = do.call(make_mock_run_function, case) 59 | ) 60 | 61 | expect_identical(sync_and_drop_caches(), any(!unlist(case))) 62 | 63 | if (case[["sync; echo 3 | sudo tee /proc/sys/vm/drop_caches"]]) { 64 | expect_true(getOption("arrowbench.drop_caches_failed")) 65 | if (case[["sync; sudo purge"]]) { 66 | expect_true(getOption("arrowbench.purge_failed")) 67 | } else { 68 | expect_null(getOption("arrowbench.purge_failed")) 69 | } 70 | } else { 71 | expect_null(getOption("arrowbench.drop_caches_failed")) 72 | expect_null(getOption("arrowbench.purge_failed")) 73 | } 74 | } 75 | 76 | options( 77 | "arrowbench.drop_caches_failed" = NULL, 78 | "arrowbench.purge_failed" = NULL 79 | ) 80 | }) -------------------------------------------------------------------------------- /man/R6Point1Class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result.R 3 | \name{R6Point1Class} 4 | \alias{R6Point1Class} 5 | \title{Version of R6 with heritable static/class methods and attributes} 6 | \usage{ 7 | R6Point1Class(..., static = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{Passed through to \link[R6:R6Class]{R6::R6Class}} 11 | 12 | \item{static}{A named list of static/class functions/values to turn into 13 | methods/attributes. 
Note there is currently no differentiation between static 14 | and class methods; static methods are simply class methods that 15 | do not access \code{self}, though \code{self} will exist in their evaluation environment. 16 | This arrangement could change in the future if there is reason to.} 17 | } 18 | \description{ 19 | Elements in \code{static} can be called without instantiation, e.g. \code{Class$method()}. 20 | Functions are evaluated in the environment of \code{Class}, so you can refer to \code{self} 21 | (which here is the class, not the instance) to create class methods. 22 | } 23 | \section{Why this exists}{ 24 | 25 | 26 | Sometimes we want static/class methods/attributes that can be accessed from 27 | the class (e.g. \code{MyR6Class$my_static_method()}) instead of an instance of 28 | that class (e.g. \code{MyR6Class$new(...)$my_normal_method()}). As individual 29 | classes are environments, these can be added after the fact like so: 30 | 31 | \if{html}{\out{
}}\preformatted{MyR6Class <- R6Class(...) 32 | MyR6Class$my_static_method <- function(x) ... 33 | }\if{html}{\out{
}} 34 | 35 | But the problem with the above is it's not heritable; if you make a class that 36 | inherits from \code{MyR6Class}, it will not have \verb{$my_static_method()} unless you 37 | manually re-add it. 38 | 39 | This class structure abstracts the pattern, so when you create a new class, it 40 | checks if the parent contains anything in \code{private$static}, and copies over any 41 | methods/attributes there, less any overwritten in the new class. 42 | } 43 | 44 | \section{How static/class methods/attributes may be useful}{ 45 | 46 | 47 | There are lots of reasons you may want static/class methods/attributes, but 48 | the immediate use-case here is to create alternate methods for instantiating 49 | a class besides \verb{$new()}/\verb{$initialize()}. For instance, if a class can be 50 | represented as JSON, it's quite helpful to have a \verb{$from_json()} method that 51 | can recreate an instance from a JSON blob. 52 | 53 | You could have a separate special reader function that returns an instance, 54 | but especially as classes multiply this solution becomes difficult to 55 | maintain. 56 | } 57 | 58 | \keyword{internal} 59 | -------------------------------------------------------------------------------- /inst/benchmarks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "command": "dataset_taxi_2013", 4 | "name": "arrowbench/dataset_taxi_2013", 5 | "runner": "arrowbench", 6 | "flags": { 7 | "language": "R" 8 | } 9 | }, 10 | { 11 | "command": "row_group_size", 12 | "name": "arrowbench/row_group_size", 13 | "runner": "arrowbench", 14 | "flags": { 15 | "language": "R" 16 | } 17 | }, 18 | { 19 | "command": "write_csv", 20 | "name": "arrowbench/write_csv", 21 | "runner": "arrowbench", 22 | "flags": { 23 | "language": "R" 24 | } 25 | }, 26 | { 27 | "command": "read_csv", 28 | "name": "arrowbench/read_csv", 29 | "runner": "arrowbench", 30 | "flags": { 31 | "language": "R" 32 | } 33 | }, 34 | { 35 | "command": "read_json", 36 | "name": "arrowbench/read_json", 37 | "runner": "arrowbench", 38 | "flags": { 39 | "language": "R" 40 | } 41 | }, 42 | { 43 | "command": "remote_dataset", 44 | "name": "arrowbench/remote_dataset", 45 | "runner": "arrowbench", 46 | "flags": { 47 | "language": "R" 48 | } 49 | }, 50 | { 51 | "command": "file-write", 52 | "name": "arrowbench/file-write", 53 | "runner": "arrowbench", 54 | "flags": { 55 | "language": "R" 56 | } 57 | }, 58 | { 59 | "command": "dataframe-to-table", 60 | "name": "arrowbench/dataframe-to-table", 61 | "runner": "arrowbench", 62 | "flags": { 63 | "language": "R" 64 | } 65 | }, 66 | { 67 | "command": "table_to_df", 68 | "name": "arrowbench/table_to_df", 69 | "runner": "arrowbench", 70 | "flags": { 71 | "language": "R" 72 | } 73 | }, 74 | { 75 | "command": "array_to_vector", 76 | "name": "arrowbench/array_to_vector", 77 | "runner": "arrowbench", 78 | "flags": { 79 | "language": "R" 80 | } 81 | }, 82 | { 83 | "command": "partitioned-dataset-filter", 84 | "name": "arrowbench/partitioned-dataset-filter", 85 | "runner": "arrowbench", 86 | "flags": { 87 | "language": "R" 88 | } 89 | }, 90 | { 91 | "command": "file-read", 92 | "name": "arrowbench/file-read", 93 | "runner": "arrowbench", 94 | "flags": { 95 | "language": "R" 96 | } 97 | }, 98 | { 99 | "command": "tpch", 100 | "name": "arrowbench/tpch", 101 | "runner": "arrowbench", 102 | "flags": { 103 | "language": "R" 104 | } 105 | }, 106 | { 107 | "command": "array_altrep_materialization", 108 | "name": "arrowbench/array_altrep_materialization", 109 | 
"runner": "arrowbench", 110 | "flags": { 111 | "language": "R" 112 | } 113 | } 114 | ] 115 | -------------------------------------------------------------------------------- /man/run.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run} 4 | \alias{run} 5 | \alias{run.BenchmarkDataFrame} 6 | \title{Run an object} 7 | \usage{ 8 | run(x, ...) 9 | 10 | \method{run}{BenchmarkDataFrame}(x, ..., publish = FALSE, run_id = NULL, run_name = NULL, run_reason = NULL) 11 | } 12 | \arguments{ 13 | \item{x}{An S3 classed object to run} 14 | 15 | \item{...}{Additional arguments passed through to methods. For 16 | \code{run.BenchmarkDataFrame}, passed through to \code{\link[=get_default_parameters]{get_default_parameters()}} (when 17 | parameters are not specified) and \code{\link[=run_benchmark]{run_benchmark()}}.} 18 | 19 | \item{publish}{Flag for whether to publish results to a Conbench server. See 20 | "Environment Variables" section for how to specify server details. Requires 21 | the benchconnect CLI is installed; see \code{\link[=install_benchconnect]{install_benchconnect()}}.} 22 | 23 | \item{run_id}{Unique ID for the run. If not specified, will be generated.} 24 | 25 | \item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}} 26 | 27 | \item{run_reason}{Required. Low-cardinality reason for the run, e.g. "commit" or "test"} 28 | } 29 | \value{ 30 | A modified object containing run results. For \code{run.BenchmarkDataFrame}, 31 | a \code{results} list column is appended. 32 | } 33 | \description{ 34 | Run an object 35 | } 36 | \section{Environment Variables}{ 37 | 38 | \itemize{ 39 | \item \code{CONBENCH_URL}: Required. The URL of the Conbench server with no trailing 40 | slash. For arrow, should be \verb{https://conbench.ursa.dev}. 41 | \item \code{CONBENCH_EMAIL}: The email to use for Conbench login. Only required if the 42 | server is private. 43 | \item \code{CONBENCH_PASSWORD}: The password to use for Conbench login. Only required 44 | if the server is private. 45 | \item \code{CONBENCH_PROJECT_REPOSITORY}: The repository name (in the format 46 | \code{org/repo}) or the URL (in the format \verb{https://github.com/org/repo}). 47 | Defaults to \code{"https://github.com/apache/arrow"} if unset. 48 | \item \code{CONBENCH_PROJECT_PR_NUMBER}: Recommended. The number of the GitHub pull 49 | request that is running this benchmark, or \code{NULL} if it's a run on the 50 | default branch 51 | \item \code{CONBENCH_PROJECT_COMMIT}: The 40-character commit SHA of the repo being 52 | benchmarked. If missing, will attempt to obtain it from 53 | \code{arrow::arrow_info()$build_info$git_id}, though this may not be populated 54 | depending on how Arrow was built. 55 | \item \code{CONBENCH_MACHINE_INFO_NAME}: Will override detected machine host name sent 56 | in \code{machine_info.name} when posting runs and results. Needed for cases where 57 | the actual host name can vary, like CI and cloud runners. 
58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.character,Serializable) 4 | S3method(as.data.frame,BenchmarkResult) 5 | S3method(as.data.frame,BenchmarkResults) 6 | S3method(as.list,Serializable) 7 | S3method(format,BenchmarkDataFrame) 8 | S3method(get_default_parameters,Benchmark) 9 | S3method(get_default_parameters,BenchmarkDataFrame) 10 | S3method(get_default_parameters,default) 11 | S3method(run,BenchmarkDataFrame) 12 | S3method(run,default) 13 | export("%||%") 14 | export(BenchEnvironment) 15 | export(Benchmark) 16 | export(BenchmarkDataFrame) 17 | export(all_sources) 18 | export(array_altrep_materialization) 19 | export(array_to_vector) 20 | export(confirm_mem_alloc) 21 | export(dataset_taxi_2013) 22 | export(dataset_taxi_parquet) 23 | export(df_to_table) 24 | export(ensure_dataset) 25 | export(ensure_format) 26 | export(ensure_source) 27 | export(file_with_ext) 28 | export(generate_tpch) 29 | export(get_csv_reader) 30 | export(get_csv_writer) 31 | export(get_dataset_attr) 32 | export(get_default_parameters) 33 | export(get_input_func) 34 | export(get_json_reader) 35 | export(get_package_benchmarks) 36 | export(get_params_summary) 37 | export(get_query_func) 38 | export(get_read_function) 39 | export(get_source_attr) 40 | export(get_sql_query_func) 41 | export(get_write_function) 42 | export(install_benchconnect) 43 | export(install_datalogistik) 44 | export(install_pipx) 45 | export(known_compressions) 46 | export(known_formats) 47 | export(known_sources) 48 | export(measure) 49 | export(read_csv) 50 | export(read_file) 51 | export(read_json) 52 | export(read_source) 53 | export(remote_dataset) 54 | export(row_group_size) 55 | export(run) 56 | export(run_benchmark) 57 | export(run_bm) 58 | export(run_one) 59 | export(sync_and_drop_caches) 60 | export(table_to_df) 61 | export(tables_refed) 62 | export(tpc_h) 63 | export(tpc_h_queries) 64 | export(tpch_answer) 65 | export(tpch_tables) 66 | export(write_csv) 67 | export(write_file) 68 | importFrom(R.utils,gunzip) 69 | importFrom(R.utils,gzip) 70 | importFrom(distro,distro) 71 | importFrom(glue,glue) 72 | importFrom(jsonlite,fromJSON) 73 | importFrom(jsonlite,toJSON) 74 | importFrom(progress,progress_bar) 75 | importFrom(purrr,flatten) 76 | importFrom(purrr,map) 77 | importFrom(purrr,map_int) 78 | importFrom(purrr,map_lgl) 79 | importFrom(purrr,pmap) 80 | importFrom(purrr,transpose) 81 | importFrom(remotes,install_github) 82 | importFrom(rlang,is_missing) 83 | importFrom(rlang,set_names) 84 | importFrom(sessioninfo,package_info) 85 | importFrom(stats,setNames) 86 | importFrom(utils,head) 87 | importFrom(utils,install.packages) 88 | importFrom(utils,installed.packages) 89 | importFrom(utils,modifyList) 90 | importFrom(utils,packageDescription) 91 | importFrom(utils,tail) 92 | importFrom(utils,write.csv) 93 | importFrom(waldo,compare) 94 | importFrom(withr,with_envvar) 95 | importFrom(withr,with_makevars) 96 | importFrom(withr,with_options) 97 | -------------------------------------------------------------------------------- /R/bm-row-group-size.R: -------------------------------------------------------------------------------- 1 | #' Benchmark effect of parquet row group size 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-file id, or a file path to read in 5 | #' * `queries` What queries to run 6 | #' * 
`chunk_size` Number of rows to write in each row group. Suggested sizes: 7 | #' `chunk_size = list(NULL, 10000L, 100000L, 1000000L)` 8 | #' 9 | #' @export 10 | row_group_size <- Benchmark( 11 | "row_group_size", 12 | setup = function(source = c("fanniemae_2016Q4", "fanniemae_sample"), # TODO implement more sources 13 | queries = c("filters", "everything"), 14 | chunk_size = NULL) { 15 | # ensure that we have the right kind of file available 16 | input_file <- ensure_format( 17 | name = source, format = "parquet", compression = "snappy", chunk_size = chunk_size 18 | ) 19 | 20 | library("dplyr", warn.conflicts = FALSE) 21 | 22 | # put the necessary variables into a BenchmarkEnvironment to be used when the 23 | # benchmark is running. 24 | BenchEnvironment(source = source, input_file = input_file, queries = queries) 25 | }, 26 | 27 | # delete the results before each iteration 28 | before_each = { 29 | result <- list() 30 | result_dim <- list() 31 | }, 32 | # the benchmark to run 33 | run = { 34 | ds <- arrow::open_dataset(input_file) 35 | 36 | # TODO: generalize this to work with fanniemae_sample once https://github.com/voltrondata-labs/arrowbench/issues/88 is done 37 | if (grepl('fanniemae_2016Q4', source)) { 38 | if ("filters" %in% queries) { 39 | result[["filters"]] <- ds %>% 40 | filter( 41 | is.na(f2), 42 | f3 < 2 43 | | f5 > 55 44 | | f6 < 50 45 | | f8 %in% c('02/2050', '10/2059', '02/2052') 46 | | f14 == '08/01/2018' 47 | | f17 > 10000 48 | | f18 > 20000 49 | | f19 > 3000 50 | | f20 > 5000 51 | | f21 > 10000 52 | | f22 > 3e5 53 | | f23 > 1e5 54 | | f25 > 1000 55 | | f26 > 5e4 56 | ) %>% 57 | collect() 58 | 59 | result_dim[["filters"]] <- c(514L, 31L) 60 | } 61 | } 62 | 63 | if ("everything" %in% queries) { 64 | result[["everything"]] <- ds %>% collect() 65 | result_dim[["everything"]] <- all_sources[[source]]$dim 66 | } 67 | }, 68 | # after each iteration, check the dimensions and delete the results 69 | after_each = { 70 | Map( 71 | function(res, res_dim, query) { 72 | call <- quote(stopifnot(identical(dim(res), res_dim))) 73 | names(call)[[2]] <- paste0("The dimensions for query `", query, "` do not match") 74 | eval(call) 75 | }, 76 | res = result, 77 | res_dim = result_dim, 78 | query = names(result) 79 | ) 80 | 81 | result <- list() 82 | result_dim <- list() 83 | }, 84 | 85 | packages_used = function(params) { 86 | c("arrow", "dplyr") 87 | } 88 | ) 89 | -------------------------------------------------------------------------------- /inst/tpch-answer-gen.R: -------------------------------------------------------------------------------- 1 | # ARROWBENCH_LOCAL_DIR="path/to/arrowbench/storage" Rscript inst/tpch-answer-gen.R 2 | 3 | library(arrowbench) 4 | library(duckdb) 5 | library(arrow, warn.conflicts = FALSE) 6 | library(dplyr, warn.conflicts = FALSE) 7 | library(lubridate, warn.conflicts = FALSE) 8 | 9 | sf <- 1 10 | 11 | tpch_files <- ensure_source("tpch", scale_factor = sf) 12 | 13 | input_functions <- list() 14 | 15 | input_functions[["dplyr"]] <- function(name) { 16 | file <- tpch_files[[name]] 17 | return(arrow::read_parquet(file, as_data_frame = TRUE)) 18 | } 19 | 20 | input_functions[["arrow"]] <- function(name) { 21 | file <- tpch_files[[name]] 22 | return(arrow::open_dataset(file, format = "parquet")) 23 | } 24 | 25 | con <- dbConnect(duckdb::duckdb("answer_gen_db")) 26 | dbExecute(con, paste0("PRAGMA threads=10")) 27 | 28 | # DuckDB tables 29 | for (name in tpch_tables) { 30 | file <- path.expand(tpch_files[[name]]) 31 | 32 | sql_query <- paste0("CREATE TABLE ", name, 
" AS SELECT * FROM parquet_scan('", file, "');") 33 | 34 | file <- tpch_files[[name]] 35 | dbExecute(con, sql_query) 36 | } 37 | 38 | input_functions[["duckdb"]] <- function(name) { 39 | return(dplyr::tbl(con, name)) 40 | } 41 | 42 | # create directory to save the answers to 43 | dir.create(glue::glue("./answers/scale-factor-{sf}/"), recursive = TRUE) 44 | 45 | for (q in c(1:22)) { 46 | message("==================================================") 47 | message(glue::glue("Query: {q}")) 48 | message("==================================================") 49 | 50 | query <- q 51 | 52 | # grab the sql queries from github (this URL might need to be updated if their location in the repo changes.) 53 | sql <- paste0(httr::GET( 54 | glue::glue("https://raw.githubusercontent.com/duckdb/duckdb/master/extension/tpch/dbgen/queries/q{stringr::str_pad(query, 2, pad = '0')}.sql") 55 | ), collapse = "\n") 56 | 57 | # dplyr with scale factor 10 requires a lot of memory, if hitting `vector memory exhausted (limit reached?)` comment it out 58 | # at scale factor 0.01 there are small differences between duckdb and arrow for some queries. This is likely due to decimal precision / rounding differences, but I haven't dug into it too deeply. 59 | result_dplyr <- tpc_h_queries[[query]](input_functions[["dplyr"]]) 60 | result_arrow <- tpc_h_queries[[query]](input_functions[["arrow"]], collect_func = compute) 61 | result_duckdb <- as_tibble(dbGetQuery(con, sql)) 62 | 63 | # compare the arrow results with both dplyr and duckdb versions 64 | print(waldo::compare(as.data.frame(result_arrow), result_dplyr, tolerance = 0.01, x_arg = "arrow", y_arg = "dplyr")) 65 | print(waldo::compare(as.data.frame(result_arrow), result_duckdb, tolerance = 0.01, x_arg = "arrow", y_arg = "duckdb")) 66 | 67 | write_parquet(result_arrow, glue::glue("./answers/scale-factor-{sf}/tpch-q{stringr::str_pad(query, 2, pad = '0')}-sf{sf}.parquet")) 68 | } 69 | 70 | # clean up duckdb database file 71 | DBI::dbDisconnect(con, shutdown = TRUE) 72 | unlink("answer_gen_db") 73 | -------------------------------------------------------------------------------- /R/params.R: -------------------------------------------------------------------------------- 1 | #' Generate a dataframe of default parameters for a benchmark 2 | #' 3 | #' Generates a dataframe of parameter combinations for a benchmark to try based 4 | #' on the parameter defaults of its `setup` function and supplied parameters. 5 | #' 6 | #' @param x An object for which to generate parameters 7 | #' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` 8 | #' function. May also contain global params `cpu_count`, `lib_path`, `mem_alloc`, 9 | #' and `drop_caches`. See the "Parameterizing benchmarks" section of [Benchmark()] 10 | #' for more details. 11 | #' 12 | #' @return For `get_default_parameters.Benchmark`, a dataframe of parameter combinations 13 | #' to try with a column for each parameter and a row for each combination. 14 | #' 15 | #' @export 16 | get_default_parameters <- function(x, ...) { 17 | UseMethod("get_default_parameters") 18 | } 19 | 20 | #' @rdname get_default_parameters 21 | #' @export 22 | get_default_parameters.default <- function(x, ...) { 23 | stop("No method found for class `", toString(class(x)), '`') 24 | } 25 | 26 | #' @rdname get_default_parameters 27 | #' @export 28 | get_default_parameters.Benchmark <- function(x, ...) { 29 | # This takes the expansion of the default parameters in the function signature 30 | # perhaps restricted by the ... 
params 31 | params <- modifyList(get_default_args(x$setup), list(...), keep.null = TRUE) 32 | if (identical(params[["lib_path"]], "all")) { 33 | # Default for lib_path is just "latest", if omitted 34 | # "all" means all old versions 35 | # rev() is so we run newest first. This also means we bootstrap data fixtures 36 | # with newest first, so that's some assurance that older versions can read 37 | # what the newer libs write 38 | params$lib_path <- rev(c(names(arrow_version_to_date), "devel", "latest")) 39 | } 40 | if (is.null(params[["cpu_count"]])) { 41 | params$cpu_count <- c(1L, parallel::detectCores()) 42 | } 43 | 44 | # `NULL` is a valid argument but needs to be wrapped in `list()` 45 | to_list <- lengths(params) == 0 46 | params[to_list] <- lapply(params[to_list], list) 47 | 48 | params[["stringsAsFactors"]] <- FALSE 49 | out <- do.call(expand.grid, params) 50 | 51 | # we don't change memory allocators on non-arrow packages 52 | if (!is.null(params[["mem_alloc"]])) { 53 | # a bit of a hack: we can test memory allocators on devel or latest, but 54 | # "4.0" <= "devel" and "4.0" <= "latest" are both true. 55 | out[!is_arrow_package(out, "4.0", x$packages_used), "mem_alloc"] <- NA 56 | out <- unique(out) 57 | } 58 | 59 | if (!is.null(x$valid_params)) { 60 | out <- x$valid_params(out) 61 | } 62 | out 63 | } 64 | 65 | #' @rdname get_default_parameters 66 | #' @export 67 | get_default_parameters.BenchmarkDataFrame <- function(x, ...) { 68 | x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { 69 | if (is.null(params)) { 70 | params <- get_default_parameters(bm, ...) 71 | } 72 | params 73 | }) 74 | 75 | x 76 | } 77 | -------------------------------------------------------------------------------- /R/bm-write-csv.R: -------------------------------------------------------------------------------- 1 | #' Benchmark CSV writing 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-source id to read in and then write back out as CSV 5 | #' * `writer` One of `c("arrow", "data.table", "vroom", "readr", "base")` 6 | #' * `compression` One of `c("uncompressed", "gzip")` 7 | #' * `input` One of `c("arrow_table", "data_frame")` 8 | #' @export 9 | write_csv <- Benchmark( 10 | "write_csv", 11 | setup = function(source = names(known_sources), 12 | writer = "arrow", 13 | compression = c("uncompressed", "gzip"), 14 | input = c("arrow_table", "data_frame")) { 15 | writer <- match.arg(writer, c("arrow", "data.table", "vroom", "readr", "base")) 16 | compression <- match.arg(compression, c("uncompressed", "gzip")) 17 | input <- match.arg(input) 18 | 19 | # source defaults are retrieved from the function definition (all available 20 | # known_sources) and then the source is read in as a data.frame 21 | source <- ensure_source(source) 22 | df <- read_source(source, as_data_frame = input == "data_frame") 23 | 24 | ext <- switch( 25 | compression, 26 | uncompressed = ".csv", 27 | gzip = ".csv.gz", 28 | paste0(".csv.", compression) 29 | ) 30 | 31 | # Map string param name to functions 32 | BenchEnvironment( 33 | write_csv_func = get_csv_writer(writer), 34 | source = source, 35 | df = df, 36 | ext = ext 37 | ) 38 | }, 39 | # delete the results before each iteration 40 | before_each = { 41 | result_file <- tempfile(fileext = ext) 42 | 43 | }, 44 | # the benchmark to run 45 | run = { 46 | write_csv_func(df, result_file) 47 | }, 48 | # after each iteration, check the dimensions and delete the results 49 | after_each = { 50 | stopifnot(identical(dim(df), dim(arrow::open_dataset(result_file, format = "csv")))) 51 | stopifnot("Output file does not exist" = 
file.exists(result_file)) 52 | unlink(result_file) 53 | }, 54 | valid_params = function(params) { 55 | ## Only arrow fns will accept an arrow_table 56 | drop <- ( params$input == "arrow_table" & params$writer != "arrow" ) 57 | params[!drop,] 58 | }, 59 | packages_used = function(params) { 60 | params$writer 61 | } 62 | ) 63 | 64 | 65 | #' Get a CSV writer 66 | #' 67 | #' @param writer the writer to use 68 | #' 69 | #' @return the csv writer 70 | #' @export 71 | get_csv_writer <- function(writer) { 72 | library(writer, character.only = TRUE, warn.conflicts = FALSE) 73 | if (writer == "arrow") { 74 | return(function(...) arrow::write_csv_arrow(...)) 75 | } else if (writer == "readr") { 76 | return(function(..., as_data_frame) readr::write_csv(...)) 77 | } else if (writer == "data.table") { 78 | return(function(..., as_data_frame) data.table::fwrite(...)) 79 | } else if (writer == "vroom") { 80 | return(function(..., as_data_frame) vroom::vroom_write(..., delim = ",")) 81 | } else if (writer == "base") { 82 | return(function(df, result_file) { 83 | if (tools::file_ext(result_file) == "gz") result_file <- gzfile(result_file) 84 | utils::write.csv(df, result_file, row.names = FALSE) 85 | }) 86 | } else { 87 | stop("Unsupported writer: ", writer, call. = FALSE) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /R/bm-array-to-vector.R: -------------------------------------------------------------------------------- 1 | #' Benchmark for reading an Arrow array to a vector 2 | #' 3 | #' This flexes a lower level conversion to R data structures from Arrow data structures. 4 | #' 5 | #' @section Parameters: 6 | #' * `source` A known-file id to use (it will be read in to a data.frame first) 7 | #' * `chunked_arrays` logical, should the arrays converted be `ChunkedArrays` or `Arrays`? 8 | #' * `exclude_nulls` logical, should any columns with any `NULL`s or `NA`s in them be removed? 9 | #' * `alt_rep` logical, should the altrep option be set? 
(`TRUE` to enable it, `FALSE` to disable) 10 | #' 11 | #' @importFrom purrr map flatten 12 | #' @export 13 | array_to_vector <- Benchmark("array_to_vector", 14 | setup = function( 15 | # the only datasets that have any no-null numerics are 16 | source = c("type_integers", "type_floats"), 17 | chunked_arrays = FALSE, 18 | exclude_nulls = FALSE, 19 | alt_rep = TRUE 20 | ) { 21 | stopifnot( 22 | is.logical(chunked_arrays), 23 | is.logical(exclude_nulls), 24 | is.logical(alt_rep) 25 | ) 26 | source <- match.arg(source, names(all_sources)) 27 | source <- ensure_source(source) 28 | result_dim <- get_source_attr(source, "dim") 29 | table <- read_source(source, as_data_frame = FALSE) 30 | 31 | if (exclude_nulls) { 32 | cols_without_nulls <- unlist(lapply(colnames(table), function(x) table[[x]]$null_count == 0)) 33 | table <- table[which(cols_without_nulls)] 34 | result_dim[2] <- sum(cols_without_nulls) 35 | } 36 | 37 | # extract the arrays 38 | arrays <- purrr::map(colnames(table), ~table[[.]]) 39 | 40 | # If we can operate on arrays, then pull the chunks out and flatten 41 | if (!chunked_arrays) { 42 | arrays <- purrr::flatten(purrr::map(arrays, function (array) { 43 | n_chunks <- array$num_chunks 44 | purrr::map(seq_len(n_chunks) - 1L, ~array$chunk(.)) 45 | })) 46 | } 47 | 48 | array_lengths <- lapply(arrays, function(array) array$length()) 49 | 50 | as_vector_func <- function(array) as.vector(array) 51 | 52 | BenchEnvironment( 53 | as_vector_func = as_vector_func, 54 | array_lengths = array_lengths, 55 | arrays = arrays, 56 | alt_rep = alt_rep 57 | ) 58 | }, 59 | before_each = { 60 | result <- NULL 61 | options(arrow.use_altrep = alt_rep) 62 | }, 63 | run = { 64 | result <- lapply(arrays, as_vector_func) 65 | }, 66 | after_each = { 67 | # altrep checking 68 | # TODO: should we also check that one of the classes is "arrow"? 69 | is_altrep <- unlist(purrr::map(result, ~!is.null(.Internal(altrep_class(.))))) 70 | if (alt_rep) { 71 | altrep_ok <- all(is_altrep) 72 | } else { 73 | altrep_ok <- all(!is_altrep) 74 | } 75 | 76 | stopifnot( 77 | "The array lengths do not match" = all.equal(lapply(result, length), array_lengths), 78 | "The objects do not match the altrep parameter" = altrep_ok 79 | ) 80 | 81 | # reset the altrep option 82 | options(arrow.use_altrep = NULL) 83 | result <- NULL 84 | }, 85 | valid_params = function(params) { 86 | # TODO: only enable on >5.0.0? 87 | params 88 | }, 89 | packages_used = function(params) "arrow" 90 | ) 91 | 92 | -------------------------------------------------------------------------------- /R/bm-write-file.R: -------------------------------------------------------------------------------- 1 | #' Benchmark file writing 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-file id, or a CSV(?) 
file path to read in 5 | #' * `file_type` One of `c("parquet", "feather", "fst")` 6 | #' * `compression` One of the values: `r paste(known_compressions, collapse = ", ")` 7 | #' * `input_type` One of `c("arrow_table", "data_frame")` 8 | #' 9 | #' @export 10 | write_file <- Benchmark("file-write", 11 | setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"), 12 | file_type = c("parquet", "feather"), 13 | compression = c("uncompressed", "snappy", "lz4"), 14 | input_type = c("arrow_table", "data_frame")) { 15 | # source defaults are retrieved from the function definition (all available 16 | # known_sources) and then read the source in as a data.frame 17 | source <- ensure_source(source) 18 | df <- read_source(source, as_data_frame = match.arg(input_type) == "data_frame") 19 | # file_type defaults to parquet or feather, but can accept fst as well 20 | file_type <- match.arg(file_type, c("parquet", "feather", "fst")) 21 | 22 | # Map string param name to functions 23 | get_write_func <- function(file_type, compression) { 24 | force(compression) 25 | if (file_type == "feather") { 26 | return(function(...) arrow::write_feather(..., compression = compression)) 27 | } else if (file_type == "parquet") { 28 | return(function(...) arrow::write_parquet(..., compression = compression)) 29 | } else if (file_type == "fst") { 30 | # fst is always zstd, just a question of what level of compression 31 | level <- ifelse(compression == "uncompressed", 0, 50) 32 | return(function(...) fst::write_fst(..., compress = level)) 33 | } else { 34 | stop("Unsupported file_type: ", file_type, call. = FALSE) 35 | } 36 | } 37 | write_func <- get_write_func(file_type, compression) 38 | 39 | # put the necessary variables into a BenchmarkEnvironment to be used when 40 | # the benchmark is running. 
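    # (the objects bound here are what the before_each/run/after_each
    # expressions below can see from setup)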
41 | BenchEnvironment( 42 | write_func = write_func, 43 | file_type = file_type, 44 | source = source, 45 | df = df 46 | ) 47 | }, 48 | # delete the results before each iteration 49 | before_each = { 50 | result_file <- tempfile() 51 | }, 52 | # the benchmark to run 53 | run = { 54 | write_func(df, result_file) 55 | }, 56 | # after each iteration, check the dimensions and delete the results 57 | after_each = { 58 | stopifnot(file.exists(result_file)) 59 | unlink(result_file) 60 | }, 61 | # validate that the parameters given are compatible 62 | valid_params = function(params) { 63 | # make sure that the file_type and the compression is compatible 64 | # and fst doesn't have arrow_table input_type 65 | drop <- !validate_format(params$file_type, params$compression) | 66 | params$file_type == "fst" & params$input_type == "arrow_table" 67 | params[!drop,] 68 | }, 69 | # packages used when specific file_types are used 70 | packages_used = function(params) { 71 | pkg_map <- c( 72 | "feather" = "arrow", 73 | "parquet" = "arrow", 74 | "fst" = "fst" 75 | ) 76 | pkg_map[params$file_type] 77 | } 78 | ) 79 | -------------------------------------------------------------------------------- /tests/testthat/test-result.R: -------------------------------------------------------------------------------- 1 | test_that("R6.1 classes inherit properly", { 2 | SumClass <- R6Point1Class( 3 | classname = "SumClass", 4 | static = list(sum = sum, x = 1:100) 5 | ) 6 | 7 | sum_class <- SumClass$new() 8 | expect_s3_class(sum_class, "SumClass") 9 | expect_identical(SumClass$sum, sum) 10 | 11 | SumOtherClass <- R6Point1Class( 12 | classname = "SumOtherClass", 13 | inherit = SumClass 14 | ) 15 | 16 | sum_other_class <- SumOtherClass$new() 17 | expect_s3_class(sum_other_class, "SumOtherClass") 18 | expect_identical(SumOtherClass$sum, sum) 19 | 20 | expect_equal(SumOtherClass$sum(SumOtherClass$x), 5050L) 21 | }) 22 | 23 | 24 | test_that("inherited serialization/deserialization methods work", { 25 | res <- BenchmarkResult$new( 26 | run_name = "fake_run", 27 | tags = c(is_real = FALSE), 28 | optional_benchmark_info = list( 29 | name = "fake", 30 | result = data.frame(time = 0, status = "superfast", stringsAsFactors = FALSE), 31 | params = list(speed = "lightning") 32 | ) 33 | ) 34 | 35 | # sanity 36 | expect_s3_class(res, "BenchmarkResult") 37 | expect_equal(res$run_name, "fake_run") 38 | 39 | # roundtrips 40 | expect_equal(res$json, BenchmarkResult$from_json(res$json)$json) 41 | expect_equal(res$list, BenchmarkResult$from_list(res$list)$list) 42 | 43 | temp <- tempfile(fileext = '.json') 44 | res$write_json(temp) 45 | expect_equal(res$json, BenchmarkResult$read_json(temp)$json) 46 | file.remove(temp) 47 | }) 48 | 49 | test_that("S3 methods work", { 50 | github <- list( 51 | repository = "https://github.com/conchair/conchair", 52 | commit = "2z8c9c49a5dc4a179243268e4bb6daa5", 53 | pr_number = 47L 54 | ) 55 | run_reason <- "mocked-arrowbench-unit-test" 56 | run_name <- paste(run_reason, github$commit, sep = ": ") 57 | host_name <- "fake-computer" 58 | 59 | withr::with_envvar( 60 | c( 61 | CONBENCH_PROJECT_REPOSITORY = github$repository, 62 | CONBENCH_PROJECT_PR_NUMBER = github$pr_number, 63 | CONBENCH_PROJECT_COMMIT = github$commit, 64 | CONBENCH_MACHINE_INFO_NAME = host_name 65 | ), 66 | { 67 | res <- BenchmarkResult$new( 68 | run_name = run_name, 69 | run_reason = run_reason, 70 | tags = c(is_real = FALSE), 71 | optional_benchmark_info = list( 72 | name = "fake", 73 | result = data.frame(time = 0, status = "superfast", 
stringsAsFactors = FALSE), 74 | params = list(speed = "lightning") 75 | ) 76 | ) 77 | } 78 | ) 79 | 80 | expect_equal(as.character(res), res$json) 81 | expect_equal(as.list(res), res$list) 82 | 83 | expect_equal(as.data.frame(res), res$to_dataframe()) 84 | expect_equal( 85 | as.data.frame(res), 86 | structure( 87 | list(iteration = 1L, time = 0, status = "superfast", speed = "lightning"), 88 | row.names = c(NA, -1L), 89 | class = c("tbl_df", "tbl", "data.frame"), 90 | run_name = run_name, 91 | run_reason = run_reason, 92 | github = github, 93 | timestamp = res$timestamp, 94 | tags = c(is_real = FALSE) 95 | ) 96 | ) 97 | 98 | expect_equal(get_params_summary(res), res$params_summary) 99 | expect_equal( 100 | get_params_summary(res), 101 | structure( 102 | list(speed = "lightning", did_error = FALSE), 103 | row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame") 104 | ) 105 | ) 106 | }) 107 | -------------------------------------------------------------------------------- /R/bm-read-file.R: -------------------------------------------------------------------------------- 1 | #' Benchmark file reading 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-file id, or a CSV(?) file path to read in 5 | #' * `file_type` One of `c("parquet", "feather", "fst")` 6 | #' * `compression` One of the values: `r paste(known_compressions, collapse = ", ")` 7 | #' * `output_type` One of `c("arrow_table", "data_frame")` 8 | #' 9 | #' @export 10 | read_file <- Benchmark("file-read", 11 | setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"), 12 | # TODO: break out feather_v1 and feather_v2, feather_v2 only in >= 0.17 13 | file_type = c("parquet", "feather"), 14 | compression = c("uncompressed", "snappy", "lz4"), 15 | output_type = c("arrow_table", "data_frame")) { 16 | # file_type defaults to parquet or feather, but can accept fst as well 17 | file_type <- match.arg(file_type, c("parquet", "feather", "fst")) 18 | # the output_type defaults are retrieved from the function definition (arrow_table and data_frame) 19 | output_type <- match.arg(output_type) 20 | 21 | # ensure that we have the right kind of file available 22 | input_file <- ensure_format(source, file_type, compression) 23 | # retrieve the dimensions for run-checking after the benchmark 24 | result_dim <- get_source_attr(source, "dim") 25 | 26 | # put the necessary variables into a BenchmarkEnvironment to be used when the 27 | # benchmark is running. 
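    # (likewise, read_func, input_file, result_dim, and as_data_frame bound
    # here are what the run and after_each expressions below can see)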
28 | BenchEnvironment( 29 | # get the correct read function for the input file_type 30 | read_func = get_read_function(file_type), 31 | input_file = input_file, 32 | result_dim = result_dim, 33 | as_data_frame = output_type == "data_frame" 34 | ) 35 | }, 36 | # delete the results before each iteration 37 | before_each = { 38 | result <- NULL 39 | }, 40 | # the benchmark to run 41 | run = { 42 | result <- read_func(input_file, as_data_frame = as_data_frame) 43 | }, 44 | # after each iteration, check the dimensions and delete the results 45 | after_each = { 46 | stopifnot(identical(dim(result), result_dim)) 47 | result <- NULL 48 | }, 49 | # validate that the parameters given are compatible 50 | valid_params = function(params) { 51 | # make sure that the file_type and the compression is compatible 52 | # and fst doesn't have arrow_table output_type 53 | drop <- !validate_format(params$file_type, params$compression) | 54 | params$output_type == "arrow_table" & params$file_type == "fst" 55 | params[!drop,] 56 | }, 57 | # packages used when specific file_types are used 58 | packages_used = function(params) { 59 | pkg_map <- c( 60 | "feather" = "arrow", 61 | "parquet" = "arrow", 62 | "fst" = "fst" 63 | ) 64 | pkg_map[params$file_type] 65 | } 66 | ) 67 | 68 | #' Get a reader 69 | #' 70 | #' @param file_type what file_type to read 71 | #' 72 | #' @return the read function to use 73 | #' @export 74 | get_read_function <- function(file_type) { 75 | pkg_map <- c( 76 | "feather" = "arrow", 77 | "parquet" = "arrow", 78 | "fst" = "fst" 79 | ) 80 | library(pkg_map[[file_type]], character.only = TRUE, warn.conflicts = FALSE) 81 | 82 | if (file_type == "feather") { 83 | return(function(...) arrow::read_feather(...)) 84 | } else if (file_type == "parquet") { 85 | return(function(...) arrow::read_parquet(...)) 86 | } else if (file_type == "fst") { 87 | return(function(..., as_data_frame) fst::read_fst(...)) 88 | } else { 89 | stop("Unsupported file_type: ", file_type, call. 
= FALSE) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /R/bm-dataset-taxi-parquet.R: -------------------------------------------------------------------------------- 1 | #' Benchmark Taxi dataset (Parquet) reading 2 | #' 3 | #' @section Parameters: 4 | #' * `query` Name of a known query to run; see `dataset_taxi_parquet$cases` 5 | #' 6 | #' @export 7 | dataset_taxi_parquet <- Benchmark("partitioned-dataset-filter", 8 | setup = function(query = names(dataset_taxi_parquet$cases)) { 9 | library("dplyr", warn.conflicts = FALSE) 10 | dataset <- ensure_dataset("taxi_parquet") 11 | query <- dataset_taxi_parquet$cases[[match.arg(query)]] 12 | 13 | BenchEnvironment( 14 | query = query, 15 | dataset = dataset 16 | ) 17 | }, 18 | before_each = { 19 | result <- NULL 20 | }, 21 | run = { 22 | result <- query$query(dataset) 23 | }, 24 | after_each = { 25 | query$assert(result) 26 | }, 27 | tags_fun = function(params) { 28 | # to reproduce this: https://github.com/voltrondata-labs/benchmarks/blob/main/benchmarks/partitioned_dataset_filter_benchmark.py#L23 29 | params$dataset <- "dataset-taxi-parquet" 30 | params 31 | }, 32 | cases = list( 33 | vignette = list( 34 | query = function(ds) { 35 | ds %>% 36 | filter(total_amount > 100, year == 2015) %>% 37 | select(tip_amount, total_amount, passenger_count) %>% 38 | group_by(passenger_count) %>% 39 | summarize( 40 | tip_pct = median(100 * tip_amount / total_amount), 41 | n = n() 42 | ) %>% 43 | collect() 44 | }, 45 | assert = function(result) { 46 | stopifnot( 47 | identical(dim(result), c(10L, 3L)), 48 | identical(names(result), c("passenger_count", "tip_pct", "n")), 49 | identical(sum(result$n), 200807L) 50 | ) 51 | } 52 | ), 53 | payment_type_3 = list( 54 | query = function(ds) { 55 | ds %>% 56 | filter(payment_type == "3") %>% 57 | select(year, month, passenger_count) %>% 58 | group_by(year, month) %>% 59 | summarize( 60 | total_passengers = sum(passenger_count, na.rm = TRUE), 61 | n = n() 62 | ) %>% 63 | collect() 64 | }, 65 | assert = function(result) { 66 | stopifnot( 67 | identical(dim(result), c(54L, 4L)), 68 | identical(names(result), c("year", "month", "total_passengers", "n")), 69 | identical(sum(result$n), 2412399L) 70 | ) 71 | } 72 | ), 73 | # The intention of this is to filter + read from a small number of parquet 74 | # files (smaller than the number of threads) to see if parallelism is 75 | # beneficial 76 | small_no_files = list( 77 | query = function(ds) { 78 | ds %>% 79 | filter(total_amount > 20, year %in% c(2011, 2019) & month == 2) %>% 80 | select(tip_amount, total_amount, passenger_count) %>% 81 | group_by(passenger_count) %>% 82 | summarize( 83 | tip_pct = median(100 * tip_amount / total_amount), 84 | n = n() 85 | ) %>% 86 | collect() 87 | }, 88 | assert = function(result) { 89 | stopifnot( 90 | identical(dim(result), c(11L, 3L)), 91 | identical(names(result), c("passenger_count", "tip_pct", "n")), 92 | identical(sum(result$n), 3069271L) 93 | ) 94 | } 95 | ), 96 | dims = list( 97 | query = function(ds) { 98 | dim(ds) 99 | }, 100 | assert = function(result) { 101 | stopifnot("dims do not match" = identical(result, c(1547741381L, 20L))) 102 | } 103 | ) 104 | ), 105 | packages_used = function(params) { 106 | c("arrow", "dplyr") 107 | } 108 | ) 109 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | 
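# Run R CMD check across the OS / R-version matrix below on every push and
# pull request.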
name: R-CMD-check 4 | 5 | jobs: 6 | R-CMD-check: 7 | runs-on: ${{ matrix.config.os }} 8 | 9 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | config: 15 | - {os: macOS-12, r: 'release', force_suggests: true } 16 | # the stderror isn't redirected correctly on windows, (at least in GHA) 17 | # TODO: figure out if this runs correctly on windows at all 18 | # - {os: windows-latest, r: 'release'} 19 | # We explicitly set the user agent for R devel to the current release version of R so RSPM serves the release binaries. 20 | - {os: ubuntu-20.04, r: 'devel', force_suggests: true } 21 | - {os: ubuntu-20.04, r: 'release', force_suggests: true } 22 | - {os: ubuntu-20.04, r: 'oldrel', force_suggests: true } 23 | - {os: ubuntu-20.04, r: '4.0', force_suggests: true } 24 | 25 | env: 26 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 27 | RSPM: ${{ matrix.config.rspm }} 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | 30 | steps: 31 | - uses: actions/checkout@v3 32 | 33 | - uses: r-lib/actions/setup-r@v2 34 | with: 35 | r-version: ${{ matrix.config.r }} 36 | use-public-rspm: true 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - name: Query dependencies 41 | run: | 42 | install.packages('remotes') 43 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 44 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 45 | shell: Rscript {0} 46 | 47 | - name: Cache R packages 48 | if: runner.os != 'Windows' 49 | uses: actions/cache@v2 50 | with: 51 | path: ${{ env.R_LIBS_USER }} 52 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2-${{ hashFiles('.github/depends.Rds') }} 53 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2- 54 | 55 | - name: Install system dependencies 56 | if: runner.os == 'Linux' 57 | run: | 58 | while read -r cmd 59 | do 60 | eval sudo $cmd 61 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 62 | 63 | - name: Install dependencies 64 | run: | 65 | remotes::install_deps(dependencies = TRUE) 66 | remotes::install_cran("rcmdcheck") 67 | shell: Rscript {0} 68 | 69 | - name: Session info 70 | run: | 71 | options(width = 100) 72 | pkgs <- installed.packages()[, "Package"] 73 | sessioninfo::session_info(pkgs, include_base = TRUE) 74 | shell: Rscript {0} 75 | 76 | - name: Check 77 | env: 78 | _R_CHECK_CRAN_INCOMING_: false 79 | _R_CHECK_FORCE_SUGGESTS_: ${{ matrix.config.force_suggests }} 80 | run: | 81 | if ('${{ matrix.config.r }}' == 'release' && grepl('ubuntu', '${{ matrix.config.os }}')) { 82 | Sys.setenv("ARROWBENCH_TEST_CUSTOM_DUCKDB" = TRUE) 83 | } 84 | options(crayon.enabled = TRUE) 85 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 86 | shell: Rscript {0} 87 | 88 | - name: Show testthat output 89 | if: always() 90 | run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true 91 | shell: bash 92 | 93 | - name: Upload check results 94 | if: failure() 95 | uses: actions/upload-artifact@main 96 | with: 97 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 98 | path: check 99 | -------------------------------------------------------------------------------- /R/external-dependencies.R: -------------------------------------------------------------------------------- 1 | external_cli_available <- function(cli) { 2 | res <- processx::run("which", cli, error_on_status = FALSE) 3 | 4 | if (res$status != 0L) { 5 | 
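    # build an actionable warning: pipx has its own install path, while every
    # other CLI is installed via pipx, so the hint differs by `cli`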
msg <- paste(cli, 'not installed or not on $PATH.\n\n') 6 | if (cli == "pipx") { 7 | msg <- paste0( 8 | msg, 9 | glue::glue('It can be installed with `install_pipx()`\n\n'), 10 | 'If already installed, ensure it is on $PATH, e.g. by running ', 11 | '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' 12 | ) 13 | } else { 14 | msg <- paste0( 15 | msg, 16 | glue::glue('It can be installed with `install_pipx(); install_{cli}()`\n\n'), 17 | 'If already installed with pipx, ensure it is on $PATH, e.g. by running ', 18 | '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' 19 | ) 20 | } 21 | 22 | warning(warningCondition(msg, class = "notInstalledWarning")) 23 | } 24 | 25 | res$status == 0L 26 | } 27 | 28 | pipx_available <- function() { 29 | external_cli_available(cli = "pipx") 30 | } 31 | 32 | benchconnect_available <- function() { 33 | external_cli_available(cli = "benchconnect") 34 | } 35 | 36 | datalogistik_available <- function() { 37 | external_cli_available(cli = "datalogistik") 38 | } 39 | 40 | 41 | #' Install pipx 42 | #' 43 | #' Install [pipx](https://pypa.github.io/pipx/), a version of pip that installs 44 | #' Python packages in isolated environments where they will always be available 45 | #' regardless of which version of Python is presently on `$PATH`. Especially 46 | #' useful for installing packages designed to be used via CLIs. 47 | #' 48 | #' @export 49 | install_pipx <- function() { 50 | processx::run("sh", c("-c", "pip install pipx && pipx ensurepath"), echo_cmd = TRUE) 51 | } 52 | 53 | 54 | #' Install benchconnect 55 | #' 56 | #' Install [benchconnect](https://github.com/conbench/conbench/tree/main/benchconnect), 57 | #' a utility for sending benchmark results to a Conbench server. 58 | #' 59 | #' @export 60 | install_benchconnect <- function() { 61 | stopifnot(pipx_available()) 62 | 63 | url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect" 64 | 65 | if (suppressWarnings(benchconnect_available(), classes = "notInstalledWarning")) { 66 | if (interactive()) { 67 | ans <- readline("benchconnect already installed. Update? [Y/n]: ") 68 | } else { 69 | ans <- "y" 70 | } 71 | if (tolower(ans) %in% c("y", "")) { 72 | processx::run("pipx", c("install", "--include-deps", "--force", url), echo_cmd = TRUE) 73 | } else { 74 | invisible() 75 | } 76 | } else { 77 | processx::run("pipx", c("install", "--include-deps", url), echo_cmd = TRUE) 78 | } 79 | } 80 | 81 | 82 | #' Install datalogistik 83 | #' 84 | #' Install [datalogistik](https://github.com/conbench/datalogistik), a utility 85 | #' for generating, downloading, and converting datasets for benchmarking. 86 | #' 87 | #' Only for interactive use. 88 | #' 89 | #' @export 90 | install_datalogistik <- function() { 91 | # TODO: install pipx? 92 | stopifnot(pipx_available()) 93 | 94 | ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main") 95 | url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}") 96 | 97 | pipx_call <- c("install", "--pip-args=--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary") 98 | if (datalogistik_available()) { 99 | # default to yes (and also this will make it work in non-interactive sessions) 100 | ans <- readline("datalogistik already installed. Update? 
[Y/n]: ") 101 | if (tolower(ans) %in% c("y", "")) { 102 | # we need the extra args to depend on the development version of arrow 103 | return(processx::run("pipx", c(pipx_call, "--force", url), echo_cmd = TRUE)) 104 | } else { 105 | return(invisible()) 106 | } 107 | } 108 | 109 | processx::run("pipx", c(pipx_call, url), echo_cmd = TRUE) 110 | } 111 | -------------------------------------------------------------------------------- /R/bm-dataset-taxi-2013.R: -------------------------------------------------------------------------------- 1 | #' Benchmark Taxi 2013 dataset reading 2 | #' 3 | #' @section Parameters: 4 | #' * `dataset` Name of dataset to use, either `taxi_2013` or `taxi_2013_sample` (for testing) 5 | #' * `query` Name of a known query to run; see `dataset_taxi_2013$cases` 6 | #' 7 | #' @export 8 | dataset_taxi_2013 <- Benchmark( 9 | "dataset_taxi_2013", 10 | setup = function(dataset = "taxi_2013", 11 | query = names(dataset_taxi_2013$cases)) { 12 | name <- match.arg(dataset, c("taxi_2013", "taxi_2013_sample")) 13 | library("dplyr", warn.conflicts = FALSE) 14 | dataset <- ensure_dataset(name) 15 | query <- dataset_taxi_2013$cases[[match.arg(query)]] 16 | 17 | BenchEnvironment( 18 | name = name, 19 | query = query, 20 | dataset = dataset 21 | ) 22 | }, 23 | before_each = { 24 | result <- NULL 25 | }, 26 | run = { 27 | result <- query$query(dataset) 28 | }, 29 | after_each = { 30 | query$assert(result, name) 31 | }, 32 | cases = list( 33 | basic = list( 34 | query = function(ds) { 35 | ds %>% 36 | filter(total_amount > 100, vendor_id == "CMT") %>% 37 | select(tip_amount, total_amount, payment_type) %>% 38 | group_by(payment_type) %>% 39 | summarize( 40 | tip_pct = median(100 * tip_amount / total_amount), 41 | n = n() 42 | ) %>% 43 | collect() 44 | }, 45 | assert = function(result, name) { 46 | stopifnot( 47 | identical(dim(result), c(if (name == "taxi_2013_sample") 0L else 4L, 3L)), 48 | identical(names(result), c("payment_type", "tip_pct", "n")), 49 | identical(sum(result$n), if (name == "taxi_2013_sample") 0L else 68158L) 50 | ) 51 | } 52 | ), 53 | payment_type_crd = list( 54 | query = function(ds) { 55 | ds %>% 56 | filter(payment_type == "CRD") %>% 57 | mutate(year = year(pickup_datetime), month = month(pickup_datetime)) %>% 58 | select(year, month, total_amount) %>% 59 | group_by(year, month) %>% 60 | summarize( 61 | total_amount = sum(total_amount, na.rm = TRUE), 62 | n = n() 63 | ) %>% 64 | collect() 65 | }, 66 | assert = function(result, name) { 67 | stopifnot( 68 | identical(dim(result), c(12L, 4L)), 69 | identical(names(result), c("year", "month", "total_amount", "n")), 70 | identical(sum(result$n), if (name == "taxi_2013_sample") 567L else 93334004L) 71 | ) 72 | } 73 | ), 74 | # The intention of this is to filter + read from a small number of csv 75 | # files (smaller than the number of threads) to see if parallelism is 76 | # beneficial 77 | small_no_files = list( 78 | query = function(ds) { 79 | ds %>% 80 | mutate(month = month(pickup_datetime)) %>% 81 | filter(total_amount > 20, month %in% c(4L, 7L)) %>% 82 | select(tip_amount, total_amount, payment_type) %>% 83 | group_by(payment_type) %>% 84 | summarize( 85 | tip_pct = median(100 * tip_amount / total_amount), 86 | n = n() 87 | ) %>% 88 | collect() 89 | }, 90 | assert = function(result, name) { 91 | stopifnot( 92 | identical(dim(result), c(if (name == "taxi_2013_sample") 2L else 5L, 3L)), 93 | identical(names(result), c("payment_type", "tip_pct", "n")), 94 | identical(sum(result$n), if (name == "taxi_2013_sample") 
36L else 4797187L) 95 | ) 96 | } 97 | ), 98 | dims = list( 99 | query = function(ds) { 100 | dim(ds) 101 | }, 102 | assert = function(result, name) { 103 | stopifnot("dims do not match" = identical(result, c(if (name == "taxi_2013_sample") 1000L else 173179759L, 11L))) 104 | } 105 | ) 106 | ), 107 | packages_used = function(params) { 108 | c("arrow", "dplyr") 109 | } 110 | ) 111 | -------------------------------------------------------------------------------- /R/bm-read-csv.R: -------------------------------------------------------------------------------- 1 | #' Benchmark CSV reading 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-source id, or a CSV file path to read in 5 | #' * `reader` One of `c("arrow", "data.table", "vroom", "readr")` 6 | #' * `compression` One of `c("uncompressed", "gzip")` 7 | #' * `output_format` One of `c("arrow_table", "data_frame")` 8 | #' 9 | #' @export 10 | #' @importFrom R.utils gzip 11 | read_csv <- Benchmark( 12 | "read_csv", 13 | setup = function(source = names(known_sources), 14 | reader = "arrow", 15 | compression = c("uncompressed", "gzip"), 16 | output_format = c("arrow_table", "data_frame")) { 17 | reader <- match.arg(reader, c("arrow", "data.table", "vroom", "readr")) 18 | compression <- match.arg(compression) 19 | output_format <- match.arg(output_format) 20 | # ensure that the file exists 21 | input_file <- ensure_format(source, "csv", compression) 22 | 23 | # Map string param name to function 24 | delim <- get_source_attr(source, "delim") %||% "," 25 | read_func <- get_csv_reader(reader, delim) 26 | result_dim <- get_source_attr(source, "dim") 27 | 28 | BenchEnvironment( 29 | # (read_func was already built above from the reader and delim params) 30 | read_func = read_func, 31 | input_file = input_file, 32 | result_dim = result_dim, 33 | as_data_frame = output_format == "data_frame", 34 | delim = delim 35 | ) 36 | }, 37 | before_each = { 38 | result <- NULL 39 | }, 40 | run = { 41 | result <- read_func(input_file, delim = delim, as_data_frame = as_data_frame) 42 | }, 43 | after_each = { 44 | correct_format <- FALSE 45 | if (as_data_frame) { 46 | correct_format <- inherits(result, "data.frame") 47 | } else { 48 | correct_format <- inherits(result, c("Table", "ArrowObject")) 49 | } 50 | 51 | stopifnot( 52 | # we have a tolerance of 1 here because vroom reads 1 additional row of 53 | # all NAs since there are two newlines after the header 54 | "The dimensions do not match" = all.equal(dim(result), result_dim, tolerance = 1), 55 | "The format isn't correct" = correct_format 56 | ) 57 | result <- NULL 58 | }, 59 | valid_params = function(params) { 60 | # compression was only supported from arrow 1.0.0 onward, but `lib_path` 61 | # may not be set here 62 | version_check <- (!is.null(params$lib_path) && params$lib_path < "1.0") 63 | 64 | # on macOS data.table doesn't (typically) have multi core support 65 | # TODO: check if this is actually enabled before running? 
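    # drop parameter combinations that can't work: only arrow can return an
    # arrow_table, readr only runs in the single-cpu_count configuration, and
    # arrow < 1.0 can't read compressed CSVs directly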
#' Get a CSV reader
#'
#' @param reader the reader to use
#' @param delim the delimiter to use
#'
#' @return the csv reader
#' @export
get_csv_reader <- function(reader, delim) {
  library(reader, character.only = TRUE, warn.conflicts = FALSE)
  # TODO: allow other readers to read non-comma-delimited files
  if (reader == "arrow") {
    # TODO: if gzipped and the arrow csv reader version doesn't support that, unzip?
    return(function(...) arrow::read_delim_arrow(...))
  } else if (reader == "readr") {
    return(function(..., as_data_frame) readr::read_delim(...))
  } else if (reader == "data.table") {
    sep <- force(delim)
    return(function(..., as_data_frame, delim) data.table::fread(..., sep = sep))
  } else if (reader == "vroom") {
    # altrep = FALSE because otherwise you aren't getting the data
    # TODO: maybe we do want to compare, esp. later when we do altrep
    return(function(..., as_data_frame) vroom::vroom(..., altrep = FALSE))
  } else {
    stop("Unsupported reader: ", reader, call. = FALSE)
  }
}
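# Editor's note: a usage sketch, not part of the original file. The point of
# the wrappers above is that every reader gets one uniform call signature:
# arguments a given backend doesn't understand (`as_data_frame`, `delim`) are
# absorbed by the wrapper, so the benchmark's `run` block can call any reader
# identically.
if (FALSE) {
  f <- get_csv_reader("data.table", ",")
  # data.table::fread() never sees `delim` or `as_data_frame`; it gets the
  # captured `sep` instead. The file path here is hypothetical.
  df <- f("some_file.csv", delim = ",", as_data_frame = TRUE)
}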
--------------------------------------------------------------------------------
/tests/testthat/test-ensure-format.R:
--------------------------------------------------------------------------------
# create a temporary directory to be used as the data directory
temp_dir <- tempfile()
dir.create(temp_dir)

withr::with_envvar(
  list(ARROWBENCH_DATA_DIR = temp_dir), {
    test_that("ensure_format", {
      # there are no temp files yet
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.uncompressed.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.snappy.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv.gz")))

      # we can transform from one format to another
      ensure_format("nyctaxi_sample", "parquet")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.uncompressed.parquet")))

      ensure_format("nyctaxi_sample", "parquet", compression = "snappy")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.snappy.parquet")))

      ensure_format("nyctaxi_sample", "parquet", compression = "snappy", chunk_size = 100000)
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.chunk_size_1e+05.snappy.parquet")))

      ensure_format("nyctaxi_sample", "feather", compression = "lz4")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.lz4.feather")))

      # note: this is slightly bigger than the chunk_size above, but we get the same rounded value
      ensure_format("nyctaxi_sample", "feather", chunk_size = 100010)
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.chunk_size_1e+05.uncompressed.feather")))

      # but if the difference is bigger, we get that value reflected
      ensure_format("nyctaxi_sample", "feather", chunk_size = 100100)
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.chunk_size_1.001e+05.uncompressed.feather")))

      ensure_format("nyctaxi_sample", "csv", compression = "gzip")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv.gz")))

      # but because the source is already a csv, this doesn't create a new file
      # in temp; instead it references the original in situ
      out <- ensure_format("nyctaxi_sample", "csv")
      expect_identical(out, ensure_source("nyctaxi_sample"))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv")))
    })

    test_that("ensure_format with tpch", {
      # don't test if we are not already trying to install the custom duckdb
      skip_if(Sys.getenv("ARROWBENCH_TEST_CUSTOM_DUCKDB", "") == "")

      # there are no temp files yet
      expect_false(file.exists(file.path(temp_dir, "lineitem_0.0001.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "lineitem_0.0001.uncompressed.parquet")))

      # we can generate
      tpch_files <- ensure_tpch(0.0001)
      expect_true(file.exists(file.path(temp_dir, "lineitem_0.0001.parquet")))

      # and we can ensure format
      lineitem <- ensure_format(tpch_files[["lineitem"]], "parquet")
      expect_equal(lineitem, file.path(temp_dir, "temp", "lineitem_0.0001.uncompressed.parquet"))
      expect_true(file.exists(file.path(temp_dir, "temp", "lineitem_0.0001.uncompressed.parquet")))
    })
  })

test_that("format + compression validation with a df", {
  df <- expand.grid(
    source = "a source",
    lib_path = "some/path",
    format = c("csv", "parquet", "fst"),
    compression = c("gzip", "zstd", "snappy"),
    stringsAsFactors = FALSE
  )

  expect_identical(
    validate_format(df$format, df$compression),
    c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE)
  )
})

test_that("format + compression validation", {
  expect_true(stop_if_not_valid_format("csv", "gzip"))

  expect_error(
    stop_if_not_valid_format("csv", "snappy"),
    "The format csv does not support snappy compression"
  )
})
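# Editor's note: a worked example, not part of the original file, of the
# rounding behavior the chunk_size expectations above depend on. The file label
# presumably comes from rounding to 4 significant digits and formatting in
# scientific notation, so 100010 collapses to the same "1e+05" label as 100000,
# while 100100 does not:
if (FALSE) {
  format(signif(100010, 4), scientific = TRUE)  # "1e+05"
  format(signif(100100, 4), scientific = TRUE)  # "1.001e+05"
}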
--------------------------------------------------------------------------------
/tests/testthat/test-publish.R:
--------------------------------------------------------------------------------
test_that("call benchconnect works", {
  expect_match(call_benchconnect("--help"), "Command line utilities for interacting with a Conbench API")
})

test_that("augment_run() works", {
  reason <- "test"
  host_name <- "fake-computer"
  github <- list(
    commit = "fake-commit",
    repository = "https://github.com/conchair/conchair",
    pr_number = "47"
  )

  unaugmented_run <- BenchmarkRun$new(reason = reason, github = NULL)
  withr::with_envvar(
    c(
      "CONBENCH_MACHINE_INFO_NAME" = host_name,
      "CONBENCH_PROJECT_REPOSITORY" = github$repository,
      "CONBENCH_PROJECT_COMMIT" = github$commit,
      "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number
    ),
    { augmented_run <- augment_run(unaugmented_run) }
  )

  expect_equal(unaugmented_run$reason, reason)
  expect_equal(augmented_run$reason, reason)

  expect_null(unaugmented_run$id)
  expect_type(augmented_run$id, "character")

  expect_null(unaugmented_run$machine_info)
  expect_type(augmented_run$machine_info, "list")
  expect_type(augmented_run$machine_info$name, "character")
  expect_equal(augmented_run$machine_info$name, host_name)

  expect_null(unaugmented_run$github)
  expect_equal(augmented_run$github, github)
})


test_that("augment_result() works", {
  stats <- list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3)
  host_name <- "fake-computer"
  github <- list(
    commit = "fake-commit",
    repository = "conchair/conchair",
    pr_number = "47"
  )

  unaugmented_result <- BenchmarkResult$new(stats = stats, github = NULL)
  withr::with_envvar(
    c(
      "CONBENCH_MACHINE_INFO_NAME" = host_name,
      "CONBENCH_PROJECT_REPOSITORY" = github$repository,
      "CONBENCH_PROJECT_COMMIT" = github$commit,
      "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number
    ),
    { augmented_result <- augment_result(unaugmented_result) }
  )

  expect_equal(unaugmented_result$timestamp, augmented_result$timestamp)

  expect_equal(unaugmented_result$stats, stats)
  expect_equal(augmented_result$stats, stats)

  expect_null(unaugmented_result$batch_id)
  expect_type(augmented_result$batch_id, "character")

  expect_null(unaugmented_result$machine_info)
  expect_type(augmented_result$machine_info, "list")
  expect_type(augmented_result$machine_info$name, "character")
  expect_equal(augmented_result$machine_info$name, host_name)

  expect_null(unaugmented_result$github)
  expect_equal(augmented_result$github, github)
})


test_that("start_run() works", {
  bm_run <- BenchmarkRun$new(
    name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5",
    reason = "arrowbench-unit-test",
    github = list(
      commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
      repository = "https://github.com/conchair/conchair",
      pr_number = "47"
    )
  )

  mockery::stub(
    where = start_run,
    what = "call_benchconnect",
    how = function(args) {
      expect_identical(args, c("start", "run", "--json", bm_run$json))
    }
  )
  start_run(run = bm_run)
})


test_that("submit_result() works", {
  bm_result <- BenchmarkResult$new(
    run_name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5",
    run_reason = "arrowbench-unit-test",
    github = list(
      commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
      repository = "https://github.com/conchair/conchair",
      pr_number = "47"
    ),
    stats = list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3)
  )

  mockery::stub(
    where = submit_result,
    what = "call_benchconnect",
    how = function(args) {
      expect_identical(args, c("submit", "result", "--json", bm_result$json))
    }
  )
  submit_result(result = bm_result)
})


test_that("finish_run() works", {
  mockery::stub(
    where = finish_run,
    what = "call_benchconnect",
    how = function(args) {
      expect_identical(args, c("finish", "run", "--json", "{}"))
    }
  )
  finish_run()
})

unlink("benchconnect-state.json")
--------------------------------------------------------------------------------
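# Editor's note: a hedged end-to-end sketch, not part of the original sources,
# tying together the lifecycle the tests above exercise: a run is started,
# results are submitted against it, and the run is finished, with each helper
# shelling out to the benchconnect CLI via call_benchconnect().
if (FALSE) {
  run <- augment_run(BenchmarkRun$new(reason = "manual-test", github = NULL))
  start_run(run = run)

  result <- augment_result(BenchmarkResult$new(
    stats = list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3),
    github = NULL
  ))
  submit_result(result = result)

  finish_run()
}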