├── .github
│   ├── .gitignore
│   └── workflows
│       ├── test-coverage.yaml
│       ├── pkgdown.yaml
│       ├── pr-commands.yaml
│       └── R-CMD-check.yaml
├── tests
│   ├── testthat
│   │   ├── teardown.R
│   │   ├── setup.R
│   │   ├── test-bm-table-to-df.R
│   │   ├── test-bm-array-to-vector.R
│   │   ├── test-bm-array-altrep-materialization.R
│   │   ├── test-bm-write-csv.R
│   │   ├── test-bm-write-file.R
│   │   ├── test-bm-row-group-size.R
│   │   ├── test-bm-read-csv.R
│   │   ├── test-bm-read-json.R
│   │   ├── test-custom-duckdb.R
│   │   ├── test-ensure-lib.R
│   │   ├── test-bm-dataset-taxi-2013.R
│   │   ├── test-ensure-source.R
│   │   ├── test-measure.R
│   │   ├── test-external-dependencies.R
│   │   ├── test-bm-read-file.R
│   │   ├── test-ensure-tpch-source.R
│   │   ├── test-params.R
│   │   ├── test-benchmark-dataframe.R
│   │   ├── helper.R
│   │   ├── test-util.R
│   │   ├── test-result.R
│   │   ├── test-ensure-format.R
│   │   └── test-publish.R
│   └── testthat.R
├── LICENSE
├── inst
│   ├── test_data
│   │   ├── chi_traffic_sample.parquet
│   │   └── datasets
│   │       └── taxi_2013
│   │           └── taxi_2013_{1..12}.csv.gz  (12 files)
│   ├── tpch
│   │   ├── answers
│   │   │   ├── scale-factor-1
│   │   │   │   └── tpch-q{01..22}-sf1.parquet  (22 files)
│   │   │   ├── scale-factor-10
│   │   │   │   └── tpch-q{01..22}-sf10.parquet  (22 files)
│   │   │   ├── scale-factor-0.1
│   │   │   │   └── tpch-q{01..22}-sf0.1.parquet  (22 files)
│   │   │   └── scale-factor-0.01
│   │   │       └── tpch-q{01..22}-sf0.01.parquet  (22 files)
│   │   ├── answers_duckdb_data
│   │   │   ├── scale-factor-1
│   │   │   │   └── tpch-q{01..22}-sf1.parquet  (22 files)
│   │   │   ├── scale-factor-10
│   │   │   │   └── tpch-q{01..22}-sf10.parquet  (22 files)
│   │   │   ├── scale-factor-0.1
│   │   │   │   └── tpch-q{01..22}-sf0.1.parquet  (22 files)
│   │   │   └── scale-factor-0.01
│   │   │       └── tpch-q{01..22}-sf0.01.parquet  (22 files)
│   │   └── README.md
│   ├── regenerate-benchmarks-json.R
│   ├── benchmarks.json
│   └── tpch-answer-gen.R
├── .gitignore
├── .Rbuildignore
├── codecov.yml
├── man
│   ├── all_sources.Rd
│   ├── get_csv_writer.Rd
│   ├── known_sources.Rd
│   ├── get_read_function.Rd
│   ├── get_json_reader.Rd
│   ├── read_source.Rd
│   ├── tpch_tables.Rd
│   ├── remote_dataset.Rd
│   ├── get_csv_reader.Rd
│   ├── get_source_attr.Rd
│   ├── get_dataset_attr.Rd
│   ├── install_benchconnect.Rd
│   ├── BenchEnvironment.Rd
│   ├── file_with_ext.Rd
│   ├── null-default.Rd
│   ├── install_datalogistik.Rd
│   ├── sync_and_drop_caches.Rd
│   ├── confirm_mem_alloc.Rd
│   ├── install_pipx.Rd
│   ├── tables_refed.Rd
│   ├── tpc_h_queries.Rd
│   ├── get_package_benchmarks.Rd
│   ├── get_write_function.Rd
│   ├── ensure_source.Rd
│   ├── get_query_func.Rd
│   ├── get_params_summary.Rd
│   ├── table_to_df.Rd
│   ├── dataset_taxi_parquet.Rd
│   ├── df_to_table.Rd
│   ├── write_csv.Rd
│   ├── knowns.Rd
│   ├── data_file.Rd
│   ├── placebo.Rd
│   ├── dataset_taxi_2013.Rd
│   ├── read_csv.Rd
│   ├── ensure_format.Rd
│   ├── ensure_dataset.Rd
│   ├── read_file.Rd
│   ├── row_group_size.Rd
│   ├── write_file.Rd
│   ├── read_json.Rd
│   ├── validate_format.Rd
│   ├── BenchmarkDataFrame.Rd
│   ├── assemble_metadata.Rd
│   ├── tpch_answer.Rd
│   ├── array_altrep_materialization.Rd
│   ├── array_to_vector.Rd
│   ├── get_sql_query_func.Rd
│   ├── generate_tpch.Rd
│   ├── measure.Rd
│   ├── get_input_func.Rd
│   ├── as.data.frame.BenchmarkResults.Rd
│   ├── tpc_h.Rd
│   ├── get_default_parameters.Rd
│   ├── run_bm.Rd
│   ├── run_one.Rd
│   ├── run_benchmark.Rd
│   ├── R6Point1Class.Rd
│   └── run.Rd
├── arrowbench.Rproj
├── R
│   ├── setup.R
│   ├── bm-remote-dataset.R
│   ├── bm-table-to-df.R
│   ├── publish.R
│   ├── bm-placebo.R
│   ├── bm-df-to-table.R
│   ├── benchmark-dataframe.R
│   ├── ensure-tpch-source.R
│   ├── custom-duckdb.R
│   ├── measure.R
│   ├── bm-row-group-size.R
│   ├── params.R
│   ├── bm-write-csv.R
│   ├── bm-array-to-vector.R
│   ├── bm-write-file.R
│   ├── bm-read-file.R
│   ├── bm-dataset-taxi-parquet.R
│   ├── external-dependencies.R
│   ├── bm-dataset-taxi-2013.R
│   └── bm-read-csv.R
├── LICENSE.md
├── DESCRIPTION
└── NAMESPACE

--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
*.html

--------------------------------------------------------------------------------
/tests/testthat/teardown.R:
--------------------------------------------------------------------------------
wipe_results()

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
YEAR: 2021
COPYRIGHT HOLDER: 2021 Ursa Computing

--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
library(testthat)
library(arrowbench)

test_check("arrowbench")
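The man/ index above shows the package's entry points for running benchmarks (run_benchmark.Rd, run_one.Rd, get_package_benchmarks.Rd, placebo.Rd). For orientation, here is a minimal sketch of how those exports fit together; the names are taken from the listing, but the exact signatures and return types are assumptions, not verified against the sources:

```r
library(arrowbench)

# Enumerate the benchmarks the package ships. The name comes from man/ above;
# that it returns something like a BenchmarkDataFrame is an assumption based
# on man/BenchmarkDataFrame.Rd.
benchmarks <- get_package_benchmarks()

# Run the no-op "placebo" benchmark (man/placebo.Rd) to exercise the
# measurement machinery without doing real work. Assumed signature.
res <- run_benchmark(placebo)

# man/as.data.frame.BenchmarkResults.Rd suggests results coerce to a data.frame.
as.data.frame(res)
```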
--------------------------------------------------------------------------------
Binary assets (parquet and csv.gz files) are not inlined in this dump. Each is
mirrored at https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/<path>:

/inst/test_data/chi_traffic_sample.parquet
/inst/test_data/datasets/taxi_2013/taxi_2013_{1..12}.csv.gz
/inst/tpch/answers/scale-factor-1/tpch-q{01..22}-sf1.parquet
/inst/tpch/answers/scale-factor-10/tpch-q{01..22}-sf10.parquet
/inst/tpch/answers/scale-factor-0.1/tpch-q{01..22}-sf0.1.parquet
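Because the TPC-H answer files ship under inst/, an installed copy of arrowbench exposes them through system.file(). A minimal sketch for inspecting one answer, assuming the arrow package is installed (system.file() and arrow::read_parquet() are standard APIs; only the intent to read answers this way is inferred from the layout):

```r
library(arrow)

# inst/ is dropped at install time, so the bundled answer files live directly
# under the installed package root and can be located with system.file().
answer_path <- system.file(
  "tpch/answers/scale-factor-1/tpch-q01-sf1.parquet",
  package = "arrowbench"
)
q01_answer <- read_parquet(answer_path)
head(q01_answer)
```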
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Rproj.user
.Rhistory
.RData
.Ruserdata
tests/testthat/results
.DS_Store
results/
data/
tests/testthat/data/
.vscode/

--------------------------------------------------------------------------------
Remaining binary assets, continuing the list above:

/inst/tpch/answers/scale-factor-0.01/tpch-q{01..22}-sf0.01.parquet
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
^.*\.Rproj$
^\.Rproj\.user$
^r_libs
^data
^source_data
^results
^plots
^.*\.prof$
^codecov\.yml$
^\.github$
^LICENSE\.md$

--------------------------------------------------------------------------------
/inst/tpch/answers_duckdb_data/scale-factor-1/tpch-q{01..22}-sf1.parquet
/inst/tpch/answers_duckdb_data/scale-factor-10/tpch-q{01..22}-sf10.parquet
/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q{01..14}-sf0.1.parquet  (dump ends mid-directory here)
-------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q15-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q15-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q16-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q16-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q17-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q17-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q18-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q18-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q19-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q19-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q20-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q20-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q21-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q21-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q22-sf0.1.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.1/tpch-q22-sf0.1.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q01-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q01-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q02-sf0.01.parquet: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q02-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q03-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q03-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q04-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q04-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q05-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q05-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q06-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q06-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q07-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q07-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q08-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q08-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q09-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q09-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q10-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q10-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q11-sf0.01.parquet: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q11-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q12-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q12-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q13-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q13-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q14-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q14-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q15-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q15-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q16-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q16-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q17-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q17-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q18-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q18-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q19-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q19-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q20-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q20-sf0.01.parquet 
-------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q21-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q21-sf0.01.parquet -------------------------------------------------------------------------------- /inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q22-sf0.01.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/voltrondata-labs/arrowbench/HEAD/inst/tpch/answers_duckdb_data/scale-factor-0.01/tpch-q22-sf0.01.parquet -------------------------------------------------------------------------------- /tests/testthat/setup.R: -------------------------------------------------------------------------------- 1 | if (!pipx_available()) { 2 | install_pipx() 3 | } 4 | 5 | if (!benchconnect_available()) { 6 | install_benchconnect() 7 | } 8 | 9 | if (!datalogistik_available()) { 10 | install_datalogistik() 11 | } 12 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-table-to-df.R: -------------------------------------------------------------------------------- 1 | test_that("table_to_df benchmark works", { 2 | expect_benchmark_run( 3 | run_benchmark( 4 | table_to_df, 5 | source = "nyctaxi_sample", 6 | cpu_count = arrow::cpu_count() 7 | ) 8 | ) 9 | }) 10 | 11 | wipe_results() -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-array-to-vector.R: -------------------------------------------------------------------------------- 1 | test_that("array_to_vector benchmark runs", { 2 | 3 | expect_benchmark_run( 4 | run_benchmark( 5 | array_to_vector, 6 | source = "nyctaxi_sample", 7 | cpu_count = arrow::cpu_count(), 8 | alt_rep = FALSE 9 | ) 10 | ) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-array-altrep-materialization.R: -------------------------------------------------------------------------------- 1 | test_that("array_altrep_materialization benchmark runs", { 2 | 3 | expect_benchmark_run( 4 | run_benchmark( 5 | array_altrep_materialization, 6 | source = "fanniemae_sample", 7 | altrep = TRUE, 8 | cpu_count = arrow::cpu_count() 9 | ) 10 | ) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-write-csv.R: -------------------------------------------------------------------------------- 1 | test_that("write_csv benchmark works", { 2 | expect_benchmark_run( 3 | run_benchmark( 4 | write_csv, 5 | source = "nyctaxi_sample", 6 | writer = c("arrow", "data.table", "vroom", "readr", "base"), 7 | cpu_count = arrow::cpu_count() 8 | ) 9 | ) 10 | }) 11 | 12 | wipe_results() 13 | -------------------------------------------------------------------------------- /man/all_sources.Rd: -------------------------------------------------------------------------------- 1 
| % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/known-sources.R 3 | \docType{data} 4 | \name{all_sources} 5 | \alias{all_sources} 6 | \title{Known data files} 7 | \format{ 8 | An object of class \code{list} of length 13. 9 | } 10 | \usage{ 11 | all_sources 12 | } 13 | \description{ 14 | Known data files 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/get_csv_writer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-write-csv.R 3 | \name{get_csv_writer} 4 | \alias{get_csv_writer} 5 | \title{Get a CSV writer} 6 | \usage{ 7 | get_csv_writer(writer) 8 | } 9 | \arguments{ 10 | \item{writer}{the writer to use} 11 | } 12 | \value{ 13 | the csv writer 14 | } 15 | \description{ 16 | Get a CSV writer 17 | } 18 | -------------------------------------------------------------------------------- /man/known_sources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/known-sources.R 3 | \docType{data} 4 | \name{known_sources} 5 | \alias{known_sources} 6 | \title{Known data files} 7 | \format{ 8 | An object of class \code{list} of length 10. 9 | } 10 | \usage{ 11 | known_sources 12 | } 13 | \description{ 14 | Known data files 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/get_read_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-file.R 3 | \name{get_read_function} 4 | \alias{get_read_function} 5 | \title{Get a reader} 6 | \usage{ 7 | get_read_function(file_type) 8 | } 9 | \arguments{ 10 | \item{file_type}{what file_type to read} 11 | } 12 | \value{ 13 | the read function to use 14 | } 15 | \description{ 16 | Get a reader 17 | } 18 | -------------------------------------------------------------------------------- /man/get_json_reader.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-json.R 3 | \name{get_json_reader} 4 | \alias{get_json_reader} 5 | \title{Get a JSON reader} 6 | \usage{ 7 | get_json_reader(reader) 8 | } 9 | \arguments{ 10 | \item{reader}{string of the reader package to use} 11 | } 12 | \value{ 13 | the JSON function 14 | } 15 | \description{ 16 | Get a JSON reader 17 | } 18 | -------------------------------------------------------------------------------- /man/read_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{read_source} 4 | \alias{read_source} 5 | \title{Read a known source} 6 | \usage{ 7 | read_source(file, ...) 
8 | } 9 | \arguments{ 10 | \item{file}{file to read} 11 | 12 | \item{...}{extra arguments to pass} 13 | } 14 | \value{ 15 | the source 16 | } 17 | \description{ 18 | Read a known source 19 | } 20 | -------------------------------------------------------------------------------- /man/tpch_tables.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-tpch-source.R 3 | \docType{data} 4 | \name{tpch_tables} 5 | \alias{tpch_tables} 6 | \title{Table names for TPC-H benchmarks} 7 | \format{ 8 | An object of class \code{character} of length 8. 9 | } 10 | \usage{ 11 | tpch_tables 12 | } 13 | \description{ 14 | Table names for TPC-H benchmarks 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/remote_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-remote-dataset.R 3 | \docType{data} 4 | \name{remote_dataset} 5 | \alias{remote_dataset} 6 | \title{Remote (S3) dataset reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | remote_dataset 12 | } 13 | \description{ 14 | Remote (S3) dataset reading 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/get_csv_reader.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-csv.R 3 | \name{get_csv_reader} 4 | \alias{get_csv_reader} 5 | \title{Get a CSV reader} 6 | \usage{ 7 | get_csv_reader(reader, delim) 8 | } 9 | \arguments{ 10 | \item{reader}{the reader to use} 11 | 12 | \item{delim}{the delimiter to use} 13 | } 14 | \value{ 15 | the csv reader 16 | } 17 | \description{ 18 | Get a CSV reader 19 | } 20 | -------------------------------------------------------------------------------- /man/get_source_attr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{get_source_attr} 4 | \alias{get_source_attr} 5 | \title{Get source attributes} 6 | \usage{ 7 | get_source_attr(file, attr) 8 | } 9 | \arguments{ 10 | \item{file}{the file to get attributes for} 11 | 12 | \item{attr}{the attribute to get} 13 | } 14 | \description{ 15 | Get source attributes 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-write-file.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_that("write_file benchmark works", { 4 | expect_benchmark_run( 5 | run_benchmark( 6 | write_file, 7 | source = "nyctaxi_sample", 8 | file_type = c("parquet", "feather"), 9 | compression = c("uncompressed", "snappy", "lz4"), 10 | input_type = c("arrow_table", "data_frame"), 11 | cpu_count = arrow::cpu_count() 12 | ) 13 | ) 14 | }) 15 | 16 | wipe_results() 17 | -------------------------------------------------------------------------------- /man/get_dataset_attr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{get_dataset_attr} 4 | 
\alias{get_dataset_attr} 5 | \title{Get dataset attributes} 6 | \usage{ 7 | get_dataset_attr(name, attr) 8 | } 9 | \arguments{ 10 | \item{name}{the dataset to get attributes for} 11 | 12 | \item{attr}{the attribute to get} 13 | } 14 | \description{ 15 | Get dataset attributes 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/install_benchconnect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external-dependencies.R 3 | \name{install_benchconnect} 4 | \alias{install_benchconnect} 5 | \title{Install benchconnect} 6 | \usage{ 7 | install_benchconnect() 8 | } 9 | \description{ 10 | Install \href{https://github.com/conbench/conbench/tree/main/benchconnect}{benchconnect}, 11 | a utility for sending benchmark results to a Conbench server 12 | } 13 | -------------------------------------------------------------------------------- /arrowbench.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageRoxygenize: rd,collate,namespace 21 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-row-group-size.R: -------------------------------------------------------------------------------- 1 | test_that("row_group_size benchmark runs", { 2 | 3 | params <- get_default_parameters(row_group_size, chunk_size = list(NULL, 10000L, 100000L, 1000000L)) 4 | 5 | expect_benchmark_run( 6 | run_benchmark( 7 | row_group_size, 8 | source = "fanniemae_sample", 9 | queries = "everything", 10 | chunk_size = list(1000L), 11 | cpu_count = arrow::cpu_count() 12 | ) 13 | ) 14 | }) 15 | 16 | wipe_results() 17 | -------------------------------------------------------------------------------- /man/BenchEnvironment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark.R 3 | \name{BenchEnvironment} 4 | \alias{BenchEnvironment} 5 | \title{Create a test environment to run benchmarks in} 6 | \usage{ 7 | BenchEnvironment(...)
8 | } 9 | \arguments{ 10 | \item{...}{named list of parameters to set in the environment} 11 | } 12 | \value{ 13 | An environment 14 | } 15 | \description{ 16 | Create a test environment to run benchmarks in 17 | } 18 | -------------------------------------------------------------------------------- /man/file_with_ext.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{file_with_ext} 4 | \alias{file_with_ext} 5 | \title{Get a file with an extension} 6 | \usage{ 7 | file_with_ext(file, new_ext) 8 | } 9 | \arguments{ 10 | \item{file}{the file} 11 | 12 | \item{new_ext}{the new extension} 13 | } 14 | \value{ 15 | the file with the new extension 16 | } 17 | \description{ 18 | Get a file with an extension 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/null-default.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{null-default} 4 | \alias{null-default} 5 | \alias{\%||\%} 6 | \title{Default value for NULL} 7 | \usage{ 8 | a \%||\% b 9 | } 10 | \arguments{ 11 | \item{a}{Thing to test for \code{NULL}-ness} 12 | 13 | \item{b}{Thing to use if \code{a} is \code{NULL}} 14 | } 15 | \value{ 16 | \code{a} unless it's \code{NULL}, then \code{b} 17 | } 18 | \description{ 19 | Default value for NULL 20 | } 21 | -------------------------------------------------------------------------------- /man/install_datalogistik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external-dependencies.R 3 | \name{install_datalogistik} 4 | \alias{install_datalogistik} 5 | \title{Install datalogistik} 6 | \usage{ 7 | install_datalogistik() 8 | } 9 | \description{ 10 | Install \href{https://github.com/conbench/datalogistik}{datalogistik}, a utility 11 | for generating, downloading, and converting datasets for benchmarking. 12 | } 13 | \details{ 14 | Only for interactive use. 15 | } 16 | -------------------------------------------------------------------------------- /man/sync_and_drop_caches.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{sync_and_drop_caches} 4 | \alias{sync_and_drop_caches} 5 | \title{Attempt to drop disk caches} 6 | \usage{ 7 | sync_and_drop_caches() 8 | } 9 | \value{ 10 | Logical; were caches cleared? 11 | } 12 | \description{ 13 | Attempts to drop disk caches. Currently only works on Linux. If clearing 14 | fails, it sets an option so it will not reattempt on future calls.
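% (A hypothetical calling pattern, not from the package sources: a benchmark runner might call sync_and_drop_caches() before each timed iteration and simply proceed when it returns FALSE, e.g. on non-Linux systems.)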
15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/confirm_mem_alloc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/util.R 3 | \name{confirm_mem_alloc} 4 | \alias{confirm_mem_alloc} 5 | \title{Confirm that the memory allocator is enabled} 6 | \usage{ 7 | confirm_mem_alloc(mem_alloc) 8 | } 9 | \arguments{ 10 | \item{mem_alloc}{the memory allocator to be tested (one of: "jemalloc", "mimalloc", "system")} 11 | } 12 | \value{ 13 | nothing 14 | } 15 | \description{ 16 | Confirm that the memory allocator is enabled 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-read-csv.R: -------------------------------------------------------------------------------- 1 | test_that("read_csv setup", { 2 | defaults <- get_default_args(read_csv$setup) 3 | expect_named(defaults, c("source", "reader", "compression", "output_format"), ignore.order = TRUE) 4 | }) 5 | 6 | 7 | test_that("read_csv benchmark works", { 8 | expect_benchmark_run( 9 | run_benchmark( 10 | read_csv, 11 | source = c("nyctaxi_sample", "fanniemae_sample"), 12 | compression = "uncompressed", 13 | cpu_count = arrow::cpu_count() 14 | ) 15 | ) 16 | }) 17 | 18 | wipe_results() 19 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-read-json.R: -------------------------------------------------------------------------------- 1 | test_that("read_json setup", { 2 | defaults <- get_default_args(read_json$setup) 3 | expect_named(defaults, c("source", "reader", "compression", "output_format", "rbinder"), ignore.order = TRUE) 4 | }) 5 | 6 | test_that("read_json benchmark works", { 7 | expect_benchmark_run( 8 | run_benchmark( 9 | read_json, 10 | reader = "arrow", 11 | source = "fanniemae_sample", 12 | compression = "uncompressed", 13 | cpu_count = arrow::cpu_count() 14 | ) 15 | ) 16 | }) 17 | 18 | wipe_results() -------------------------------------------------------------------------------- /man/install_pipx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/external-dependencies.R 3 | \name{install_pipx} 4 | \alias{install_pipx} 5 | \title{Install pipx} 6 | \usage{ 7 | install_pipx() 8 | } 9 | \description{ 10 | Install \href{https://pypa.github.io/pipx/}{pipx}, a version of pip that installs 11 | Python packages in isolated environments where they will always be available 12 | regardless of which version of Python is presently on \verb{$PATH}. Especially 13 | useful for installing packages designed to be used via CLIs.
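% (Hypothetical usage, mirroring tests/testthat/setup.R above: if (!pipx_available()) install_pipx(); afterwards pipx_available() should return TRUE.)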
14 | } 15 | -------------------------------------------------------------------------------- /man/tables_refed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{tables_refed} 4 | \alias{tables_refed} 5 | \title{For extracting table names from TPC-H queries} 6 | \usage{ 7 | tables_refed(query_func) 8 | } 9 | \arguments{ 10 | \item{query_func}{a function containing a dplyr pipeline} 11 | } 12 | \value{ 13 | all references inside of \code{input_func(...)}, collapsed 14 | } 15 | \description{ 16 | This searches a function for all references of \code{input_func(...)} and returns 17 | the contents of \code{...} 18 | } 19 | -------------------------------------------------------------------------------- /man/tpc_h_queries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tpch-queries.R 3 | \docType{data} 4 | \name{tpc_h_queries} 5 | \alias{tpc_h_queries} 6 | \title{All queries take an \code{input_func}, a function that will return a dplyr tbl 7 | referencing the table needed} 8 | \format{ 9 | An object of class \code{list} of length 22. 10 | } 11 | \usage{ 12 | tpc_h_queries 13 | } 14 | \description{ 15 | All queries take an \code{input_func}, a function that will return a dplyr tbl 16 | referencing the table needed. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/get_package_benchmarks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark-dataframe.R 3 | \name{get_package_benchmarks} 4 | \alias{get_package_benchmarks} 5 | \title{Get a list of benchmarks in a package} 6 | \usage{ 7 | get_package_benchmarks(package = "arrowbench") 8 | } 9 | \arguments{ 10 | \item{package}{String of package name in which to find benchmarks} 11 | } 12 | \value{ 13 | An instance of \link{BenchmarkDataFrame} with all the benchmarks contained 14 | by a package 15 | } 16 | \description{ 17 | Get a list of benchmarks in a package 18 | } 19 | -------------------------------------------------------------------------------- /man/get_write_function.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \name{get_write_function} 4 | \alias{get_write_function} 5 | \title{Get a writer} 6 | \usage{ 7 | get_write_function(format, compression, chunk_size = NULL) 8 | } 9 | \arguments{ 10 | \item{format}{format to write} 11 | 12 | \item{compression}{compression to use} 13 | 14 | \item{chunk_size}{the size of chunks to write (default: NULL, the default for 15 | the format)} 16 | } 17 | \value{ 18 | the write function to use 19 | } 20 | \description{ 21 | Get a writer 22 | } 23 | -------------------------------------------------------------------------------- /man/ensure_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{ensure_source} 4 | \alias{ensure_source} 5 | \title{Make sure a data file exists} 6 | \usage{ 7 | ensure_source(name, ...)
8 | } 9 | \arguments{ 10 | \item{name}{A known-source id, a file path, or a URL} 11 | 12 | \item{...}{arguments to pass on to a custom locator} 13 | } 14 | \value{ 15 | A valid path to a source file. If a known source but not present, 16 | it will be downloaded and possibly decompressed. 17 | } 18 | \description{ 19 | Make sure a data file exists 20 | } 21 | -------------------------------------------------------------------------------- /man/get_query_func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{get_query_func} 4 | \alias{get_query_func} 5 | \title{Get a query function that will run a specific TPC-H query} 6 | \usage{ 7 | get_query_func(query_id, engine = NULL) 8 | } 9 | \arguments{ 10 | \item{query_id}{which query to get?} 11 | 12 | \item{engine}{which engine to use (all options return a dplyr-based query, 13 | with the exception of \code{"duckdb_sql"} which returns a SQL-based query)} 14 | } 15 | \description{ 16 | Get a query function that will run a specific TPC-H query 17 | } 18 | -------------------------------------------------------------------------------- /man/get_params_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark.R 3 | \name{get_params_summary} 4 | \alias{get_params_summary} 5 | \title{Extract the parameter summary as a data.frame} 6 | \usage{ 7 | get_params_summary(run) 8 | } 9 | \arguments{ 10 | \item{run}{An instance of \code{BenchmarkResults} as returned by \code{run_benchmark} 11 | or \code{BenchmarkResult} as returned by \code{run_one} and \code{run_bm}} 12 | } 13 | \value{ 14 | a tibble 15 | } 16 | \description{ 17 | Extract a data.frame that provides the parameters used in a run and the 18 | error status 19 | } 20 | -------------------------------------------------------------------------------- /man/table_to_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-table-to-df.R 3 | \docType{data} 4 | \name{table_to_df} 5 | \alias{table_to_df} 6 | \title{Benchmark for reading an Arrow table to a data.frame} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | table_to_df 12 | } 13 | \description{ 14 | This flexes the conversion from Arrow data structures to R data structures.
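% (Hypothetical invocation, mirroring tests/testthat/test-bm-table-to-df.R above: run_benchmark(table_to_df, source = "nyctaxi_sample", cpu_count = arrow::cpu_count()).)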
15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read into a data.frame first) 20 | } 21 | } 22 | 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /tests/testthat/test-custom-duckdb.R: -------------------------------------------------------------------------------- 1 | test_that("custom DuckDB can be installed to and used from a custom lib", { 2 | # ...and can execute SQL 3 | expect_equal( 4 | query_custom_duckdb("SELECT 'thing' as col_name"), 5 | data.frame(col_name = 'thing') 6 | ) 7 | 8 | # ...and write parquet files 9 | temp_parquet <- tempfile() 10 | expect_identical( 11 | export_custom_duckdb("SELECT 'thing' as col_name", temp_parquet), 12 | temp_parquet 13 | ) 14 | 15 | expect_equal( 16 | as.data.frame(arrow::read_parquet(temp_parquet)), 17 | data.frame(col_name = 'thing', stringsAsFactors = FALSE) 18 | ) 19 | }) 20 | -------------------------------------------------------------------------------- /man/dataset_taxi_parquet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-dataset-taxi-parquet.R 3 | \docType{data} 4 | \name{dataset_taxi_parquet} 5 | \alias{dataset_taxi_parquet} 6 | \title{Benchmark Taxi dataset (Parquet) reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 12. 9 | } 10 | \usage{ 11 | dataset_taxi_parquet 12 | } 13 | \description{ 14 | Benchmark Taxi dataset (Parquet) reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{query} Name of a known query to run; see \code{dataset_taxi_parquet$cases} 20 | } 21 | } 22 | 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /man/df_to_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-df-to-table.R 3 | \docType{data} 4 | \name{df_to_table} 5 | \alias{df_to_table} 6 | \title{Benchmark for reading a data.frame into an Arrow table} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | df_to_table 12 | } 13 | \description{ 14 | This flexes the conversion from R data structures to Arrow data structures. 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read into a data.frame first) 20 | } 21 | } 22 | 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /man/write_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-write-csv.R 3 | \docType{data} 4 | \name{write_csv} 5 | \alias{write_csv} 6 | \title{Benchmark CSV writing} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11.
9 | } 10 | \usage{ 11 | write_csv 12 | } 13 | \description{ 14 | Benchmark CSV writing 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A CSV file path to write to 20 | \item \code{writer} One of \code{c("arrow", "data.table", "vroom", "readr", "base")} 21 | \item \code{input} One of \code{c("arrow_table", "data_frame")} 22 | } 23 | } 24 | 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /tests/testthat/test-ensure-lib.R: -------------------------------------------------------------------------------- 1 | test_that("lib_dir()", { 2 | expect_identical( 3 | lib_dir("foo"), 4 | file.path(getwd(), "r_libs", paste0("R-", paste0(c(getRversion()$major, getRversion()$minor), collapse = ".")), "foo") 5 | ) 6 | 7 | expect_identical( 8 | lib_dir("remote-user/arrow@branch/with/slashes"), 9 | file.path(getwd(), "r_libs", paste0("R-", paste0(c(getRversion()$major, getRversion()$minor), collapse = ".")), "remote-user_arrow@branch_with_slashes") 10 | ) 11 | }) 12 | 13 | test_that("identify_repo_ref()", { 14 | expect_identical( 15 | identify_repo_ref("remote-name/repo@ref"), 16 | list(repo = "name/repo", ref = "ref") 17 | ) 18 | }) -------------------------------------------------------------------------------- /man/knowns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \docType{data} 4 | \name{knowns} 5 | \alias{knowns} 6 | \alias{known_compressions} 7 | \alias{known_formats} 8 | \title{Known formats and compressions} 9 | \format{ 10 | An object of class \code{character} of length 8. 11 | 12 | An object of class \code{character} of length 5. 13 | } 14 | \usage{ 15 | known_compressions 16 | 17 | known_formats 18 | } 19 | \description{ 20 | These formats and compression algorithms are known to {arrowbench}. Not every 21 | compression will work with every format (in fact, parquet is the only format 22 | that supports all of them). 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/data_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{data_file} 4 | \alias{data_file} 5 | \title{Find a data file} 6 | \usage{ 7 | data_file(...) 8 | } 9 | \arguments{ 10 | \item{...}{file path to look for} 11 | } 12 | \value{ 13 | path to the file (or NULL if the file doesn't exist) 14 | } 15 | \description{ 16 | This looks in the locations in the following order and returns the first 17 | path that exists: 18 | } 19 | \details{ 20 | \itemize{ 21 | \item the source dir ("data") 22 | \item the temp directory ("data/temp") 23 | } 24 | 25 | If there is not a file present in either of those, it returns NULL 26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /man/placebo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-placebo.R 3 | \docType{data} 4 | \name{placebo} 5 | \alias{placebo} 6 | \title{Placebo benchmark for testing} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11.
9 | } 10 | \usage{ 11 | placebo 12 | } 13 | \description{ 14 | Placebo benchmark for testing 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{duration} the duration for the benchmark to take 20 | \item \code{error_type} \code{NULL} to cause no error; \code{"rlang::abort"} to use rlang's 21 | \code{abort}; any other string (including \code{"base"}) will use base's \code{stop} 22 | } 23 | } 24 | 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-dataset-taxi-2013.R: -------------------------------------------------------------------------------- 1 | test_that("dataset_taxi_2013 exists", { 2 | defaults <- get_default_args(dataset_taxi_2013$setup) 3 | 4 | expect_named(defaults, c("dataset", "query")) 5 | expect_equal( 6 | defaults$query, 7 | c("basic", "payment_type_crd", "small_no_files", "dims") 8 | ) 9 | }) 10 | 11 | test_that("dataset_taxi_2013 runs on sample data", { 12 | expect_benchmark_run( 13 | res <- run_benchmark(dataset_taxi_2013, dataset = "taxi_2013_sample", cpu_count = arrow::cpu_count()) 14 | ) 15 | 16 | lapply(res$optional_benchmark_info$results, function(result) { 17 | expect_s3_class(result, "BenchmarkResult") 18 | expect_gte(result$result$real, 0) 19 | }) 20 | }) 21 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | pull_request: 7 | branches: 8 | - main 9 | - master 10 | 11 | name: test-coverage 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: covr 28 | 29 | - name: Test coverage 30 | run: covr::codecov() 31 | shell: Rscript {0} 32 | -------------------------------------------------------------------------------- /man/dataset_taxi_2013.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-dataset-taxi-2013.R 3 | \docType{data} 4 | \name{dataset_taxi_2013} 5 | \alias{dataset_taxi_2013} 6 | \title{Benchmark Taxi 2013 dataset reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 12. 9 | } 10 | \usage{ 11 | dataset_taxi_2013 12 | } 13 | \description{ 14 | Benchmark Taxi 2013 dataset reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{dataset} Name of dataset to use, either \code{taxi_2013} or \code{taxi_2013_sample} (for testing) 20 | \item \code{query} Name of a known query to run; see \code{dataset_taxi_2013$cases} 21 | } 22 | } 23 | 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/read_csv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-csv.R 3 | \docType{data} 4 | \name{read_csv} 5 | \alias{read_csv} 6 | \title{Benchmark CSV reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11.
9 | } 10 | \usage{ 11 | read_csv 12 | } 13 | \description{ 14 | Benchmark CSV reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A CSV file path to read in 20 | \item \code{reader} One of \code{c("arrow", "data.table", "vroom", "readr")} 21 | \item \code{compression} One of \code{c("uncompressed", "gzip")} 22 | \item \code{output_format} One of \code{c("arrow_table", "data_frame")} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /inst/regenerate-benchmarks-json.R: -------------------------------------------------------------------------------- 1 | " 2 | This script regenerates inst/benchmarks.json with all current benchmarks. That 3 | file is used by arrow-benchmarks-ci here: 4 | https://github.com/voltrondata-labs/arrow-benchmarks-ci/blob/main/buildkite/benchmark/run.py 5 | to keep track of benchmarks available in a repository. 6 | " 7 | 8 | arrowbench::get_package_benchmarks()$name |> 9 | lapply(function(name) { 10 | list( 11 | command = name, 12 | name = paste0("arrowbench/", name), 13 | runner = "arrowbench", 14 | flags = list(language = "R") 15 | ) 16 | }) |> 17 | unname() |> 18 | jsonlite::write_json( 19 | path = "inst/benchmarks.json", 20 | pretty = TRUE, 21 | auto_unbox = TRUE 22 | ) 23 | -------------------------------------------------------------------------------- /man/ensure_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \name{ensure_format} 4 | \alias{ensure_format} 5 | \title{Ensure that a source has a specific format} 6 | \usage{ 7 | ensure_format( 8 | name, 9 | format = known_formats, 10 | compression = known_compressions, 11 | chunk_size = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{name}{name of the known source} 16 | 17 | \item{format}{format to be ensured} 18 | 19 | \item{compression}{compression to be ensured} 20 | 21 | \item{chunk_size}{the number of rows to write in each chunk} 22 | } 23 | \value{ 24 | the file that was ensured to exist 25 | } 26 | \description{ 27 | Ensure that a source has a specific format 28 | } 29 | -------------------------------------------------------------------------------- /man/ensure_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-source.R 3 | \name{ensure_dataset} 4 | \alias{ensure_dataset} 5 | \title{Make sure a multi-file dataset exists} 6 | \usage{ 7 | ensure_dataset(name, download = TRUE) 8 | } 9 | \arguments{ 10 | \item{name}{A known-dataset id. See \code{known_datasets}.} 11 | 12 | \item{download}{logical: should the dataset be synced to the local disk 13 | or queried from its remote URL. 
Default is \code{TRUE}; files are cached 14 | and not downloaded if they're already found locally.} 15 | } 16 | \value{ 17 | An \code{arrow::Dataset}, validated to have the correct number of rows 18 | } 19 | \description{ 20 | Make sure a multi-file dataset exists 21 | } 22 | -------------------------------------------------------------------------------- /R/setup.R: -------------------------------------------------------------------------------- 1 | # The ensure_* functions will make sure everything is downloaded lazily, 2 | # but you can run this to eagerly set up everything up front 3 | 4 | setup_all <- function() { 5 | setup_sources() 6 | setup_datasets() 7 | setup_packages() 8 | } 9 | 10 | setup_sources <- function() { 11 | for (x in names(known_sources)) { 12 | message("Downloading source ", x) 13 | ensure_source(x) 14 | } 15 | } 16 | 17 | setup_datasets <- function() { 18 | for (x in names(known_datasets)) { 19 | message("Downloading dataset ", x) 20 | ensure_dataset(x) 21 | } 22 | } 23 | 24 | setup_packages <- function() { 25 | for (x in names(arrow_version_to_date)) { 26 | message("Installing libs for ", x) 27 | ensure_lib(x) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /man/read_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-file.R 3 | \docType{data} 4 | \name{read_file} 5 | \alias{read_file} 6 | \title{Benchmark file reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | read_file 12 | } 13 | \description{ 14 | Benchmark file reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id, or a CSV(?) file path to read in 20 | \item \code{file_type} One of \code{c("parquet", "feather", "fst")} 21 | \item \code{compression} One of the values: uncompressed, snappy, zstd, gzip, lz4, brotli, lzo, bz2 22 | \item \code{output_type} One of \code{c("arrow_table", "data_frame")} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/row_group_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-row-group-size.R 3 | \docType{data} 4 | \name{row_group_size} 5 | \alias{row_group_size} 6 | \title{Benchmark effect of parquet row group size} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | row_group_size 12 | } 13 | \description{ 14 | Benchmark effect of parquet row group size 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id, or a file path to read in 20 | \item \code{queries} What queries to run 21 | \item \code{chunk_size} Number of rows to write in each row group. Suggested sizes: 22 | \code{chunk_size = list(NULL, 10000L, 100000L, 1000000L)} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/write_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-write-file.R 3 | \docType{data} 4 | \name{write_file} 5 | \alias{write_file} 6 | \title{Benchmark file writing} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 
9 | } 10 | \usage{ 11 | write_file 12 | } 13 | \description{ 14 | Benchmark file writing 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id, or a path to a CSV file to read in 20 | \item \code{file_type} One of \code{c("parquet", "feather", "fst")} 21 | \item \code{compression} One of the values: uncompressed, snappy, zstd, gzip, lz4, brotli, lzo, bz2 22 | \item \code{input_type} One of \code{c("arrow_table", "data_frame")} 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /tests/testthat/test-ensure-source.R: -------------------------------------------------------------------------------- 1 | test_that("get_source_attr()", { 2 | # can get known_source attrs 3 | expect_identical(get_source_attr("fanniemae_2016Q4", "dim"), c(22180168L, 31L)) 4 | 5 | # and can get test_source attrs 6 | expect_identical(get_source_attr("nyctaxi_sample", "dim"), c(998L, 18L)) 7 | }) 8 | 9 | test_that("get_dataset_attr()", { 10 | # can get known_dataset attrs 11 | expect_identical(get_dataset_attr("taxi_parquet", "dim"), c(1547741381L, 20L)) 12 | }) 13 | 14 | test_that("ensure_source error handling", { 15 | expect_error( 16 | ensure_source("not_a_source"), 17 | "not_a_source is not a known source" 18 | ) 19 | }) 20 | 21 | test_that("source_filename()", { 22 | expect_identical( 23 | source_filename("fanniemae_2016Q4"), 24 | "fanniemae_2016Q4.csv.gz" 25 | ) 26 | }) 27 | -------------------------------------------------------------------------------- /tests/testthat/test-measure.R: -------------------------------------------------------------------------------- 1 | test_that("with_gc_info + errors", { 2 | # this tests with_gc_info + errors behavior, but we can't test it quite 3 | # directly because of how testthat alters how errors work.
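# A rough equivalent outside testthat (a sketch, assuming an interactive
# session with this package loaded) would look like:
#   base_error <- run_one(placebo, error_type = "base")
#   base_error$error$error  # matches "Error.*something went wrong (but I knew that)"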
4 | 5 | suppress_deparse_warning( 6 | capture.output( 7 | base_error <- run_one(placebo, error_type = "base"), type = "message" 8 | ) 9 | ) 10 | expect_false(is.null(base_error$error)) 11 | expect_match(base_error$error$error, "Error.*something went wrong \\(but I knew that\\)") 12 | 13 | suppress_deparse_warning( 14 | capture.output( 15 | rlang_error <- run_one(placebo, error_type = "rlang::abort"), type = "message" 16 | ) 17 | ) 18 | expect_false(is.null(rlang_error$error)) 19 | expect_match(rlang_error$error$error, "Error.*something went wrong \\(but I knew that\\)") 20 | }) 21 | -------------------------------------------------------------------------------- /R/bm-remote-dataset.R: -------------------------------------------------------------------------------- 1 | #' Remote (S3) dataset reading 2 | #' 3 | #' @export 4 | remote_dataset <- Benchmark("remote_dataset", 5 | setup = function(source = c("taxi_file_list_parquet", "taxi_file_list_feather")) { 6 | library("dplyr") 7 | dataset <- ensure_dataset(source, download = FALSE) 8 | result_dim <- get_dataset_attr(source, "dim") 9 | 10 | BenchEnvironment( 11 | dataset = dataset, 12 | expected_dim = result_dim 13 | ) 14 | }, 15 | before_each = { 16 | options("arrow.use_async" = TRUE) 17 | result <- NULL 18 | }, 19 | run = { 20 | result <- collect(dataset) 21 | }, 22 | after_each = { 23 | stopifnot( 24 | "The dimensions do not match" = all.equal(dim(result), expected_dim) 25 | ) 26 | }, 27 | packages_used = function(params) { 28 | c("arrow") 29 | } 30 | ) 31 | -------------------------------------------------------------------------------- /man/read_json.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-read-json.R 3 | \docType{data} 4 | \name{read_json} 5 | \alias{read_json} 6 | \title{Benchmark JSON reading} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | read_json 12 | } 13 | \description{ 14 | Benchmark JSON reading 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A JSON file path to read in 20 | \item \code{reader} One of \code{c("arrow", "jsonlite", "ndjson", "RcppSimdJson")} 21 | \item \code{compression} One of \code{c("uncompressed", "gzip")} 22 | \item \code{output_format} One of \code{c("arrow_table", "data_frame")} 23 | \item \code{rbinder} Method for simplifying to dataframe. Not relevant for {arrow} and {ndjson}. 24 | } 25 | } 26 | 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/validate_format.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-format.R 3 | \name{validate_format} 4 | \alias{validate_format} 5 | \alias{stop_if_not_valid_format} 6 | \title{Validate format and compression combinations} 7 | \usage{ 8 | validate_format(format, compression) 9 | 10 | stop_if_not_valid_format(format, compression) 11 | } 12 | \arguments{ 13 | \item{format}{the format of the file} 14 | 15 | \item{compression}{the compression codec} 16 | } 17 | \value{ 18 | \code{TRUE} invisibly 19 | } 20 | \description{ 21 | For a given format + compression, determine if the combination is valid. 22 | \code{validate_format()} returns a logical vector indicating whether each format + 23 | compression combination is valid.
\code{stop_if_not_valid_format()} will stop if any of the format + compressions 24 | are not valid. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /tests/testthat/test-external-dependencies.R: -------------------------------------------------------------------------------- 1 | test_that("external_cli_available() works", { 2 | fake_uninstalled_cli <- basename(tempfile()) 3 | expect_warning( 4 | expect_false( 5 | external_cli_available(fake_uninstalled_cli) 6 | ), 7 | regexp = paste(fake_uninstalled_cli, "not installed or on $PATH"), 8 | fixed = TRUE 9 | ) 10 | 11 | expect_true(external_cli_available("which")) 12 | }) 13 | 14 | test_that("pipx_available() works", { 15 | expect_equal(pipx_available(), processx::run("which", "pipx")$status == 0L) 16 | }) 17 | 18 | test_that("benchconnect_available() works", { 19 | expect_equal(benchconnect_available(), processx::run("which", "benchconnect")$status == 0L) 20 | }) 21 | 22 | test_that("datalogistik_available() works", { 23 | expect_equal(datalogistik_available(), processx::run("which", "datalogistik")$status == 0L) 24 | }) 25 | -------------------------------------------------------------------------------- /man/BenchmarkDataFrame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmark-dataframe.R 3 | \name{BenchmarkDataFrame} 4 | \alias{BenchmarkDataFrame} 5 | \title{A classed dataframe of benchmarks for running} 6 | \usage{ 7 | BenchmarkDataFrame(benchmarks, parameters) 8 | } 9 | \arguments{ 10 | \item{benchmarks}{A list with elements of class \code{Benchmark}} 11 | 12 | \item{parameters}{Optional. A list of dataframes of parameter combinations to 13 | run as generated by \code{\link[=get_default_parameters]{get_default_parameters()}}. If null, defaults will be generated 14 | when \code{\link[=run]{run()}} is called.} 15 | } 16 | \value{ 17 | A classed dataframe with \code{name} (benchmark attribute, not object name), 18 | \code{benchmark}, and \code{params} columns 19 | } 20 | \description{ 21 | A classed dataframe of benchmarks for running 22 | } 23 | -------------------------------------------------------------------------------- /man/assemble_metadata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{assemble_metadata} 4 | \alias{assemble_metadata} 5 | \title{Assemble metadata for a benchmark run} 6 | \usage{ 7 | assemble_metadata(name, params, cpu_count, drop_caches, n_iter) 8 | } 9 | \arguments{ 10 | \item{name}{Benchmark name, i.e. \code{bm$name}} 11 | 12 | \item{params}{Named list of parameters for the individual run, i.e. the case} 13 | 14 | \item{cpu_count}{Number of CPUs allocated} 15 | 16 | \item{drop_caches}{Attempt to drop the disk cache before each case or iteration. 17 | Currently only works on linux. Permissible values are \code{"case"}, \code{"iteration"}, 18 | and \code{NULL}. Defaults to \code{NULL}, i.e. 
don't drop caches.} 19 | 20 | \item{n_iter}{Number of iterations} 21 | } 22 | \description{ 23 | Assemble metadata for a benchmark run 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/tpch_answer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{tpch_answer} 4 | \alias{tpch_answer} 5 | \title{Get a TPC-H answer} 6 | \usage{ 7 | tpch_answer( 8 | scale_factor, 9 | query_id, 10 | source = c("arrowbench", "duckdb"), 11 | data_source = c("duckdb", "dbgen") 12 | ) 13 | } 14 | \arguments{ 15 | \item{scale_factor}{scale factor (possible values: \code{c(0.01, 0.1, 1, 10)})} 16 | 17 | \item{query_id}{Id of the query (possible values: 1-22)} 18 | 19 | \item{source}{source of the answer (default: "arrowbench"); "duckdb" can 20 | return answers for scale_factor 1.} 21 | 22 | \item{data_source}{which source of data should we construct answers for? "duckdb" 23 | (the default) has a slightly different set of data in the *_address columns 24 | compared to "dbgen".} 25 | } 26 | \value{ 27 | the answer, as a data.frame 28 | } 29 | \description{ 30 | Get a TPC-H answer 31 | } 32 | -------------------------------------------------------------------------------- /inst/tpch/README.md: -------------------------------------------------------------------------------- 1 | # Why is there a separate duckdb data directory here? 2 | 3 | The DuckDB data generator actually produces data that is _slightly_ out of spec. Specifically, the `_address` columns generate slightly different data. Generally speaking, this isn't a big deal since the queries don't use pattern matches on those columns, but they do show up in some of the answers. 4 | 5 | But it's plain to see if you look at the official answers and the duckdb ones for a query that includes a `_address` column: 6 | 7 | https://github.com/databricks/tpch-dbgen/blob/6985da461c641fd0d255b214f2d693f1bf08bc33/answers/q2.out 8 | https://github.com/duckdb/duckdb/blob/c0a4ab96c626426961c207f49c19aa81448e91da/extension/tpch/dbgen/answers/sf1/q02.csv 9 | 10 | Additionally, DuckDB >= 0.8 also slightly changed some of the `s_comment` columns. This doesn't impact the queries themselves, but [the answers changed](https://github.com/duckdb/duckdb/pull/6535). Note that the stored answers differed before PR #136. -------------------------------------------------------------------------------- /man/array_altrep_materialization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-array-altrep-materialization.R 3 | \docType{data} 4 | \name{array_altrep_materialization} 5 | \alias{array_altrep_materialization} 6 | \title{Benchmark for materializing an altrep Arrow array} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | array_altrep_materialization 12 | } 13 | \description{ 14 | This flexes a lower-level conversion to R data structures from Arrow data structures. 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read in to a data.frame first) 20 | \item \code{exclude_nulls} Logical. Remove any columns with any \code{NULL}s or \code{NA}s in them? 21 | \item \code{altrep} Logical. Use altrep storage for vectors?
22 | \item \code{subset_indices} Length-one list containing a vector of indices used to subset rows of the source. 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/array_to_vector.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-array-to-vector.R 3 | \docType{data} 4 | \name{array_to_vector} 5 | \alias{array_to_vector} 6 | \title{Benchmark for reading an Arrow array to a vector} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | array_to_vector 12 | } 13 | \description{ 14 | This flexes a lower-level conversion to R data structures from Arrow data structures. 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{source} A known-file id to use (it will be read in to a data.frame first) 20 | \item \code{chunked_arrays} logical, should the arrays converted be \code{ChunkedArrays} or \code{Arrays}? 21 | \item \code{exclude_nulls} logical, should any columns with any \code{NULL}s or \code{NA}s in them be removed? 22 | \item \code{alt_rep} logical, should the altrep option be set? (\code{TRUE} to enable it, \code{FALSE} to disable) 23 | } 24 | } 25 | 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/get_sql_query_func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{get_sql_query_func} 4 | \alias{get_sql_query_func} 5 | \title{Get a SQL query} 6 | \usage{ 7 | get_sql_query_func(query_num) 8 | } 9 | \arguments{ 10 | \item{query_num}{the query number to fetch the result for} 11 | } 12 | \value{ 13 | a function that accepts a connection argument \code{con}, against which it will run 14 | \code{DBI::dbGetQuery()}. 15 | } 16 | \description{ 17 | Produces a function that can run a query against any DBI backend (e.g. DuckDB) 18 | } 19 | \details{ 20 | The function that is returned takes the following arguments. The first two are 21 | supplied to match the signature of those in \code{tpc_h_queries}: 22 | \itemize{ 23 | \item \code{input_func} set to default \code{NULL}, will have no effect if supplied 24 | \item \code{collect_func} set to default \code{NULL}, will have no effect if supplied 25 | \item \code{con} a (DBI) connection to query against 26 | } 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man/generate_tpch.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ensure-tpch-source.R 3 | \name{generate_tpch} 4 | \alias{generate_tpch} 5 | \title{Generate tpch data} 6 | \usage{ 7 | generate_tpch(scale_factor = 1) 8 | } 9 | \arguments{ 10 | \item{scale_factor}{a relative measure of the size of data in gigabytes.} 11 | } 12 | \description{ 13 | Generate tpch data at a given scale factor. By default, 14 | data is output relative to the current working directory. However, 15 | you can set the environment variable \code{ARROWBENCH_DATA_DIR} to 16 | point to another directory. Setting this environment variable has 17 | the advantage of being a central location for general usage.
Running 18 | this function will install a custom version of duckdb in an \code{r_libs} 19 | directory, relative to the directory specified by the environment 20 | variable \code{ARROWBENCH_LOCAL_DIR}. When running this function for the first time you will 21 | see significant output from that installation process. This is normal. 22 | } 23 | -------------------------------------------------------------------------------- /man/measure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/measure.R 3 | \name{measure} 4 | \alias{measure} 5 | \title{Measure times and memory usage} 6 | \usage{ 7 | measure(..., profiling = FALSE, drop_caches = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{An expression to evaluate and measure} 11 | 12 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 13 | contain a \code{prof_file} field, which you can read in with 14 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 15 | 16 | \item{drop_caches}{Attempt to drop the disk cache before each case or iteration. 17 | Currently only works on linux. Permissible values are \code{"case"}, \code{"iteration"}, 18 | and \code{NULL}. Defaults to \code{NULL}, i.e. don't drop caches. As \code{measure()} is run 19 | once per iteration, here \code{"iteration"} results in dropping caches once and 20 | \code{NULL} and \code{"case"} result in no cache dropping.} 21 | } 22 | \value{ 23 | A tibble of timings and memory usage 24 | } 25 | \description{ 26 | Measure times and memory usage 27 | } 28 | -------------------------------------------------------------------------------- /man/get_input_func.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \name{get_input_func} 4 | \alias{get_input_func} 5 | \title{Get an input function for a table} 6 | \usage{ 7 | get_input_func( 8 | engine, 9 | scale_factor, 10 | query_id, 11 | format, 12 | compression = "uncompressed", 13 | con = NULL, 14 | memory_map = FALSE, 15 | chunk_size = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{engine}{which engine to use} 20 | 21 | \item{scale_factor}{what scale factor to reference} 22 | 23 | \item{query_id}{which query is being used} 24 | 25 | \item{format}{which format} 26 | 27 | \item{compression}{which compression to use (default: "uncompressed")} 28 | 29 | \item{con}{a connection} 30 | 31 | \item{memory_map}{should the file be memory mapped? (only relevant for the "native" format with Arrow)} 32 | 33 | \item{chunk_size}{what chunk_size should be used with the source files? (default: NULL, the default for the file format)} 34 | } 35 | \description{ 36 | This returns a function which will return a table reference with the specified 37 | parameters. 38 | } 39 | -------------------------------------------------------------------------------- /R/bm-table-to-df.R: -------------------------------------------------------------------------------- 1 | #' Benchmark for reading an Arrow table to a data.frame 2 | #' 3 | #' This flexes conversion to R data structures from Arrow data structures.
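#'
#' A minimal usage sketch (illustrative only; `nyctaxi_sample` is the small
#' test source used in this package's tests):
#'
#'     res <- run_benchmark(table_to_df, source = "nyctaxi_sample")
#'     as.data.frame(res)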
4 | #' 5 | #' @section Parameters: 6 | #' * `source` A known-file id to use (it will be read in to a data.frame first) 7 | #' 8 | #' @export 9 | table_to_df <- Benchmark("table_to_df", 10 | setup = function(source = names(known_sources)) { 11 | source <- ensure_source(source) 12 | result_dim <- get_source_attr(source, "dim") 13 | table <- read_source(source, as_data_frame = FALSE) 14 | 15 | transfer_func <- function(table) as.data.frame(table) 16 | 17 | BenchEnvironment( 18 | transfer_func = transfer_func, 19 | result_dim = result_dim, 20 | table = table 21 | ) 22 | }, 23 | before_each = { 24 | result <- NULL 25 | }, 26 | run = { 27 | result <- transfer_func(table) 28 | }, 29 | after_each = { 30 | stopifnot("The dimensions do not match" = all.equal(dim(result), result_dim)) 31 | result <- NULL 32 | }, 33 | valid_params = function(params) params, 34 | packages_used = function(params) "arrow" 35 | ) 36 | 37 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021 Ursa Computing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/as.data.frame.BenchmarkResults.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result.R 3 | \name{as.data.frame.BenchmarkResults} 4 | \alias{as.data.frame.BenchmarkResults} 5 | \alias{as.data.frame.BenchmarkResult} 6 | \title{Convert benchmark result object to a tidy data frame} 7 | \usage{ 8 | \method{as.data.frame}{BenchmarkResults}(x, row.names = NULL, optional = FALSE, ...) 9 | 10 | \method{as.data.frame}{BenchmarkResult}(x, row.names = NULL, optional = FALSE, packages = "arrow", ...) 11 | } 12 | \arguments{ 13 | \item{x}{a benchmark result object or list of them as returned by \code{\link[=run_one]{run_one()}} or \code{\link[=run_benchmark]{run_benchmark()}}} 14 | 15 | \item{row.names}{for generic consistency} 16 | 17 | \item{optional}{for generic consistency} 18 | 19 | \item{...}{additional arguments passed on to methods for individual results. 
20 | \code{packages} is the only currently supported argument.} 21 | 22 | \item{packages}{Packages for which to extract versions} 23 | } 24 | \value{ 25 | A data.frame suitable for analysis in R 26 | } 27 | \description{ 28 | Convert benchmark result object to a tidy data frame 29 | } 30 | -------------------------------------------------------------------------------- /man/tpc_h.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bm-tpc-h.R 3 | \docType{data} 4 | \name{tpc_h} 5 | \alias{tpc_h} 6 | \title{Benchmark TPC-H queries} 7 | \format{ 8 | An object of class \code{Benchmark} of length 11. 9 | } 10 | \usage{ 11 | tpc_h 12 | } 13 | \description{ 14 | Benchmark TPC-H queries 15 | } 16 | \section{Parameters}{ 17 | 18 | \itemize{ 19 | \item \code{engine} One of \code{c("arrow", "duckdb", "dplyr")} 20 | \item \code{query_id} integer, 1-22 21 | \item \code{format} One of \code{c("parquet", "feather", "native")} 22 | \item \code{scale_factor} Scale factor to use for data generation (e.g. 0.1, 1, 10, 100) 23 | \item \code{memory_map} Should memory mapping be used when reading a file in? (only 24 | applicable to the "arrow" engine with the "native" format; \code{FALSE} will result in the file being explicitly 25 | read into memory before the benchmark) 26 | \item \code{output} the format of the output (either \code{"data_frame"} (default) or \code{"arrow_table"}) 27 | \item \code{chunk_size} the row-group size to aim for in parquet or feather files (default: 28 | \code{NULL}, which uses the default for \code{arrow::write_parquet()} or \code{arrow::write_feather()}) 29 | } 30 | } 31 | 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /tests/testthat/test-bm-read-file.R: -------------------------------------------------------------------------------- 1 | test_that("read_file validation", { 2 | # read_file has a few combinations in its default arguments that aren't valid 3 | read_file_no_validate <- read_file 4 | read_file_no_validate$valid_params <- NULL 5 | 6 | params_no_validate <- get_default_parameters(read_file_no_validate) 7 | 8 | params <- get_default_parameters(read_file) 9 | 10 | expect_lt(nrow(params), nrow(params_no_validate)) 11 | 12 | # specifically feather+snappy is not a possibility 13 | expect_identical( 14 | nrow(params[params$file_type == "feather" & params$compression == "snappy", ]), 15 | 0L 16 | ) 17 | }) 18 | 19 | for (file_type in c("parquet", "feather")) { 20 | if (file_type == "parquet") { 21 | compression <- c("uncompressed", "snappy", "lz4") 22 | } else { 23 | compression <- "uncompressed" 24 | } 25 | 26 | test_that(paste0("read_file benchmark works for ", file_type), { 27 | expect_benchmark_run( 28 | run_benchmark( 29 | read_file, 30 | source = "nyctaxi_sample", 31 | file_type = file_type, 32 | compression = compression, 33 | output_type = c("arrow_table", "data_frame"), 34 | cpu_count = arrow::cpu_count() 35 | ) 36 | ) 37 | }) 38 | } 39 | 40 | 41 | wipe_results() 42 | -------------------------------------------------------------------------------- /R/publish.R: -------------------------------------------------------------------------------- 1 | # Call benchconnect 2 | # 3 | # @param args A character vector of arguments to pass to the benchconnect binary 4 | # 5 | # @returns A string of stdout returned by the call 6 | call_benchconnect <- function(args) { 7 | stopifnot(benchconnect_available()) 8 | res <- processx::run(command = 
"benchconnect", args = args, echo_cmd = TRUE, echo = TRUE) 9 | message(res$stderr) 10 | res$stdout 11 | } 12 | 13 | 14 | augment_run <- function(run) { 15 | stdout <- call_benchconnect(c("augment", "run", "--json", run$json)) 16 | BenchmarkRun$from_json(stdout) 17 | } 18 | 19 | augment_result <- function(result) { 20 | stdout <- call_benchconnect(c("augment", "result", "--json", result$json)) 21 | BenchmarkResult$from_json(stdout) 22 | } 23 | 24 | 25 | start_run <- function(run) { 26 | call_benchconnect(c("start", "run", "--json", run$json)) 27 | } 28 | 29 | submit_result <- function(result) { 30 | call_benchconnect(c("submit", "result", "--json", result$json)) 31 | } 32 | 33 | finish_run <- function(run) { 34 | # Ed note: `run` is not used right now, but there are some things we can pass 35 | # here in the future, so I put it here for parallelism for now. Since it is 36 | # not evaluated, it doesn't need to be specified for now. 37 | call_benchconnect(c("finish", "run", "--json", "{}")) 38 | } 39 | -------------------------------------------------------------------------------- /R/bm-placebo.R: -------------------------------------------------------------------------------- 1 | #' Placebo benchmark for testing 2 | #' 3 | #' @section Parameters: 4 | #' * `duration` the duration for the benchmark to take 5 | #' * `error_type` `NULL` to cause no error, `"rlang::abort"` to use rlang's 6 | #' `abort` and any other string (including `"base"`) will use base's `stop` 7 | #' 8 | #' @keywords internal 9 | placebo <- Benchmark("placebo", 10 | setup = function(duration = 0.01, error_type = NULL, output_type = NULL, grid = TRUE) { 11 | BenchEnvironment(placebo_func = function() { 12 | if (!is.null(output_type)) { 13 | msg <- "here's some output" 14 | if (output_type == "message") { 15 | message("A message: ", msg) 16 | } else if (output_type == "warning") { 17 | warning("A warning:", msg) 18 | } else if (output_type == "cat") { 19 | cat("A cat:", msg) 20 | } 21 | } 22 | 23 | if (!is.null(error_type)) { 24 | msg <- "something went wrong (but I knew that)" 25 | if (error_type == "rlang::abort") { 26 | rlang::abort(msg) 27 | } 28 | stop(msg) 29 | } 30 | Sys.sleep(duration) 31 | }) 32 | }, 33 | before_each = TRUE, 34 | run = { 35 | placebo_func() 36 | }, 37 | after_each = TRUE, 38 | valid_params = function(params) { 39 | params 40 | }, 41 | packages_used = function(params) { 42 | "base" 43 | } 44 | ) 45 | -------------------------------------------------------------------------------- /man/get_default_parameters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/params.R 3 | \name{get_default_parameters} 4 | \alias{get_default_parameters} 5 | \alias{get_default_parameters.default} 6 | \alias{get_default_parameters.Benchmark} 7 | \alias{get_default_parameters.BenchmarkDataFrame} 8 | \title{Generate a dataframe of default parameters for a benchmark} 9 | \usage{ 10 | get_default_parameters(x, ...) 11 | 12 | \method{get_default_parameters}{default}(x, ...) 13 | 14 | \method{get_default_parameters}{Benchmark}(x, ...) 15 | 16 | \method{get_default_parameters}{BenchmarkDataFrame}(x, ...) 17 | } 18 | \arguments{ 19 | \item{x}{An object for which to generate parameters} 20 | 21 | \item{...}{Named arguments corresponding to the parameters of \code{bm}'s \code{setup} 22 | function. May also contain global params \code{cpu_count}, \code{lib_path}, \code{mem_alloc}, 23 | and \code{drop_caches}. 
See the "Parameterizing benchmarks" section of \code{\link[=Benchmark]{Benchmark()}} 24 | for more details.} 25 | } 26 | \value{ 27 | For \code{get_default_parameters.Benchmark}, a dataframe of parameter combinations 28 | to try with a column for each parameter and a row for each combination. 29 | } 30 | \description{ 31 | Generates a dataframe of parameter combinations for a benchmark to try based 32 | on the parameter defaults of its \code{setup} function and supplied parameters. 33 | } 34 | -------------------------------------------------------------------------------- /tests/testthat/test-ensure-tpch-source.R: -------------------------------------------------------------------------------- 1 | # This test (might) include installing a custom version of DuckDB that has the 2 | # tpc-h extension built. This doesn't work well when coverage is running, so 3 | # skip these tests when generating coverage. 4 | skip_on_covr() 5 | skip_if(Sys.getenv("ARROWBENCH_TEST_CUSTOM_DUCKDB", "") == "") 6 | 7 | temp_dir <- tempfile() 8 | dir.create(temp_dir) 9 | 10 | expected_filenames <- as.list(set_names( 11 | file.path(temp_dir, paste0(tpch_tables, "_0.0001.parquet")), 12 | nm = tpch_tables 13 | )) 14 | 15 | withr::with_envvar( 16 | list(ARROWBENCH_DATA_DIR = temp_dir), 17 | { 18 | 19 | test_that("can generate a small dataset", { 20 | tpch_files <- ensure_tpch(0.0001) 21 | expect_identical( 22 | tpch_files, 23 | expected_filenames 24 | ) 25 | }) 26 | 27 | test_that("can read that same small dataset if it is in the data folder already", { 28 | mockery::stub(ensure_tpch, 'generate_tpch', function(scale_factor) stop("this should not be called")) 29 | tpch_files <- ensure_tpch(0.0001) 30 | expect_identical( 31 | tpch_files, 32 | expected_filenames 33 | ) 34 | }) 35 | 36 | test_that("and ensure gets the same thing", { 37 | tpch_files <- ensure_source("tpch", scale_factor = 0.0001) 38 | expect_identical( 39 | tpch_files, 40 | expected_filenames 41 | ) 42 | }) 43 | } 44 | ) 45 | -------------------------------------------------------------------------------- /R/bm-df-to-table.R: -------------------------------------------------------------------------------- 1 | #' Benchmark for reading a data.frame into an Arrow table 2 | #' 3 | #' This flexes that conversion from R data structures to Arrow data structures. 4 | #' 5 | #' @section Parameters: 6 | #' * `source` A known-file id to use (it will be read in to a data.frame first) 7 | #' 8 | #' @export 9 | df_to_table <- Benchmark("dataframe-to-table", 10 | setup = function( 11 | source = c( 12 | "chi_traffic_2020_Q1", 13 | "type_strings", 14 | "type_dict", 15 | "type_integers", 16 | "type_floats", 17 | "type_nested" 18 | ) 19 | ) { 20 | source <- ensure_source(source) 21 | result_dim <- get_source_attr(source, "dim") 22 | # Make sure that we're not (accidentally) creating altrep vectors which will 23 | # make the benchmark measure both arrow->R and then also R->arrow when we 24 | # really want to just measure R->arrow. 
25 | df <- read_source(source, as_data_frame = TRUE) 26 | 27 | transfer_func <- function(df) arrow::Table$create(df) 28 | 29 | BenchEnvironment( 30 | transfer_func = transfer_func, 31 | result_dim = result_dim, 32 | df = df 33 | ) 34 | }, 35 | before_each = { 36 | result <- NULL 37 | }, 38 | run = { 39 | result <- transfer_func(df) 40 | }, 41 | after_each = { 42 | stopifnot("The dimensions do not match" = all.equal(dim(result), result_dim)) 43 | result <- NULL 44 | }, 45 | valid_params = function(params) params, 46 | packages_used = function(params) "arrow" 47 | ) 48 | 49 | -------------------------------------------------------------------------------- /man/run_bm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run_bm} 4 | \alias{run_bm} 5 | \title{Execute a benchmark run} 6 | \usage{ 7 | run_bm( 8 | bm, 9 | ..., 10 | n_iter = 1, 11 | batch_id = NULL, 12 | profiling = FALSE, 13 | global_params = list(), 14 | run_id = NULL, 15 | run_name = NULL, 16 | run_reason = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{bm}{\code{\link[=Benchmark]{Benchmark()}} object} 21 | 22 | \item{...}{parameters passed to \code{bm$setup()} or global parameters; see the 23 | "Parameterizing benchmarks" section of \code{\link[=Benchmark]{Benchmark()}}} 24 | 25 | \item{n_iter}{Integer number of iterations to replicate each benchmark} 26 | 27 | \item{batch_id}{a length 1 character vector to identify the batch} 28 | 29 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 30 | contain a \code{prof_file} field, which you can read in with 31 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 32 | 33 | \item{global_params}{the global parameters that have been set} 34 | 35 | \item{run_id}{Unique ID for the run} 36 | 37 | \item{run_name}{Name for the run} 38 | 39 | \item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} 40 | } 41 | \description{ 42 | This is the function that gets called in the script that \code{\link[=run_one]{run_one()}} prepares. 43 | You may call this function interactively, but you won't get the isolation 44 | in a fresh R process that \code{run_one()} provides. 
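For example, a hedged sketch using the package's internal \code{placebo} test
benchmark: \code{run_bm(placebo, duration = 0.01, n_iter = 2)}.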
45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | 6 | name: pkgdown 7 | 8 | jobs: 9 | pkgdown: 10 | runs-on: macOS-latest 11 | env: 12 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - uses: r-lib/actions/setup-r@v2 17 | 18 | - uses: r-lib/actions/setup-pandoc@v2 19 | 20 | - name: Query dependencies 21 | run: | 22 | install.packages('remotes') 23 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 24 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 25 | shell: Rscript {0} 26 | 27 | - name: Cache R packages 28 | uses: actions/cache@v2 29 | with: 30 | path: ${{ env.R_LIBS_USER }} 31 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 32 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 33 | 34 | - name: Install dependencies 35 | run: | 36 | remotes::install_deps(dependencies = TRUE) 37 | install.packages("pkgdown", type = "binary") 38 | shell: Rscript {0} 39 | 40 | - name: Install package 41 | run: R CMD INSTALL . 42 | 43 | - name: Deploy package 44 | run: | 45 | git config --local user.email "actions@github.com" 46 | git config --local user.name "GitHub Actions" 47 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' 48 | -------------------------------------------------------------------------------- /tests/testthat/test-params.R: -------------------------------------------------------------------------------- 1 | test_that("get_default_parameters.BenchmarkDataFrame() can fill in params col", { 2 | bm_list <- list(placebo, placebo) 3 | bm_df <- BenchmarkDataFrame(bm_list) 4 | assert_benchmark_dataframe(bm_df, bm_list) 5 | 6 | bm_df_augmented <- get_default_parameters(bm_df) 7 | assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters)) 8 | lapply(bm_df_augmented$parameters, function(param_df) { 9 | expect_s3_class(param_df, "data.frame") 10 | expect_equal(param_df, get_default_parameters(placebo)) 11 | expect_gt(nrow(param_df), 0L) 12 | }) 13 | 14 | # handle keyword args 15 | bm_df_augmented <- get_default_parameters(bm_df, duration = 1) 16 | assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters, duration = 1)) 17 | lapply(bm_df_augmented$parameters, function(param_df) { 18 | expect_s3_class(param_df, "data.frame") 19 | expect_equal(param_df, get_default_parameters(placebo, duration = 1)) 20 | expect_gt(nrow(param_df), 0L) 21 | }) 22 | 23 | # handle partially-specified param lists 24 | bm_df <- BenchmarkDataFrame(bm_list, parameters = list(get_default_parameters(placebo, duration = 1), NULL)) 25 | bm_df_augmented <- get_default_parameters(bm_df, duration = 1) 26 | assert_benchmark_dataframe(bm_df_augmented, bm_list, lapply(bm_list, get_default_parameters, duration = 1)) 27 | lapply(bm_df_augmented$parameters, function(param_df) { 28 | expect_s3_class(param_df, "data.frame") 29 | expect_equal(param_df, get_default_parameters(placebo, duration = 1)) 30 | expect_gt(nrow(param_df), 0L) 31 | }) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-benchmark-dataframe.R: 
-------------------------------------------------------------------------------- 1 | test_that("BenchmarkDataFrame can be instantiated", { 2 | for (bm_list in list( 3 | list(placebo), 4 | list(placebo, placebo), 5 | list(a = placebo, b = placebo) 6 | )) { 7 | bm_df <- BenchmarkDataFrame(benchmarks = bm_list) 8 | assert_benchmark_dataframe(bm_df, benchmarks = bm_list) 9 | } 10 | 11 | bm_list <- list(placebo, placebo) 12 | param_list <- list(get_default_parameters(placebo), NULL) 13 | bm_df <- BenchmarkDataFrame(benchmarks = bm_list, parameters = param_list) 14 | assert_benchmark_dataframe(bm_df, benchmarks = bm_list, parameters = param_list) 15 | 16 | expect_error( 17 | BenchmarkDataFrame(1), 18 | "All elements of `benchmarks` are not of class `Benchmark`!" 19 | ) 20 | }) 21 | 22 | 23 | test_that("format.BenchmarkDataFrame() works", { 24 | bm_df <- BenchmarkDataFrame(benchmarks = list(placebo)) 25 | expect_output(print(bm_df), "# ") 26 | }) 27 | 28 | 29 | # A vector of benchmark attribute names run on `ursa-i9-9960x` 30 | URSA_I9_9960X_R_BENCHMARK_NAMES <- c( 31 | "dataframe-to-table", # `df_to_table` 32 | "file-read", 33 | "file-write", 34 | "partitioned-dataset-filter", # `dataset_taxi_parquet` 35 | "wide-dataframe", # not actually an R benchmark 36 | "tpch" # `tpc_h` 37 | ) 38 | 39 | test_that("`get_package_benchmarks()` works", { 40 | bm_df <- get_package_benchmarks() 41 | assert_benchmark_dataframe(bm_df = bm_df, benchmarks = bm_df$benchmark) 42 | expect_gt(nrow(bm_df), 0L) 43 | # currently `any()` because `wide-dataframe` is actually a Python benchmark, 44 | # but is still listed in arrow-benchmarks-ci in R. If removed, change to `all()`. 45 | expect_true(any(URSA_I9_9960X_R_BENCHMARK_NAMES %in% bm_df$name)) 46 | }) 47 | -------------------------------------------------------------------------------- /R/benchmark-dataframe.R: -------------------------------------------------------------------------------- 1 | #' A classed dataframe of benchmarks for running 2 | #' 3 | #' @param benchmarks A list with elements of class `Benchmark` 4 | #' @param parameters Optional. A list of dataframes of parameter combinations to 5 | #' run as generated by [get_default_parameters()]. If null, defaults will be generated 6 | #' when [run()] is called. 7 | #' 8 | #' @return A classed dataframe with `name` (benchmark attribute, not object name), 9 | #' `benchmark`, and `params` columns 10 | #' 11 | #' @export 12 | BenchmarkDataFrame <- function(benchmarks, parameters) { 13 | lapply(benchmarks, function(bm) stopifnot( 14 | "All elements of `benchmarks` are not of class `Benchmark`!" = inherits(bm, "Benchmark") 15 | )) 16 | 17 | bm_names <- vapply(benchmarks, function(bm) bm$name, character(1)) 18 | if (missing(parameters)) { 19 | parameters <- rep(list(NULL), length = length(benchmarks)) 20 | } 21 | 22 | structure( 23 | tibble::tibble( 24 | name = bm_names, 25 | benchmark = benchmarks, 26 | parameters = parameters 27 | ), 28 | class = c("BenchmarkDataFrame", "tbl_df", "tbl", "data.frame") 29 | ) 30 | } 31 | 32 | 33 | #' @export 34 | format.BenchmarkDataFrame <- function(x, ...) 
{ 35 | c("# ", NextMethod()) 36 | } 37 | 38 | 39 | #' Get a list of benchmarks in a package 40 | #' 41 | #' @param package String of package name in which to find benchmarks 42 | #' 43 | #' @return An instance of [BenchmarkDataFrame] with all the benchmarks contained 44 | #' by a package 45 | #' 46 | #' @export 47 | get_package_benchmarks <- function(package = "arrowbench") { 48 | nms <- getNamespaceExports(package) 49 | objs <- mget(nms, envir = getNamespace(package)) 50 | bms <- Filter(function(x) inherits(x, "Benchmark"), objs) 51 | BenchmarkDataFrame(benchmarks = bms) 52 | } 53 | -------------------------------------------------------------------------------- /man/run_one.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run_one} 4 | \alias{run_one} 5 | \title{Run a Benchmark with a single set of parameters} 6 | \usage{ 7 | run_one( 8 | bm, 9 | ..., 10 | n_iter = 1, 11 | batch_id = NULL, 12 | dry_run = FALSE, 13 | profiling = FALSE, 14 | progress_bar = NULL, 15 | read_only = FALSE, 16 | run_id = NULL, 17 | run_name = NULL, 18 | run_reason = NULL, 19 | test_packages = NULL 20 | ) 21 | } 22 | \arguments{ 23 | \item{bm}{\code{\link[=Benchmark]{Benchmark()}} object} 24 | 25 | \item{...}{parameters passed to \code{bm$setup()} or global parameters; see the 26 | "Parameterizing benchmarks" section of \code{\link[=Benchmark]{Benchmark()}}} 27 | 28 | \item{n_iter}{Integer number of iterations to replicate each benchmark} 29 | 30 | \item{batch_id}{a length 1 character vector to identify the batch} 31 | 32 | \item{dry_run}{logical: just return the R source code that would be run in 33 | a subprocess? Default is \code{FALSE}, meaning that the benchmarks will be run.} 34 | 35 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 36 | contain a \code{prof_file} field, which you can read in with 37 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 38 | 39 | \item{progress_bar}{a \code{progress} object to update progress to (default \code{NULL})} 40 | 41 | \item{read_only}{this will only attempt to read benchmark files and will not 42 | run any that it cannot find.} 43 | 44 | \item{run_id}{Unique ID for the run} 45 | 46 | \item{run_name}{Name for the run} 47 | 48 | \item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} 49 | 50 | \item{test_packages}{a character vector of packages that the benchmarks test (default \code{NULL})} 51 | } 52 | \value{ 53 | An instance of \code{BenchmarkResult}: an R6 object containing either 54 | "stats" or "error". 
55 | } 56 | \description{ 57 | Run a Benchmark with a single set of parameters 58 | } 59 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | runs-on: macOS-latest 10 | env: 11 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: r-lib/actions/pr-fetch@v2 15 | with: 16 | repo-token: ${{ secrets.GITHUB_TOKEN }} 17 | - uses: r-lib/actions/setup-r@v2 18 | - name: Install dependencies 19 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 20 | - name: Document 21 | run: Rscript -e 'roxygen2::roxygenise()' 22 | - name: commit 23 | run: | 24 | git config --local user.email "actions@github.com" 25 | git config --local user.name "GitHub Actions" 26 | git add man/\* NAMESPACE 27 | git commit -m 'Document' 28 | - uses: r-lib/actions/pr-push@v2 29 | with: 30 | repo-token: ${{ secrets.GITHUB_TOKEN }} 31 | style: 32 | if: startsWith(github.event.comment.body, '/style') 33 | name: style 34 | runs-on: macOS-latest 35 | env: 36 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 37 | steps: 38 | - uses: actions/checkout@v3 39 | - uses: r-lib/actions/pr-fetch@v2 40 | with: 41 | repo-token: ${{ secrets.GITHUB_TOKEN }} 42 | - uses: r-lib/actions/setup-r@v2 43 | - name: Install dependencies 44 | run: Rscript -e 'install.packages("styler")' 45 | - name: Style 46 | run: Rscript -e 'styler::style_pkg()' 47 | - name: commit 48 | run: | 49 | git config --local user.email "actions@github.com" 50 | git config --local user.name "GitHub Actions" 51 | git add \*.R 52 | git commit -m 'Style' 53 | - uses: r-lib/actions/pr-push@v2 54 | with: 55 | repo-token: ${{ secrets.GITHUB_TOKEN }} 56 | -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | wipe_results <- function() unlink(test_path("results/"), recursive = TRUE) 2 | 3 | expect_benchmark_run <- function(..., success = TRUE) { 4 | suppress_deparse_warning( 5 | # Capture the messages 6 | output <- capture.output( 7 | # Expect some console output 8 | expect_output( 9 | result <- eval(...) 10 | ), 11 | type = "message" 12 | ) 13 | ) 14 | 15 | expect_s3_class(result, "BenchmarkResults") 16 | 17 | # If we require success, then we should confirm that the `error` attribute of 18 | # each result is empty 19 | if (success) { 20 | # the calling handler, etc is all so that we can send _one_ instance of the 21 | # message output and not a bunch 22 | messaged <- FALSE 23 | withCallingHandlers( 24 | for (res in result$results) { 25 | expect_null(res$error) 26 | }, 27 | error = function(e) { 28 | if (!messaged) { 29 | message(paste0(output, collapse = "\n")) 30 | messaged <<- TRUE 31 | } 32 | e 33 | } 34 | ) 35 | } 36 | 37 | } 38 | 39 | suppress_deparse_warning <- function(...) 
{ 40 | # suppress the "deparse may be incomplete" warnings, which are a side-effect of 41 | # load_all + testing 42 | withCallingHandlers( 43 | ..., 44 | warning = function(w) { 45 | if (startsWith(conditionMessage(w), "deparse may be incomplete")) 46 | invokeRestart("muffleWarning") 47 | }) 48 | } 49 | 50 | 51 | assert_benchmark_dataframe <- function(bm_df, benchmarks, parameters) { 52 | if (missing(parameters)) { 53 | parameters <- rep(list(NULL), length(benchmarks)) 54 | } 55 | 56 | expect_s3_class(bm_df, c("BenchmarkDataFrame", "tbl", "tbl_df", "data.frame")) 57 | expect_true(all(c("name", "benchmark", "parameters") %in% names(bm_df))) 58 | expect_equal(nrow(bm_df), length(benchmarks)) 59 | expect_equal(bm_df$name, vapply(benchmarks, function(x) x$name, character(1))) 60 | expect_equal(bm_df$benchmark, benchmarks) 61 | expect_equal(bm_df$parameters, parameters) 62 | } 63 | 64 | -------------------------------------------------------------------------------- /man/run_benchmark.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run_benchmark} 4 | \alias{run_benchmark} 5 | \title{Run a Benchmark across a range of parameters} 6 | \usage{ 7 | run_benchmark( 8 | bm, 9 | ..., 10 | params = get_default_parameters(bm, ...), 11 | n_iter = 1, 12 | dry_run = FALSE, 13 | profiling = FALSE, 14 | read_only = FALSE, 15 | run_id = NULL, 16 | run_name = NULL, 17 | run_reason = NULL 18 | ) 19 | } 20 | \arguments{ 21 | \item{bm}{\code{\link[=Benchmark]{Benchmark()}} object} 22 | 23 | \item{...}{Optional benchmark parameters to run across} 24 | 25 | \item{params}{\code{data.frame} of parameter combinations. By default, this will 26 | be constructed from the expansion of the \code{...} arguments, the declared 27 | parameter options in \code{bm$setup}, and any restrictions potentially defined in 28 | \code{bm$valid_params()}.} 29 | 30 | \item{n_iter}{Integer number of iterations to replicate each benchmark. If 31 | \code{n_iter} is also supplied in \code{params}, that takes precedence.} 32 | 33 | \item{dry_run}{logical: just return the R source code that would be run in 34 | a subprocess? Default is \code{FALSE}, meaning that the benchmarks will be run.} 35 | 36 | \item{profiling}{Logical: collect prof info? If \code{TRUE}, the result data will 37 | contain a \code{prof_file} field, which you can read in with 38 | \code{profvis::profvis(prof_input = file)}. Default is \code{FALSE}} 39 | 40 | \item{read_only}{this will only attempt to read benchmark files and will not 41 | run any that it cannot find.} 42 | 43 | \item{run_id}{Unique ID for the run} 44 | 45 | \item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}} 46 | 47 | \item{run_reason}{Low-cardinality reason for the run, e.g. "commit" or "test"} 48 | } 49 | \value{ 50 | A \code{BenchmarkResults} object whose \code{results} attribute is a list 51 | of length \code{nrow(params)}, each element a \code{BenchmarkResult} object. 52 | For a simpler view of results, call \code{as.data.frame()} on it.
53 | } 54 | \description{ 55 | Run a Benchmark across a range of parameters 56 | } 57 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: arrowbench 2 | Type: Package 3 | Title: Tools for Continuous and Interactive Benchmarking 4 | Version: 0.2.0 5 | Authors@R: c( 6 | person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = "aut"), 7 | person("Jonathan", "Keane", email = "jkeane@gmail.com", role = c("aut", "cre")), 8 | person("Edward", "Visel", email = "edward.visel@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-2811-6254")) 9 | ) 10 | Description: Tools for defining benchmarks, running them across a 11 | range of parameters, and reporting their results in a standardized form. 12 | License: MIT + file LICENSE 13 | Encoding: UTF-8 14 | Depends: R (>= 3.5.0) 15 | Imports: 16 | arrow, 17 | bench, 18 | dplyr, 19 | duckdb (>= 0.8.0), 20 | distro, 21 | glue, 22 | jsonlite, 23 | processx, 24 | progress, 25 | purrr, 26 | R6, 27 | remotes, 28 | rlang, 29 | R.utils, 30 | sessioninfo, 31 | tibble, 32 | utils, 33 | uuid, 34 | waldo, 35 | withr 36 | Suggests: 37 | testthat (>= 3.0.0), 38 | archive, 39 | data.table, 40 | DBI, 41 | dbplyr, 42 | fst, 43 | jsonify, 44 | lubridate, 45 | mockery, 46 | ndjson, 47 | RcppSimdJson, 48 | readr, 49 | vroom 50 | RoxygenNote: 7.2.3 51 | Roxygen: list(markdown = TRUE, load = "source") 52 | Collate: 53 | 'benchmark-dataframe.R' 54 | 'benchmark.R' 55 | 'bm-array-altrep-materialization.R' 56 | 'bm-array-to-vector.R' 57 | 'bm-dataset-taxi-2013.R' 58 | 'bm-dataset-taxi-parquet.R' 59 | 'bm-df-to-table.R' 60 | 'bm-placebo.R' 61 | 'bm-read-csv.R' 62 | 'bm-read-file.R' 63 | 'bm-read-json.R' 64 | 'bm-remote-dataset.R' 65 | 'bm-row-group-size.R' 66 | 'bm-table-to-df.R' 67 | 'bm-tpc-h.R' 68 | 'bm-write-csv.R' 69 | 'bm-write-file.R' 70 | 'custom-duckdb.R' 71 | 'ensure-format.R' 72 | 'ensure-lib.R' 73 | 'known-sources.R' 74 | 'ensure-source.R' 75 | 'ensure-tpch-source.R' 76 | 'external-dependencies.R' 77 | 'measure.R' 78 | 'params.R' 79 | 'publish.R' 80 | 'util.R' 81 | 'result.R' 82 | 'run.R' 83 | 'setup.R' 84 | 'tpch-queries.R' 85 | Config/testthat/edition: 3 86 | -------------------------------------------------------------------------------- /R/ensure-tpch-source.R: -------------------------------------------------------------------------------- 1 | #' Table names for TPC-H benchmarks 2 | #' 3 | #' @keywords internal 4 | #' @export 5 | tpch_tables <- c("customer", "lineitem", "nation", "orders", "part", "partsupp", "region", "supplier") 6 | 7 | #' Generate tpch data 8 | #' 9 | #' Generate tpch data at a given scale factor. By default, 10 | #' data is output relative to the current working directory. However, 11 | #' you can set the environment variable `ARROWBENCH_DATA_DIR` to 12 | #' point to another directory. Setting this environment variable has 13 | #' the advantage of being a central location for general usage. Running 14 | #' this function will install a custom version of duckdb in an `r_libs` 15 | #' directory, relative to the directory specified by the environment 16 | #' variable `ARROWBENCH_LOCAL_DIR`. When running this function for the first time you will 17 | #' see significant output from that installation process. This is normal. 18 | #' 19 | #' @param scale_factor a relative measure of the size of data in gigabytes. 
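#'
#' A minimal usage sketch (hedged; a scale factor of 0.01 keeps the output
#' small, and the return value is a named list of parquet file paths):
#'
#'     files <- generate_tpch(scale_factor = 0.01)
#'     files$lineitem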
20 | #' 21 | #' @export 22 | generate_tpch <- function(scale_factor = 1) { 23 | duckdb_file <- tempfile() 24 | on.exit(unlink(duckdb_file, recursive = TRUE)) 25 | 26 | # generate the tables 27 | query_custom_duckdb( 28 | paste0("CALL dbgen(sf=", scale_factor, ");"), 29 | dbdir = duckdb_file 30 | ) 31 | 32 | # write each table to parquet 33 | out <- lapply(tpch_tables, function(name) { 34 | filename <- source_data_file(paste0(name, "_", format(scale_factor, scientific = FALSE), ".parquet")) 35 | query <- paste0("SELECT * FROM ", name, ";") 36 | export_custom_duckdb(query, filename, dbdir = duckdb_file) 37 | 38 | filename 39 | }) 40 | 41 | set_names(out, tpch_tables) 42 | } 43 | 44 | #' @importFrom rlang set_names 45 | ensure_tpch <- function(scale_factor = 1) { 46 | ensure_source_dirs_exist() 47 | 48 | filenames <- paste0(paste(tpch_tables, format(scale_factor, scientific = FALSE), sep="_"), ".parquet") 49 | 50 | # Check for places these files might already be and return those. 51 | cached_files <- map(filenames, data_file) 52 | if (all(!map_lgl(cached_files, is.null))) { 53 | # if the files are in our temp storage or source storage, use them from there. 54 | return(set_names(cached_files, nm = tpch_tables)) 55 | } 56 | 57 | # generate it 58 | generate_tpch(scale_factor) 59 | } -------------------------------------------------------------------------------- /R/custom-duckdb.R: -------------------------------------------------------------------------------- 1 | ensure_custom_duckdb <- function() { 2 | result <- tryCatch({ 3 | con <- DBI::dbConnect(duckdb::duckdb()) 4 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 5 | DBI::dbExecute(con, "LOAD tpch;") 6 | DBI::dbGetQuery(con, "select scale_factor, query_nr from tpch_answers() LIMIT 1;") 7 | }, 8 | error = function(e) { 9 | error_is_from_us <- grepl( 10 | paste0(c( 11 | "(name tpch_answers is not on the catalog)", 12 | "(name tpch_answers does not exist)", 13 | "(tpch.duckdb_extension\" not found)" 14 | ), 15 | collapse = "|" 16 | ), 17 | conditionMessage(e) 18 | ) 19 | 20 | if (error_is_from_us) { 21 | NULL 22 | } else { 23 | rlang::abort( 24 | "An unexpected error occurred whilst querying TPC-H-enabled duckdb", 25 | parent = e 26 | ) 27 | } 28 | } 29 | ) 30 | 31 | # Check that the result has a query in it 32 | if (identical(result$query_nr, 1L)) { 33 | return(invisible(NULL)) 34 | } 35 | 36 | 37 | install_duckdb_tpch() 38 | result <- try( 39 | ensure_custom_duckdb(), 40 | silent = FALSE 41 | ) 42 | 43 | if (!inherits(result, "try-error")) { 44 | return(invisible(NULL)) 45 | } 46 | 47 | stop("Could not load the DuckDB TPC-H extension.") 48 | } 49 | 50 | query_custom_duckdb <- function(sql, dbdir = ":memory:") { 51 | ensure_custom_duckdb() 52 | 53 | con <- DBI::dbConnect(duckdb::duckdb(dbdir = dbdir)) 54 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 55 | DBI::dbExecute(con, "LOAD tpch;") 56 | DBI::dbGetQuery(con, sql) 57 | } 58 | 59 | export_custom_duckdb <- function(sql, sink, dbdir = ":memory:") { 60 | ensure_custom_duckdb() 61 | 62 | con <- DBI::dbConnect(duckdb::duckdb(dbdir = dbdir)) 63 | on.exit(DBI::dbDisconnect(con, shutdown = TRUE)) 64 | DBI::dbExecute(con, "LOAD tpch;") 65 | res <- DBI::dbSendQuery(con, sql, arrow = TRUE) 66 | 67 | # this could be streamed in the future when the parquet writer 68 | # in R supports streaming 69 | reader <- duckdb::duckdb_fetch_record_batch(res) 70 | table <- reader$read_table() 71 | arrow::write_parquet(table, sink) 72 | 73 | sink 74 | } 75 | 76 | install_duckdb_tpch <- function() { 77 | con <- 
DBI::dbConnect(duckdb::duckdb()) 78 | on.exit(DBI::dbDisconnect(con)) 79 | DBI::dbExecute(con, "INSTALL tpch; LOAD tpch;") 80 | } 81 | -------------------------------------------------------------------------------- /R/measure.R: -------------------------------------------------------------------------------- 1 | #' Measure times and memory usage 2 | #' 3 | #' @param ... An expression to 4 | #' @param drop_caches Attempt to drop the disk cache before each case or iteration. 5 | #' Currently only works on linux. Permissible values are `"case"`, `"iteration"`, 6 | #' and `NULL`. Defaults to `NULL`, i.e. don't drop caches. As `measure()` is run 7 | #' once per iteration, here `"iteration"` results in dropping caches once and 8 | #' `NULL` and `"case"` result in no cache dropping. 9 | #' @inheritParams run_benchmark 10 | #' 11 | #' @return A tibble of timings and memory usage 12 | #' @export 13 | measure <- function(..., profiling = FALSE, drop_caches = NULL) { 14 | start_mem <- bench::bench_process_memory() 15 | if (!is.null(drop_caches) && drop_caches == "iteration") { 16 | sync_and_drop_caches() 17 | } 18 | gc_info <- with_gc_info({ 19 | prof_file <- with_profiling(profiling, { 20 | timings <- bench::bench_time(eval.parent(...)) 21 | }) 22 | }) 23 | end_mem <- bench::bench_process_memory() 24 | 25 | timings <- as.data.frame(as.list(timings)) 26 | 27 | timings$start_mem_bytes <- as.numeric(start_mem["current"]) 28 | timings$end_mem_bytes <- as.numeric(end_mem["current"]) 29 | timings$max_mem_bytes <- as.numeric(end_mem["max"]) 30 | timings$prof_file <- prof_file 31 | 32 | cbind(timings, gc_info) 33 | } 34 | 35 | with_profiling <- function(profiling_on, expr) { 36 | if (profiling_on) { 37 | prof_file <- basename(tempfile(fileext = ".prof")) 38 | utils::Rprof(filename = prof_file, memory.profiling = TRUE, gc.profiling = TRUE, line.profiling = TRUE) 39 | on.exit(utils::Rprof(NULL)) 40 | } else { 41 | prof_file <- NULL 42 | } 43 | eval.parent(expr) 44 | prof_file 45 | } 46 | 47 | with_gc_info <- function(expr) { 48 | force(expr) 49 | with_gcinfo <- "bench" %:::% "with_gcinfo" 50 | gc_output <- with_gcinfo(eval.parent(expr)) 51 | # This will swallow errors, so check for error output and re-raise 52 | if (length(gc_output) > 0 && any(startsWith(gc_output, "Error")) ) { 53 | stop(paste(gc_output, collapse = "\n"), call. = FALSE) 54 | } 55 | parse_gc <- "bench" %:::% "parse_gc" 56 | gc <- parse_gc(gc_output) 57 | names(gc) <- paste0("gc_", names(gc)) 58 | if (nrow(gc) == 0) { 59 | # Means there was no garbage collection, so let's fill this in with 0s 60 | gc[1, ] <- list(0L, 0L, 0L) 61 | } 62 | # Cat out any messages so that we don't swallow them. 63 | # TODO: filter out what has been parsed? 
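  # (gc_output is the raw gcinfo stream: lines parse_gc() already consumed
  # are echoed again below, which is noisy but loses no information)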
64 | cat(gc_output) 65 | gc 66 | } 67 | 68 | # work around checks looking for`:::` 69 | `%:::%` = function(pkg, fun) get(fun, envir = asNamespace(pkg), inherits = FALSE) 70 | -------------------------------------------------------------------------------- /tests/testthat/test-util.R: -------------------------------------------------------------------------------- 1 | test_that("cache key", { 2 | expect_identical(bm_run_cache_key("foo", alpha = "one", beta = 2), "foo/one-2") 3 | expect_identical(bm_run_cache_key("foo", beta = 2, alpha = "one"), "foo/one-2") 4 | }) 5 | 6 | test_that("find_r()", { 7 | out <- system(paste(find_r(), "--no-save -s 2>&1"), intern = TRUE, input = "print('output')\n") 8 | expect_match(out, "output") 9 | 10 | # when system fails, there's also a warning 11 | expect_warning(error_out <- system(paste(find_r(), "--no-save -s 2>&1"), intern = TRUE, input = "stop('this is an error')\n")) 12 | expect_match(error_out[[1]], "this is an error") 13 | }) 14 | 15 | 16 | test_that("get_default_args", { 17 | func <- function( 18 | one = 1, 19 | a_few = c(1, 2, 3), 20 | null = NULL, 21 | # we need to use something in the package here for environment scoping + 22 | # testthat reasons 23 | a_vector = known_sources, 24 | none 25 | ) NULL 26 | 27 | expect_identical( 28 | get_default_args(func), 29 | list(one = 1, a_few = c(1, 2, 3), a_vector = known_sources) 30 | ) 31 | }) 32 | 33 | 34 | test_that("sync_and_drop_caches() works", { 35 | # @param ... named values where names are values for `args` and values are 36 | # whether to fail 37 | make_mock_run_function <- function(...) { 38 | dots <- list(...) 39 | function(command, args, error_on_status) { 40 | list(status = as.integer(dots[[args]])) 41 | } 42 | } 43 | 44 | cases = suppressWarnings(purrr::cross(list( 45 | "sync; echo 3 | sudo tee /proc/sys/vm/drop_caches" = c(TRUE, FALSE), 46 | "sync; sudo purge" = c(TRUE, FALSE) 47 | ))) 48 | 49 | for (case in cases) { 50 | options( 51 | "arrowbench.drop_caches_failed" = NULL, 52 | "arrowbench.purge_failed" = NULL 53 | ) 54 | 55 | mockery::stub( 56 | where = sync_and_drop_caches, 57 | what = "processx::run", 58 | how = do.call(make_mock_run_function, case) 59 | ) 60 | 61 | expect_identical(sync_and_drop_caches(), any(!unlist(case))) 62 | 63 | if (case[["sync; echo 3 | sudo tee /proc/sys/vm/drop_caches"]]) { 64 | expect_true(getOption("arrowbench.drop_caches_failed")) 65 | if (case[["sync; sudo purge"]]) { 66 | expect_true(getOption("arrowbench.purge_failed")) 67 | } else { 68 | expect_null(getOption("arrowbench.purge_failed")) 69 | } 70 | } else { 71 | expect_null(getOption("arrowbench.drop_caches_failed")) 72 | expect_null(getOption("arrowbench.purge_failed")) 73 | } 74 | } 75 | 76 | options( 77 | "arrowbench.drop_caches_failed" = NULL, 78 | "arrowbench.purge_failed" = NULL 79 | ) 80 | }) -------------------------------------------------------------------------------- /man/R6Point1Class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result.R 3 | \name{R6Point1Class} 4 | \alias{R6Point1Class} 5 | \title{Version of R6 with heritable static/class methods and attributes} 6 | \usage{ 7 | R6Point1Class(..., static = NULL) 8 | } 9 | \arguments{ 10 | \item{...}{Passed through to \link[R6:R6Class]{R6::R6Class}} 11 | 12 | \item{static}{A named list of static/class functions/values to turn into 13 | methods/attributes. 
Note there is currently no differentiation between static 14 | and class methods; static methods are simply class methods that 15 | do not access \code{self}, though \code{self} will exist in their evaluation environment. 16 | This arrangement could change in the future if there is reason to.} 17 | } 18 | \description{ 19 | Elements in \code{static} can be called without instantiation, e.g. \code{Class$method()}. 20 | Functions are evaluated in the environment of \code{Class}, so you can refer to \code{self} 21 | (which here is the class, not the instance) to create class methods. 22 | } 23 | \section{Why this exists}{ 24 | 25 | 26 | Sometimes we want static/class methods/attributes that can be accessed from 27 | the class (e.g. \code{MyR6Class$my_static_method()}) instead of an instance of 28 | that class (e.g. \code{MyR6Class$new(...)$my_normal_method()}). As individual 29 | classes are environments, these can be added after the fact like so: 30 | 31 | \if{html}{\out{
}}\preformatted{MyR6Class <- R6Class(...) 32 | MyR6Class$my_static_method <- function(x) ... 33 | }\if{html}{\out{
}} 34 | 35 | But the problem with the above is it's not heritable; if you make a class that 36 | inherits from \code{MyR6Class}, it will not have \verb{$my_static_method()} unless you 37 | manually re-add it. 38 | 39 | This class structure abstracts the pattern, so when you create a new class, it 40 | checks if the parent contains anything in \code{private$static}, and copies over any 41 | methods/attributes there, less any overwritten in the new class. 42 | } 43 | 44 | \section{How static/class methods/attributes may be useful}{ 45 | 46 | 47 | There are lots of reasons you may want static/class methods/attributes, but 48 | the immediate use-case here is to create alternate methods for instantiating 49 | a class besides \verb{$new()}/\verb{$initialize()}. For instance, if a class can be 50 | represented as JSON, it's quite helpful to have a \verb{$from_json()} method that 51 | can recreate an instance from a JSON blob. 52 | 53 | You could have a separate special reader function that returns an instance, 54 | but especially as classes multiply this solution becomes difficult to 55 | maintain. 56 | } 57 | 58 | \keyword{internal} 59 | -------------------------------------------------------------------------------- /inst/benchmarks.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "command": "dataset_taxi_2013", 4 | "name": "arrowbench/dataset_taxi_2013", 5 | "runner": "arrowbench", 6 | "flags": { 7 | "language": "R" 8 | } 9 | }, 10 | { 11 | "command": "row_group_size", 12 | "name": "arrowbench/row_group_size", 13 | "runner": "arrowbench", 14 | "flags": { 15 | "language": "R" 16 | } 17 | }, 18 | { 19 | "command": "write_csv", 20 | "name": "arrowbench/write_csv", 21 | "runner": "arrowbench", 22 | "flags": { 23 | "language": "R" 24 | } 25 | }, 26 | { 27 | "command": "read_csv", 28 | "name": "arrowbench/read_csv", 29 | "runner": "arrowbench", 30 | "flags": { 31 | "language": "R" 32 | } 33 | }, 34 | { 35 | "command": "read_json", 36 | "name": "arrowbench/read_json", 37 | "runner": "arrowbench", 38 | "flags": { 39 | "language": "R" 40 | } 41 | }, 42 | { 43 | "command": "remote_dataset", 44 | "name": "arrowbench/remote_dataset", 45 | "runner": "arrowbench", 46 | "flags": { 47 | "language": "R" 48 | } 49 | }, 50 | { 51 | "command": "file-write", 52 | "name": "arrowbench/file-write", 53 | "runner": "arrowbench", 54 | "flags": { 55 | "language": "R" 56 | } 57 | }, 58 | { 59 | "command": "dataframe-to-table", 60 | "name": "arrowbench/dataframe-to-table", 61 | "runner": "arrowbench", 62 | "flags": { 63 | "language": "R" 64 | } 65 | }, 66 | { 67 | "command": "table_to_df", 68 | "name": "arrowbench/table_to_df", 69 | "runner": "arrowbench", 70 | "flags": { 71 | "language": "R" 72 | } 73 | }, 74 | { 75 | "command": "array_to_vector", 76 | "name": "arrowbench/array_to_vector", 77 | "runner": "arrowbench", 78 | "flags": { 79 | "language": "R" 80 | } 81 | }, 82 | { 83 | "command": "partitioned-dataset-filter", 84 | "name": "arrowbench/partitioned-dataset-filter", 85 | "runner": "arrowbench", 86 | "flags": { 87 | "language": "R" 88 | } 89 | }, 90 | { 91 | "command": "file-read", 92 | "name": "arrowbench/file-read", 93 | "runner": "arrowbench", 94 | "flags": { 95 | "language": "R" 96 | } 97 | }, 98 | { 99 | "command": "tpch", 100 | "name": "arrowbench/tpch", 101 | "runner": "arrowbench", 102 | "flags": { 103 | "language": "R" 104 | } 105 | }, 106 | { 107 | "command": "array_altrep_materialization", 108 | "name": "arrowbench/array_altrep_materialization", 109 | 
"runner": "arrowbench", 110 | "flags": { 111 | "language": "R" 112 | } 113 | } 114 | ] 115 | -------------------------------------------------------------------------------- /man/run.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/run.R 3 | \name{run} 4 | \alias{run} 5 | \alias{run.BenchmarkDataFrame} 6 | \title{Run an object} 7 | \usage{ 8 | run(x, ...) 9 | 10 | \method{run}{BenchmarkDataFrame}(x, ..., publish = FALSE, run_id = NULL, run_name = NULL, run_reason = NULL) 11 | } 12 | \arguments{ 13 | \item{x}{An S3 classed object to run} 14 | 15 | \item{...}{Additional arguments passed through to methods. For 16 | \code{run.BenchmarkDataFrame}, passed through to \code{\link[=get_default_parameters]{get_default_parameters()}} (when 17 | parameters are not specified) and \code{\link[=run_benchmark]{run_benchmark()}}.} 18 | 19 | \item{publish}{Flag for whether to publish results to a Conbench server. See 20 | "Environment Variables" section for how to specify server details. Requires 21 | the benchconnect CLI is installed; see \code{\link[=install_benchconnect]{install_benchconnect()}}.} 22 | 23 | \item{run_id}{Unique ID for the run. If not specified, will be generated.} 24 | 25 | \item{run_name}{Name for the run. If not specified, will use \verb{\{run_reason\}: \{commit hash\}}} 26 | 27 | \item{run_reason}{Required. Low-cardinality reason for the run, e.g. "commit" or "test"} 28 | } 29 | \value{ 30 | A modified object containing run results. For \code{run.BenchmarkDataFrame}, 31 | a \code{results} list column is appended. 32 | } 33 | \description{ 34 | Run an object 35 | } 36 | \section{Environment Variables}{ 37 | 38 | \itemize{ 39 | \item \code{CONBENCH_URL}: Required. The URL of the Conbench server with no trailing 40 | slash. For arrow, should be \verb{https://conbench.ursa.dev}. 41 | \item \code{CONBENCH_EMAIL}: The email to use for Conbench login. Only required if the 42 | server is private. 43 | \item \code{CONBENCH_PASSWORD}: The password to use for Conbench login. Only required 44 | if the server is private. 45 | \item \code{CONBENCH_PROJECT_REPOSITORY}: The repository name (in the format 46 | \code{org/repo}) or the URL (in the format \verb{https://github.com/org/repo}). 47 | Defaults to \code{"https://github.com/apache/arrow"} if unset. 48 | \item \code{CONBENCH_PROJECT_PR_NUMBER}: Recommended. The number of the GitHub pull 49 | request that is running this benchmark, or \code{NULL} if it's a run on the 50 | default branch 51 | \item \code{CONBENCH_PROJECT_COMMIT}: The 40-character commit SHA of the repo being 52 | benchmarked. If missing, will attempt to obtain it from 53 | \code{arrow::arrow_info()$build_info$git_id}, though this may not be populated 54 | depending on how Arrow was built. 55 | \item \code{CONBENCH_MACHINE_INFO_NAME}: Will override detected machine host name sent 56 | in \code{machine_info.name} when posting runs and results. Needed for cases where 57 | the actual host name can vary, like CI and cloud runners. 
58 | } 59 | } 60 | 61 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(as.character,Serializable) 4 | S3method(as.data.frame,BenchmarkResult) 5 | S3method(as.data.frame,BenchmarkResults) 6 | S3method(as.list,Serializable) 7 | S3method(format,BenchmarkDataFrame) 8 | S3method(get_default_parameters,Benchmark) 9 | S3method(get_default_parameters,BenchmarkDataFrame) 10 | S3method(get_default_parameters,default) 11 | S3method(run,BenchmarkDataFrame) 12 | S3method(run,default) 13 | export("%||%") 14 | export(BenchEnvironment) 15 | export(Benchmark) 16 | export(BenchmarkDataFrame) 17 | export(all_sources) 18 | export(array_altrep_materialization) 19 | export(array_to_vector) 20 | export(confirm_mem_alloc) 21 | export(dataset_taxi_2013) 22 | export(dataset_taxi_parquet) 23 | export(df_to_table) 24 | export(ensure_dataset) 25 | export(ensure_format) 26 | export(ensure_source) 27 | export(file_with_ext) 28 | export(generate_tpch) 29 | export(get_csv_reader) 30 | export(get_csv_writer) 31 | export(get_dataset_attr) 32 | export(get_default_parameters) 33 | export(get_input_func) 34 | export(get_json_reader) 35 | export(get_package_benchmarks) 36 | export(get_params_summary) 37 | export(get_query_func) 38 | export(get_read_function) 39 | export(get_source_attr) 40 | export(get_sql_query_func) 41 | export(get_write_function) 42 | export(install_benchconnect) 43 | export(install_datalogistik) 44 | export(install_pipx) 45 | export(known_compressions) 46 | export(known_formats) 47 | export(known_sources) 48 | export(measure) 49 | export(read_csv) 50 | export(read_file) 51 | export(read_json) 52 | export(read_source) 53 | export(remote_dataset) 54 | export(row_group_size) 55 | export(run) 56 | export(run_benchmark) 57 | export(run_bm) 58 | export(run_one) 59 | export(sync_and_drop_caches) 60 | export(table_to_df) 61 | export(tables_refed) 62 | export(tpc_h) 63 | export(tpc_h_queries) 64 | export(tpch_answer) 65 | export(tpch_tables) 66 | export(write_csv) 67 | export(write_file) 68 | importFrom(R.utils,gunzip) 69 | importFrom(R.utils,gzip) 70 | importFrom(distro,distro) 71 | importFrom(glue,glue) 72 | importFrom(jsonlite,fromJSON) 73 | importFrom(jsonlite,toJSON) 74 | importFrom(progress,progress_bar) 75 | importFrom(purrr,flatten) 76 | importFrom(purrr,map) 77 | importFrom(purrr,map_int) 78 | importFrom(purrr,map_lgl) 79 | importFrom(purrr,pmap) 80 | importFrom(purrr,transpose) 81 | importFrom(remotes,install_github) 82 | importFrom(rlang,is_missing) 83 | importFrom(rlang,set_names) 84 | importFrom(sessioninfo,package_info) 85 | importFrom(stats,setNames) 86 | importFrom(utils,head) 87 | importFrom(utils,install.packages) 88 | importFrom(utils,installed.packages) 89 | importFrom(utils,modifyList) 90 | importFrom(utils,packageDescription) 91 | importFrom(utils,tail) 92 | importFrom(utils,write.csv) 93 | importFrom(waldo,compare) 94 | importFrom(withr,with_envvar) 95 | importFrom(withr,with_makevars) 96 | importFrom(withr,with_options) 97 | -------------------------------------------------------------------------------- /R/bm-row-group-size.R: -------------------------------------------------------------------------------- 1 | #' Benchmark effect of parquet row group size 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-file id, or a file path to read in 5 | #' * `queries` What queries to run 6 | #' * 
`chunk_size` Number of rows to write in each row group. Suggested sizes: 7 | #' `chunk_size = list(NULL, 10000L, 100000L, 1000000L)` 8 | #' 9 | #' @export 10 | row_group_size <- Benchmark( 11 | "row_group_size", 12 | setup = function(source = c("fanniemae_2016Q4", "fanniemae_sample"), # TODO implement more sources 13 | queries = c("filters", "everything"), 14 | chunk_size = NULL) { 15 | # ensure that we have the right kind of file available 16 | input_file <- ensure_format( 17 | name = source, format = "parquet", compression = "snappy", chunk_size = chunk_size 18 | ) 19 | 20 | library("dplyr", warn.conflicts = FALSE) 21 | 22 | # put the necessary variables into a BenchmarkEnvironment to be used when the 23 | # benchmark is running. 24 | BenchEnvironment(source = source, input_file = input_file, queries = queries) 25 | }, 26 | 27 | # delete the results before each iteration 28 | before_each = { 29 | result <- list() 30 | result_dim <- list() 31 | }, 32 | # the benchmark to run 33 | run = { 34 | ds <- arrow::open_dataset(input_file) 35 | 36 | # TODO: generalize this to work with fanniemae_sample once https://github.com/voltrondata-labs/arrowbench/issues/88 is done 37 | if (grepl('fanniemae_2016Q4', source)) { 38 | if ("filters" %in% queries) { 39 | result[["filters"]] <- ds %>% 40 | filter( 41 | is.na(f2), 42 | f3 < 2 43 | | f5 > 55 44 | | f6 < 50 45 | | f8 %in% c('02/2050', '10/2059', '02/2052') 46 | | f14 == '08/01/2018' 47 | | f17 > 10000 48 | | f18 > 20000 49 | | f19 > 3000 50 | | f20 > 5000 51 | | f21 > 10000 52 | | f22 > 3e5 53 | | f23 > 1e5 54 | | f25 > 1000 55 | | f26 > 5e4 56 | ) %>% 57 | collect() 58 | 59 | result_dim[["filters"]] <- c(514L, 31L) 60 | } 61 | } 62 | 63 | if ("everything" %in% queries) { 64 | result[["everything"]] <- ds %>% collect() 65 | result_dim[["everything"]] <- all_sources[[source]]$dim 66 | } 67 | }, 68 | # after each iteration, check the dimensions and delete the results 69 | after_each = { 70 | Map( 71 | function(res, res_dim, query) { 72 | call <- quote(stopifnot(identical(dim(res), res_dim))) 73 | names(call)[[2]] <- paste0("The dimensions for query `", query, "` do not match") 74 | eval(call) 75 | }, 76 | res = result, 77 | res_dim = result_dim, 78 | query = names(result) 79 | ) 80 | 81 | result <- list() 82 | result_dim <- list() 83 | }, 84 | 85 | packages_used = function(params) { 86 | c("arrow", "dplyr") 87 | } 88 | ) 89 | -------------------------------------------------------------------------------- /inst/tpch-answer-gen.R: -------------------------------------------------------------------------------- 1 | # ARROWBENCH_LOCAL_DIR="path/to/arrowbench/storage" Rscript inst/tpch-answer-gen.R 2 | 3 | library(arrowbench) 4 | library(duckdb) 5 | library(arrow, warn.conflicts = FALSE) 6 | library(dplyr, warn.conflicts = FALSE) 7 | library(lubridate, warn.conflicts = FALSE) 8 | 9 | sf <- 1 10 | 11 | tpch_files <- ensure_source("tpch", scale_factor = sf) 12 | 13 | input_functions <- list() 14 | 15 | input_functions[["dplyr"]] <- function(name) { 16 | file <- tpch_files[[name]] 17 | return(arrow::read_parquet(file, as_data_frame = TRUE)) 18 | } 19 | 20 | input_functions[["arrow"]] <- function(name) { 21 | file <- tpch_files[[name]] 22 | return(arrow::open_dataset(file, format = "parquet")) 23 | } 24 | 25 | con <- dbConnect(duckdb::duckdb("answer_gen_db")) 26 | dbExecute(con, paste0("PRAGMA threads=10")) 27 | 28 | # DuckDB tables 29 | for (name in tpch_tables) { 30 | file <- path.expand(tpch_files[[name]]) 31 | 32 | sql_query <- paste0("CREATE TABLE ", name, 
" AS SELECT * FROM parquet_scan('", file, "');") 33 | 34 | file <- tpch_files[[name]] 35 | dbExecute(con, sql_query) 36 | } 37 | 38 | input_functions[["duckdb"]] <- function(name) { 39 | return(dplyr::tbl(con, name)) 40 | } 41 | 42 | # create directory to save the answers to 43 | dir.create(glue::glue("./answers/scale-factor-{sf}/"), recursive = TRUE) 44 | 45 | for (q in c(1:22)) { 46 | message("==================================================") 47 | message(glue::glue("Query: {q}")) 48 | message("==================================================") 49 | 50 | query <- q 51 | 52 | # grab the sql queries from github (this URL might need to be updated if their location in the repo changes.) 53 | sql <- paste0(httr::GET( 54 | glue::glue("https://raw.githubusercontent.com/duckdb/duckdb/master/extension/tpch/dbgen/queries/q{stringr::str_pad(query, 2, pad = '0')}.sql") 55 | ), collapse = "\n") 56 | 57 | # dplyr with scale factor 10 requires a lot of memory, if hitting `vector memory exhausted (limit reached?)` comment it out 58 | # at scale factor 0.01 there are small differences between duckdb and arrow for some queries. This is likely due to decimal precision / rounding differences, but I haven't dug into it too deeply. 59 | result_dplyr <- tpc_h_queries[[query]](input_functions[["dplyr"]]) 60 | result_arrow <- tpc_h_queries[[query]](input_functions[["arrow"]], collect_func = compute) 61 | result_duckdb <- as_tibble(dbGetQuery(con, sql)) 62 | 63 | # compare the arrow results with both dplyr and duckdb versions 64 | print(waldo::compare(as.data.frame(result_arrow), result_dplyr, tolerance = 0.01, x_arg = "arrow", y_arg = "dplyr")) 65 | print(waldo::compare(as.data.frame(result_arrow), result_duckdb, tolerance = 0.01, x_arg = "arrow", y_arg = "duckdb")) 66 | 67 | write_parquet(result_arrow, glue::glue("./answers/scale-factor-{sf}/tpch-q{stringr::str_pad(query, 2, pad = '0')}-sf{sf}.parquet")) 68 | } 69 | 70 | # clean up duckdb database file 71 | DBI::dbDisconnect(con, shutdown = TRUE) 72 | unlink("answer_gen_db") 73 | -------------------------------------------------------------------------------- /R/params.R: -------------------------------------------------------------------------------- 1 | #' Generate a dataframe of default parameters for a benchmark 2 | #' 3 | #' Generates a dataframe of parameter combinations for a benchmark to try based 4 | #' on the parameter defaults of its `setup` function and supplied parameters. 5 | #' 6 | #' @param x An object for which to generate parameters 7 | #' @param ... Named arguments corresponding to the parameters of `bm`'s `setup` 8 | #' function. May also contain global params `cpu_count`, `lib_path`, `mem_alloc`, 9 | #' and `drop_caches`. See the "Parameterizing benchmarks" section of [Benchmark()] 10 | #' for more details. 11 | #' 12 | #' @return For `get_default_parameters.Benchmark`, a dataframe of parameter combinations 13 | #' to try with a column for each parameter and a row for each combination. 14 | #' 15 | #' @export 16 | get_default_parameters <- function(x, ...) { 17 | UseMethod("get_default_parameters") 18 | } 19 | 20 | #' @rdname get_default_parameters 21 | #' @export 22 | get_default_parameters.default <- function(x, ...) { 23 | stop("No method found for class `", toString(class(x)), '`') 24 | } 25 | 26 | #' @rdname get_default_parameters 27 | #' @export 28 | get_default_parameters.Benchmark <- function(x, ...) { 29 | # This takes the expansion of the default parameters in the function signature 30 | # perhaps restricted by the ... 
params 31 | params <- modifyList(get_default_args(x$setup), list(...), keep.null = TRUE) 32 | if (identical(params[["lib_path"]], "all")) { 33 | # Default for lib_path is just "latest", if omitted 34 | # "all" means all old versions 35 | # rev() is so we run newest first. This also means we bootstrap data fixtures 36 | # with newest first, so that's some assurance that older versions can read 37 | # what the newer libs write 38 | params$lib_path <- rev(c(names(arrow_version_to_date), "devel", "latest")) 39 | } 40 | if (is.null(params[["cpu_count"]])) { 41 | params$cpu_count <- c(1L, parallel::detectCores()) 42 | } 43 | 44 | # `NULL` is a valid argument but needs to be wrapped in `list()` 45 | to_list <- lengths(params) == 0 46 | params[to_list] <- lapply(params[to_list], list) 47 | 48 | params[["stringsAsFactors"]] <- FALSE 49 | out <- do.call(expand.grid, params) 50 | 51 | # we don't change memory allocators on non-arrow packages 52 | if (!is.null(params[["mem_alloc"]])) { 53 | # a bit of a hack: we can test memory allocators on devel or latest, but 54 | # "4.0" <= "devel" and "4.0" <= "latest" are both true. 55 | out[!is_arrow_package(out, "4.0", x$packages_used), "mem_alloc"] <- NA 56 | out <- unique(out) 57 | } 58 | 59 | if (!is.null(x$valid_params)) { 60 | out <- x$valid_params(out) 61 | } 62 | out 63 | } 64 | 65 | #' @rdname get_default_parameters 66 | #' @export 67 | get_default_parameters.BenchmarkDataFrame <- function(x, ...) { 68 | x$parameters <- purrr::map2(x$benchmark, x$parameters, function(bm, params) { 69 | if (is.null(params)) { 70 | params <- get_default_parameters(bm, ...) 71 | } 72 | params 73 | }) 74 | 75 | x 76 | } 77 | -------------------------------------------------------------------------------- /R/bm-write-csv.R: -------------------------------------------------------------------------------- 1 | #' Benchmark CSV writing 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-source id to read in and then write back out as CSV 5 | #' * `writer` One of `c("arrow", "data.table", "vroom", "readr", "base")` 6 | #' * `compression` One of `c("uncompressed", "gzip")` 7 | #' * `input` One of `c("arrow_table", "data_frame")` 8 | #' @export 9 | write_csv <- Benchmark( 10 | "write_csv", 11 | setup = function(source = names(known_sources), 12 | writer = "arrow", 13 | compression = c("uncompressed", "gzip"), 14 | input = c("arrow_table", "data_frame")) { 15 | writer <- match.arg(writer, c("arrow", "data.table", "vroom", "readr", "base")) 16 | compression <- match.arg(compression, c("uncompressed", "gzip")) 17 | input <- match.arg(input) 18 | 19 | # source defaults are retrieved from the function definition (all available 20 | # known_sources) and then the source is read in as a data.frame 21 | source <- ensure_source(source) 22 | df <- read_source(source, as_data_frame = input == "data_frame") 23 | 24 | ext <- switch( 25 | compression, 26 | uncompressed = ".csv", 27 | gzip = ".csv.gz", 28 | paste0(".csv.", compression) 29 | ) 30 | 31 | # Map string param name to functions 32 | BenchEnvironment( 33 | write_csv_func = get_csv_writer(writer), 34 | source = source, 35 | df = df, 36 | ext = ext 37 | ) 38 | }, 39 | # delete the results before each iteration 40 | before_each = { 41 | result_file <- tempfile(fileext = ext) 42 | 43 | }, 44 | # the benchmark to run 45 | run = { 46 | write_csv_func(df, result_file) 47 | }, 48 | # after each iteration, check the dimensions and delete the results 49 | after_each = { 50 | stopifnot(identical(dim(df), dim(arrow::open_dataset(result_file, format = "csv")))) 51 | stopifnot("Output file does not exist" = 
file.exists(result_file)) 52 | unlink(result_file) 53 | }, 54 | valid_params = function(params) { 55 | ## Only arrow fns will accept an arrow_table 56 | drop <- ( params$input == "arrow_table" & params$writer != "arrow" ) 57 | params[!drop,] 58 | }, 59 | packages_used = function(params) { 60 | params$writer 61 | } 62 | ) 63 | 64 | 65 | #' Get a CSV writer 66 | #' 67 | #' @param writer the writer to use 68 | #' 69 | #' @return the csv writer 70 | #' @export 71 | get_csv_writer <- function(writer) { 72 | library(writer, character.only = TRUE, warn.conflicts = FALSE) 73 | if (writer == "arrow") { 74 | return(function(...) arrow::write_csv_arrow(...)) 75 | } else if (writer == "readr") { 76 | return(function(..., as_data_frame) readr::write_csv(...)) 77 | } else if (writer == "data.table") { 78 | return(function(..., as_data_frame) data.table::fwrite(...)) 79 | } else if (writer == "vroom") { 80 | return(function(..., as_data_frame) vroom::vroom_write(..., delim = ",")) 81 | } else if (writer == "base") { 82 | return(function(df, result_file) { 83 | if (tools::file_ext(result_file) == "gz") result_file <- gzfile(result_file) 84 | utils::write.csv(df, result_file, row.names = FALSE) 85 | }) 86 | } else { 87 | stop("Unsupported writer: ", writer, call. = FALSE) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /R/bm-array-to-vector.R: -------------------------------------------------------------------------------- 1 | #' Benchmark for reading an Arrow array to a vector 2 | #' 3 | #' This flexes a lower level conversion to R data structures from Arrow data structures. 4 | #' 5 | #' @section Parameters: 6 | #' * `source` A known-file id to use (it will be read in to a data.frame first) 7 | #' * `chunked_arrays` logical, should the arrays converted be `ChunkedArrays` or `Arrays`? 8 | #' * `exclude_nulls` logical, should any columns with any `NULL`s or `NA`s in them be removed? 9 | #' * `alt_rep` logical, should the altrep option be set? 
(`TRUE` to enable it, `FALSE` to disable) 10 | #' 11 | #' @importFrom purrr map flatten 12 | #' @export 13 | array_to_vector <- Benchmark("array_to_vector", 14 | setup = function( 15 | # the only datasets that have any no-null numerics are 16 | source = c("type_integers", "type_floats"), 17 | chunked_arrays = FALSE, 18 | exclude_nulls = FALSE, 19 | alt_rep = TRUE 20 | ) { 21 | stopifnot( 22 | is.logical(chunked_arrays), 23 | is.logical(exclude_nulls), 24 | is.logical(alt_rep) 25 | ) 26 | source <- match.arg(source, names(all_sources)) 27 | source <- ensure_source(source) 28 | result_dim <- get_source_attr(source, "dim") 29 | table <- read_source(source, as_data_frame = FALSE) 30 | 31 | if (exclude_nulls) { 32 | cols_without_nulls <- unlist(lapply(colnames(table), function(x) table[[x]]$null_count == 0)) 33 | table <- table[which(cols_without_nulls)] 34 | result_dim[2] <- sum(cols_without_nulls) 35 | } 36 | 37 | # extract the arrays 38 | arrays <- purrr::map(colnames(table), ~table[[.]]) 39 | 40 | # If we can operate on arrays, then pull the chunks out and flatten 41 | if (!chunked_arrays) { 42 | arrays <- purrr::flatten(purrr::map(arrays, function (array) { 43 | n_chunks <- array$num_chunks 44 | purrr::map(seq_len(n_chunks) - 1L, ~array$chunk(.)) 45 | })) 46 | } 47 | 48 | array_lengths <- lapply(arrays, function(array) array$length()) 49 | 50 | as_vector_func <- function(array) as.vector(array) 51 | 52 | BenchEnvironment( 53 | as_vector_func = as_vector_func, 54 | array_lengths = array_lengths, 55 | arrays = arrays, 56 | alt_rep = alt_rep 57 | ) 58 | }, 59 | before_each = { 60 | result <- NULL 61 | options(arrow.use_altrep = alt_rep) 62 | }, 63 | run = { 64 | result <- lapply(arrays, as_vector_func) 65 | }, 66 | after_each = { 67 | # altrep checking 68 | # TODO: should we also check that one of the classes is "arrow"? 69 | is_altrep <- unlist(purrr::map(result, ~!is.null(.Internal(altrep_class(.))))) 70 | if (alt_rep) { 71 | altrep_ok <- all(is_altrep) 72 | } else { 73 | altrep_ok <- all(!is_altrep) 74 | } 75 | 76 | stopifnot( 77 | "The array lengths do not match" = all.equal(lapply(result, length), array_lengths), 78 | "The objects do not match the altrep parameter" = altrep_ok 79 | ) 80 | 81 | # reset the altrep option 82 | options(arrow.use_altrep = NULL) 83 | result <- NULL 84 | }, 85 | valid_params = function(params) { 86 | # TODO: only enable on >5.0.0? 87 | params 88 | }, 89 | packages_used = function(params) "arrow" 90 | ) 91 | 92 | -------------------------------------------------------------------------------- /R/bm-write-file.R: -------------------------------------------------------------------------------- 1 | #' Benchmark file writing 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-file id, or a CSV(?) 
file path to read in 5 | #' * `file_type` One of `c("parquet", "feather", "fst")` 6 | #' * `compression` One of the values: `r paste(known_compressions, collapse = ", ")` 7 | #' * `input_type` One of `c("arrow_table", "data_frame")` 8 | #' 9 | #' @export 10 | write_file <- Benchmark("file-write", 11 | setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"), 12 | file_type = c("parquet", "feather"), 13 | compression = c("uncompressed", "snappy", "lz4"), 14 | input_type = c("arrow_table", "data_frame")) { 15 | # source defaults are retrieved from the function definition (all available 16 | # known_sources) and then read the source in as a data.frame 17 | source <- ensure_source(source) 18 | df <- read_source(source, as_data_frame = match.arg(input_type) == "data_frame") 19 | # file_type defaults to parquet or feather, but can accept fst as well 20 | file_type <- match.arg(file_type, c("parquet", "feather", "fst")) 21 | 22 | # Map string param name to functions 23 | get_write_func <- function(file_type, compression) { 24 | force(compression) 25 | if (file_type == "feather") { 26 | return(function(...) arrow::write_feather(..., compression = compression)) 27 | } else if (file_type == "parquet") { 28 | return(function(...) arrow::write_parquet(..., compression = compression)) 29 | } else if (file_type == "fst") { 30 | # fst is always zstd, just a question of what level of compression 31 | level <- ifelse(compression == "uncompressed", 0, 50) 32 | return(function(...) fst::write_fst(..., compress = level)) 33 | } else { 34 | stop("Unsupported file_type: ", file_type, call. = FALSE) 35 | } 36 | } 37 | write_func <- get_write_func(file_type, compression) 38 | 39 | # put the necessary variables into a BenchmarkEnvironment to be used when 40 | # the benchmark is running. 
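    # (the objects bound here are what the before_each/run/after_each
    # expressions below can see from setup)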
41 | BenchEnvironment( 42 | write_func = write_func, 43 | file_type = file_type, 44 | source = source, 45 | df = df 46 | ) 47 | }, 48 | # delete the results before each iteration 49 | before_each = { 50 | result_file <- tempfile() 51 | }, 52 | # the benchmark to run 53 | run = { 54 | write_func(df, result_file) 55 | }, 56 | # after each iteration, check the dimensions and delete the results 57 | after_each = { 58 | stopifnot(file.exists(result_file)) 59 | unlink(result_file) 60 | }, 61 | # validate that the parameters given are compatible 62 | valid_params = function(params) { 63 | # make sure that the file_type and the compression is compatible 64 | # and fst doesn't have arrow_table input_type 65 | drop <- !validate_format(params$file_type, params$compression) | 66 | params$file_type == "fst" & params$input_type == "arrow_table" 67 | params[!drop,] 68 | }, 69 | # packages used when specific file_types are used 70 | packages_used = function(params) { 71 | pkg_map <- c( 72 | "feather" = "arrow", 73 | "parquet" = "arrow", 74 | "fst" = "fst" 75 | ) 76 | pkg_map[params$file_type] 77 | } 78 | ) 79 | -------------------------------------------------------------------------------- /tests/testthat/test-result.R: -------------------------------------------------------------------------------- 1 | test_that("R6.1 classes inherit properly", { 2 | SumClass <- R6Point1Class( 3 | classname = "SumClass", 4 | static = list(sum = sum, x = 1:100) 5 | ) 6 | 7 | sum_class <- SumClass$new() 8 | expect_s3_class(sum_class, "SumClass") 9 | expect_identical(SumClass$sum, sum) 10 | 11 | SumOtherClass <- R6Point1Class( 12 | classname = "SumOtherClass", 13 | inherit = SumClass 14 | ) 15 | 16 | sum_other_class <- SumOtherClass$new() 17 | expect_s3_class(sum_other_class, "SumOtherClass") 18 | expect_identical(SumOtherClass$sum, sum) 19 | 20 | expect_equal(SumOtherClass$sum(SumOtherClass$x), 5050L) 21 | }) 22 | 23 | 24 | test_that("inherited serialization/deserialization methods work", { 25 | res <- BenchmarkResult$new( 26 | run_name = "fake_run", 27 | tags = c(is_real = FALSE), 28 | optional_benchmark_info = list( 29 | name = "fake", 30 | result = data.frame(time = 0, status = "superfast", stringsAsFactors = FALSE), 31 | params = list(speed = "lightning") 32 | ) 33 | ) 34 | 35 | # sanity 36 | expect_s3_class(res, "BenchmarkResult") 37 | expect_equal(res$run_name, "fake_run") 38 | 39 | # roundtrips 40 | expect_equal(res$json, BenchmarkResult$from_json(res$json)$json) 41 | expect_equal(res$list, BenchmarkResult$from_list(res$list)$list) 42 | 43 | temp <- tempfile(fileext = '.json') 44 | res$write_json(temp) 45 | expect_equal(res$json, BenchmarkResult$read_json(temp)$json) 46 | file.remove(temp) 47 | }) 48 | 49 | test_that("S3 methods work", { 50 | github <- list( 51 | repository = "https://github.com/conchair/conchair", 52 | commit = "2z8c9c49a5dc4a179243268e4bb6daa5", 53 | pr_number = 47L 54 | ) 55 | run_reason <- "mocked-arrowbench-unit-test" 56 | run_name <- paste(run_reason, github$commit, sep = ": ") 57 | host_name <- "fake-computer" 58 | 59 | withr::with_envvar( 60 | c( 61 | CONBENCH_PROJECT_REPOSITORY = github$repository, 62 | CONBENCH_PROJECT_PR_NUMBER = github$pr_number, 63 | CONBENCH_PROJECT_COMMIT = github$commit, 64 | CONBENCH_MACHINE_INFO_NAME = host_name 65 | ), 66 | { 67 | res <- BenchmarkResult$new( 68 | run_name = run_name, 69 | run_reason = run_reason, 70 | tags = c(is_real = FALSE), 71 | optional_benchmark_info = list( 72 | name = "fake", 73 | result = data.frame(time = 0, status = "superfast", 
stringsAsFactors = FALSE), 74 | params = list(speed = "lightning") 75 | ) 76 | ) 77 | } 78 | ) 79 | 80 | expect_equal(as.character(res), res$json) 81 | expect_equal(as.list(res), res$list) 82 | 83 | expect_equal(as.data.frame(res), res$to_dataframe()) 84 | expect_equal( 85 | as.data.frame(res), 86 | structure( 87 | list(iteration = 1L, time = 0, status = "superfast", speed = "lightning"), 88 | row.names = c(NA, -1L), 89 | class = c("tbl_df", "tbl", "data.frame"), 90 | run_name = run_name, 91 | run_reason = run_reason, 92 | github = github, 93 | timestamp = res$timestamp, 94 | tags = c(is_real = FALSE) 95 | ) 96 | ) 97 | 98 | expect_equal(get_params_summary(res), res$params_summary) 99 | expect_equal( 100 | get_params_summary(res), 101 | structure( 102 | list(speed = "lightning", did_error = FALSE), 103 | row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame") 104 | ) 105 | ) 106 | }) 107 | -------------------------------------------------------------------------------- /R/bm-read-file.R: -------------------------------------------------------------------------------- 1 | #' Benchmark file reading 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-file id, or a CSV(?) file path to read in 5 | #' * `file_type` One of `c("parquet", "feather", "fst")` 6 | #' * `compression` One of the values: `r paste(known_compressions, collapse = ", ")` 7 | #' * `output_type` One of `c("arrow_table", "data_frame")` 8 | #' 9 | #' @export 10 | read_file <- Benchmark("file-read", 11 | setup = function(source = c("fanniemae_2016Q4", "nyctaxi_2010-01"), 12 | # TODO: break out feather_v1 and feather_v2, feather_v2 only in >= 0.17 13 | file_type = c("parquet", "feather"), 14 | compression = c("uncompressed", "snappy", "lz4"), 15 | output_type = c("arrow_table", "data_frame")) { 16 | # file_type defaults to parquet or feather, but can accept fst as well 17 | file_type <- match.arg(file_type, c("parquet", "feather", "fst")) 18 | # the output_type defaults are retrieved from the function definition (arrow_table and data_frame) 19 | output_type <- match.arg(output_type) 20 | 21 | # ensure that we have the right kind of file available 22 | input_file <- ensure_format(source, file_type, compression) 23 | # retrieve the dimensions for run-checking after the benchmark 24 | result_dim <- get_source_attr(source, "dim") 25 | 26 | # put the necessary variables into a BenchmarkEnvironment to be used when the 27 | # benchmark is running. 
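    # (likewise, read_func, input_file, result_dim, and as_data_frame bound
    # here are what the run and after_each expressions below can see)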
28 | BenchEnvironment( 29 | # get the correct read function for the input file_type 30 | read_func = get_read_function(file_type), 31 | input_file = input_file, 32 | result_dim = result_dim, 33 | as_data_frame = output_type == "data_frame" 34 | ) 35 | }, 36 | # delete the results before each iteration 37 | before_each = { 38 | result <- NULL 39 | }, 40 | # the benchmark to run 41 | run = { 42 | result <- read_func(input_file, as_data_frame = as_data_frame) 43 | }, 44 | # after each iteration, check the dimensions and delete the results 45 | after_each = { 46 | stopifnot(identical(dim(result), result_dim)) 47 | result <- NULL 48 | }, 49 | # validate that the parameters given are compatible 50 | valid_params = function(params) { 51 | # make sure that the file_type and the compression is compatible 52 | # and fst doesn't have arrow_table output_type 53 | drop <- !validate_format(params$file_type, params$compression) | 54 | params$output_type == "arrow_table" & params$file_type == "fst" 55 | params[!drop,] 56 | }, 57 | # packages used when specific file_types are used 58 | packages_used = function(params) { 59 | pkg_map <- c( 60 | "feather" = "arrow", 61 | "parquet" = "arrow", 62 | "fst" = "fst" 63 | ) 64 | pkg_map[params$file_type] 65 | } 66 | ) 67 | 68 | #' Get a reader 69 | #' 70 | #' @param file_type what file_type to read 71 | #' 72 | #' @return the read function to use 73 | #' @export 74 | get_read_function <- function(file_type) { 75 | pkg_map <- c( 76 | "feather" = "arrow", 77 | "parquet" = "arrow", 78 | "fst" = "fst" 79 | ) 80 | library(pkg_map[[file_type]], character.only = TRUE, warn.conflicts = FALSE) 81 | 82 | if (file_type == "feather") { 83 | return(function(...) arrow::read_feather(...)) 84 | } else if (file_type == "parquet") { 85 | return(function(...) arrow::read_parquet(...)) 86 | } else if (file_type == "fst") { 87 | return(function(..., as_data_frame) fst::read_fst(...)) 88 | } else { 89 | stop("Unsupported file_type: ", file_type, call. 
= FALSE) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /R/bm-dataset-taxi-parquet.R: -------------------------------------------------------------------------------- 1 | #' Benchmark Taxi dataset (Parquet) reading 2 | #' 3 | #' @section Parameters: 4 | #' * `query` Name of a known query to run; see `dataset_taxi_parquet$cases` 5 | #' 6 | #' @export 7 | dataset_taxi_parquet <- Benchmark("partitioned-dataset-filter", 8 | setup = function(query = names(dataset_taxi_parquet$cases)) { 9 | library("dplyr", warn.conflicts = FALSE) 10 | dataset <- ensure_dataset("taxi_parquet") 11 | query <- dataset_taxi_parquet$cases[[match.arg(query)]] 12 | 13 | BenchEnvironment( 14 | query = query, 15 | dataset = dataset 16 | ) 17 | }, 18 | before_each = { 19 | result <- NULL 20 | }, 21 | run = { 22 | result <- query$query(dataset) 23 | }, 24 | after_each = { 25 | query$assert(result) 26 | }, 27 | tags_fun = function(params) { 28 | # to reproduce this: https://github.com/voltrondata-labs/benchmarks/blob/main/benchmarks/partitioned_dataset_filter_benchmark.py#L23 29 | params$dataset <- "dataset-taxi-parquet" 30 | params 31 | }, 32 | cases = list( 33 | vignette = list( 34 | query = function(ds) { 35 | ds %>% 36 | filter(total_amount > 100, year == 2015) %>% 37 | select(tip_amount, total_amount, passenger_count) %>% 38 | group_by(passenger_count) %>% 39 | summarize( 40 | tip_pct = median(100 * tip_amount / total_amount), 41 | n = n() 42 | ) %>% 43 | collect() 44 | }, 45 | assert = function(result) { 46 | stopifnot( 47 | identical(dim(result), c(10L, 3L)), 48 | identical(names(result), c("passenger_count", "tip_pct", "n")), 49 | identical(sum(result$n), 200807L) 50 | ) 51 | } 52 | ), 53 | payment_type_3 = list( 54 | query = function(ds) { 55 | ds %>% 56 | filter(payment_type == "3") %>% 57 | select(year, month, passenger_count) %>% 58 | group_by(year, month) %>% 59 | summarize( 60 | total_passengers = sum(passenger_count, na.rm = TRUE), 61 | n = n() 62 | ) %>% 63 | collect() 64 | }, 65 | assert = function(result) { 66 | stopifnot( 67 | identical(dim(result), c(54L, 4L)), 68 | identical(names(result), c("year", "month", "total_passengers", "n")), 69 | identical(sum(result$n), 2412399L) 70 | ) 71 | } 72 | ), 73 | # The intention of this is to filter + read from a small number of parquet 74 | # files (smaller than the number of threads) to see if parallelism is 75 | # beneficial 76 | small_no_files = list( 77 | query = function(ds) { 78 | ds %>% 79 | filter(total_amount > 20, year %in% c(2011, 2019) & month == 2) %>% 80 | select(tip_amount, total_amount, passenger_count) %>% 81 | group_by(passenger_count) %>% 82 | summarize( 83 | tip_pct = median(100 * tip_amount / total_amount), 84 | n = n() 85 | ) %>% 86 | collect() 87 | }, 88 | assert = function(result) { 89 | stopifnot( 90 | identical(dim(result), c(11L, 3L)), 91 | identical(names(result), c("passenger_count", "tip_pct", "n")), 92 | identical(sum(result$n), 3069271L) 93 | ) 94 | } 95 | ), 96 | dims = list( 97 | query = function(ds) { 98 | dim(ds) 99 | }, 100 | assert = function(result) { 101 | stopifnot("dims do not match" = identical(result, c(1547741381L, 20L))) 102 | } 103 | ) 104 | ), 105 | packages_used = function(params) { 106 | c("arrow", "dplyr") 107 | } 108 | ) 109 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | 
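# Run R CMD check across the OS / R-version matrix below on every push and
# pull request.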
name: R-CMD-check 4 | 5 | jobs: 6 | R-CMD-check: 7 | runs-on: ${{ matrix.config.os }} 8 | 9 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | config: 15 | - {os: macOS-12, r: 'release', force_suggests: true } 16 | # the stderror isn't redirected correctly on windows, (at least in GHA) 17 | # TODO: figure out if this runs correctly on windows at all 18 | # - {os: windows-latest, r: 'release'} 19 | # We explicitly set the user agent for R devel to the current release version of R so RSPM serves the release binaries. 20 | - {os: ubuntu-20.04, r: 'devel', force_suggests: true } 21 | - {os: ubuntu-20.04, r: 'release', force_suggests: true } 22 | - {os: ubuntu-20.04, r: 'oldrel', force_suggests: true } 23 | - {os: ubuntu-20.04, r: '4.0', force_suggests: true } 24 | 25 | env: 26 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 27 | RSPM: ${{ matrix.config.rspm }} 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | 30 | steps: 31 | - uses: actions/checkout@v3 32 | 33 | - uses: r-lib/actions/setup-r@v2 34 | with: 35 | r-version: ${{ matrix.config.r }} 36 | use-public-rspm: true 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - name: Query dependencies 41 | run: | 42 | install.packages('remotes') 43 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 44 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 45 | shell: Rscript {0} 46 | 47 | - name: Cache R packages 48 | if: runner.os != 'Windows' 49 | uses: actions/cache@v2 50 | with: 51 | path: ${{ env.R_LIBS_USER }} 52 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2-${{ hashFiles('.github/depends.Rds') }} 53 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-2- 54 | 55 | - name: Install system dependencies 56 | if: runner.os == 'Linux' 57 | run: | 58 | while read -r cmd 59 | do 60 | eval sudo $cmd 61 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 62 | 63 | - name: Install dependencies 64 | run: | 65 | remotes::install_deps(dependencies = TRUE) 66 | remotes::install_cran("rcmdcheck") 67 | shell: Rscript {0} 68 | 69 | - name: Session info 70 | run: | 71 | options(width = 100) 72 | pkgs <- installed.packages()[, "Package"] 73 | sessioninfo::session_info(pkgs, include_base = TRUE) 74 | shell: Rscript {0} 75 | 76 | - name: Check 77 | env: 78 | _R_CHECK_CRAN_INCOMING_: false 79 | _R_CHECK_FORCE_SUGGESTS_: ${{ matrix.config.force_suggests }} 80 | run: | 81 | if ('${{ matrix.config.r }}' == 'release' && grepl('ubuntu', '${{ matrix.config.os }}')) { 82 | Sys.setenv("ARROWBENCH_TEST_CUSTOM_DUCKDB" = TRUE) 83 | } 84 | options(crayon.enabled = TRUE) 85 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 86 | shell: Rscript {0} 87 | 88 | - name: Show testthat output 89 | if: always() 90 | run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true 91 | shell: bash 92 | 93 | - name: Upload check results 94 | if: failure() 95 | uses: actions/upload-artifact@main 96 | with: 97 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 98 | path: check 99 | -------------------------------------------------------------------------------- /R/external-dependencies.R: -------------------------------------------------------------------------------- 1 | external_cli_available <- function(cli) { 2 | res <- processx::run("which", cli, error_on_status = FALSE) 3 | 4 | if (res$status != 0L) { 5 | 
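    # build an actionable warning: pipx has its own install path, while every
    # other CLI is installed via pipx, so the hint differs by `cli`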
msg <- paste(cli, 'not installed or not on $PATH.\n\n') 6 | if (cli == "pipx") { 7 | msg <- paste0( 8 | msg, 9 | glue::glue('It can be installed with `install_pipx()`\n\n'), 10 | 'If already installed, ensure it is on $PATH, e.g. by running ', 11 | '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' 12 | ) 13 | } else { 14 | msg <- paste0( 15 | msg, 16 | glue::glue('It can be installed with `install_pipx(); install_{cli}()`\n\n'), 17 | 'If already installed with pipx, ensure it is on $PATH, e.g. by running ', 18 | '`pipx ensurepath` or adding `PATH="${PATH}:${HOME}/.local/bin"` to ~/.Renviron' 19 | ) 20 | } 21 | 22 | warning(warningCondition(msg, class = "notInstalledWarning")) 23 | } 24 | 25 | res$status == 0L 26 | } 27 | 28 | pipx_available <- function() { 29 | external_cli_available(cli = "pipx") 30 | } 31 | 32 | benchconnect_available <- function() { 33 | external_cli_available(cli = "benchconnect") 34 | } 35 | 36 | datalogistik_available <- function() { 37 | external_cli_available(cli = "datalogistik") 38 | } 39 | 40 | 41 | #' Install pipx 42 | #' 43 | #' Install [pipx](https://pypa.github.io/pipx/), a version of pip that installs 44 | #' Python packages in isolated environments where they will always be available 45 | #' regardless of which version of Python is presently on `$PATH`. Especially 46 | #' useful for installing packages designed to be used via CLIs. 47 | #' 48 | #' @export 49 | install_pipx <- function() { 50 | processx::run("sh", c("-c", "pip install pipx && pipx ensurepath"), echo_cmd = TRUE) 51 | } 52 | 53 | 54 | #' Install benchconnect 55 | #' 56 | #' Install [benchconnect](https://github.com/conbench/conbench/tree/main/benchconnect), 57 | #' a utility for sending benchmark results to a Conbench server. 58 | #' 59 | #' @export 60 | install_benchconnect <- function() { 61 | stopifnot(pipx_available()) 62 | 63 | url <- "benchconnect@git+https://github.com/conbench/conbench.git@main#subdirectory=benchconnect" 64 | 65 | if (suppressWarnings(benchconnect_available(), classes = "notInstalledWarning")) { 66 | if (interactive()) { 67 | ans <- readline("benchconnect already installed. Update? [Y/n]: ") 68 | } else { 69 | ans <- "y" 70 | } 71 | if (tolower(ans) %in% c("y", "")) { 72 | processx::run("pipx", c("install", "--include-deps", "--force", url), echo_cmd = TRUE) 73 | } else { 74 | invisible() 75 | } 76 | } else { 77 | processx::run("pipx", c("install", "--include-deps", url), echo_cmd = TRUE) 78 | } 79 | } 80 | 81 | 82 | #' Install datalogistik 83 | #' 84 | #' Install [datalogistik](https://github.com/conbench/datalogistik), a utility 85 | #' for generating, downloading, and converting datasets for benchmarking. 86 | #' 87 | #' Only for interactive use. 88 | #' 89 | #' @export 90 | install_datalogistik <- function() { 91 | # TODO: install pipx? 92 | stopifnot(pipx_available()) 93 | 94 | ref <- Sys.getenv("DATALOGISTIK_BRANCH", unset = "main") 95 | url <- glue("git+https://github.com/conbench/datalogistik.git@{ref}") 96 | 97 | pipx_call <- c("install", "--pip-args=--extra-index-url https://pypi.fury.io/arrow-nightlies --prefer-binary") 98 | if (datalogistik_available()) { 99 | # default to yes (and also this will make it work in non-interactive sessions) 100 | ans <- readline("datalogistik already installed. Update? 
[Y/n]: ") 101 | if (tolower(ans) %in% c("y", "")) { 102 | # we need the extra args to depend on the development version of arrow 103 | return(processx::run("pipx", c(pipx_call, "--force", url), echo_cmd = TRUE)) 104 | } else { 105 | return(invisible()) 106 | } 107 | } 108 | 109 | processx::run("pipx", c(pipx_call, url), echo_cmd = TRUE) 110 | } 111 | -------------------------------------------------------------------------------- /R/bm-dataset-taxi-2013.R: -------------------------------------------------------------------------------- 1 | #' Benchmark Taxi 2013 dataset reading 2 | #' 3 | #' @section Parameters: 4 | #' * `dataset` Name of dataset to use, either `taxi_2013` or `taxi_2013_sample` (for testing) 5 | #' * `query` Name of a known query to run; see `dataset_taxi_2013$cases` 6 | #' 7 | #' @export 8 | dataset_taxi_2013 <- Benchmark( 9 | "dataset_taxi_2013", 10 | setup = function(dataset = "taxi_2013", 11 | query = names(dataset_taxi_2013$cases)) { 12 | name <- match.arg(dataset, c("taxi_2013", "taxi_2013_sample")) 13 | library("dplyr", warn.conflicts = FALSE) 14 | dataset <- ensure_dataset(name) 15 | query <- dataset_taxi_2013$cases[[match.arg(query)]] 16 | 17 | BenchEnvironment( 18 | name = name, 19 | query = query, 20 | dataset = dataset 21 | ) 22 | }, 23 | before_each = { 24 | result <- NULL 25 | }, 26 | run = { 27 | result <- query$query(dataset) 28 | }, 29 | after_each = { 30 | query$assert(result, name) 31 | }, 32 | cases = list( 33 | basic = list( 34 | query = function(ds) { 35 | ds %>% 36 | filter(total_amount > 100, vendor_id == "CMT") %>% 37 | select(tip_amount, total_amount, payment_type) %>% 38 | group_by(payment_type) %>% 39 | summarize( 40 | tip_pct = median(100 * tip_amount / total_amount), 41 | n = n() 42 | ) %>% 43 | collect() 44 | }, 45 | assert = function(result, name) { 46 | stopifnot( 47 | identical(dim(result), c(if (name == "taxi_2013_sample") 0L else 4L, 3L)), 48 | identical(names(result), c("payment_type", "tip_pct", "n")), 49 | identical(sum(result$n), if (name == "taxi_2013_sample") 0L else 68158L) 50 | ) 51 | } 52 | ), 53 | payment_type_crd = list( 54 | query = function(ds) { 55 | ds %>% 56 | filter(payment_type == "CRD") %>% 57 | mutate(year = year(pickup_datetime), month = month(pickup_datetime)) %>% 58 | select(year, month, total_amount) %>% 59 | group_by(year, month) %>% 60 | summarize( 61 | total_amount = sum(total_amount, na.rm = TRUE), 62 | n = n() 63 | ) %>% 64 | collect() 65 | }, 66 | assert = function(result, name) { 67 | stopifnot( 68 | identical(dim(result), c(12L, 4L)), 69 | identical(names(result), c("year", "month", "total_amount", "n")), 70 | identical(sum(result$n), if (name == "taxi_2013_sample") 567L else 93334004L) 71 | ) 72 | } 73 | ), 74 | # The intention of this is to filter + read from a small number of csv 75 | # files (smaller than the number of threads) to see if parallelism is 76 | # beneficial 77 | small_no_files = list( 78 | query = function(ds) { 79 | ds %>% 80 | mutate(month = month(pickup_datetime)) %>% 81 | filter(total_amount > 20, month %in% c(4L, 7L)) %>% 82 | select(tip_amount, total_amount, payment_type) %>% 83 | group_by(payment_type) %>% 84 | summarize( 85 | tip_pct = median(100 * tip_amount / total_amount), 86 | n = n() 87 | ) %>% 88 | collect() 89 | }, 90 | assert = function(result, name) { 91 | stopifnot( 92 | identical(dim(result), c(if (name == "taxi_2013_sample") 2L else 5L, 3L)), 93 | identical(names(result), c("payment_type", "tip_pct", "n")), 94 | identical(sum(result$n), if (name == "taxi_2013_sample") 
36L else 4797187L) 95 | ) 96 | } 97 | ), 98 | dims = list( 99 | query = function(ds) { 100 | dim(ds) 101 | }, 102 | assert = function(result, name) { 103 | stopifnot("dims do not match" = identical(result, c(if (name == "taxi_2013_sample") 1000L else 173179759L, 11L))) 104 | } 105 | ) 106 | ), 107 | packages_used = function(params) { 108 | c("arrow", "dplyr") 109 | } 110 | ) 111 | -------------------------------------------------------------------------------- /R/bm-read-csv.R: -------------------------------------------------------------------------------- 1 | #' Benchmark CSV reading 2 | #' 3 | #' @section Parameters: 4 | #' * `source` A known-source id, or a CSV file path to read in 5 | #' * `reader` One of `c("arrow", "data.table", "vroom", "readr")` 6 | #' * `compression` One of `c("uncompressed", "gzip")` 7 | #' * `output_format` One of `c("arrow_table", "data_frame")` 8 | #' 9 | #' @export 10 | #' @importFrom R.utils gzip 11 | read_csv <- Benchmark( 12 | "read_csv", 13 | setup = function(source = names(known_sources), 14 | reader = "arrow", 15 | compression = c("uncompressed", "gzip"), 16 | output_format = c("arrow_table", "data_frame")) { 17 | reader <- match.arg(reader, c("arrow", "data.table", "vroom", "readr")) 18 | compression <- match.arg(compression) 19 | output_format <- match.arg(output_format) 20 | # ensure that the file exists 21 | input_file <- ensure_format(source, "csv", compression) 22 | 23 | # Map string param name to function 24 | delim <- get_source_attr(source, "delim") %||% "," 25 | read_func <- get_csv_reader(reader, delim) 26 | result_dim <- get_source_attr(source, "dim") 27 | 28 | BenchEnvironment( 29 | # (read_func was already built above from the reader and delim params) 30 | read_func = read_func, 31 | input_file = input_file, 32 | result_dim = result_dim, 33 | as_data_frame = output_format == "data_frame", 34 | delim = delim 35 | ) 36 | }, 37 | before_each = { 38 | result <- NULL 39 | }, 40 | run = { 41 | result <- read_func(input_file, delim = delim, as_data_frame = as_data_frame) 42 | }, 43 | after_each = { 44 | correct_format <- FALSE 45 | if (as_data_frame) { 46 | correct_format <- inherits(result, "data.frame") 47 | } else { 48 | correct_format <- inherits(result, c("Table", "ArrowObject")) 49 | } 50 | 51 | stopifnot( 52 | # we have a tolerance of 1 here because vroom reads 1 additional row of 53 | # all NAs since there are two newlines after the header 54 | "The dimensions do not match" = all.equal(dim(result), result_dim, tolerance = 1), 55 | "The format isn't correct" = correct_format 56 | ) 57 | result <- NULL 58 | }, 59 | valid_params = function(params) { 60 | # compression was only supported from arrow 1.0.0 onward, but `lib_path` 61 | # may not be set here 62 | version_check <- (!is.null(params$lib_path) && params$lib_path < "1.0") 63 | 64 | # on macOS data.table doesn't (typically) have multi core support 65 | # TODO: check if this is actually enabled before running? 
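    # drop parameter combinations that can't work: only arrow can return an
    # arrow_table, readr only runs in the single-cpu_count configuration, and
    # arrow < 1.0 can't read compressed CSVs directly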
#' Get a CSV reader
#'
#' @param reader the reader to use
#' @param delim the delimiter to use
#'
#' @return the csv reader
#' @export
get_csv_reader <- function(reader, delim) {
  library(reader, character.only = TRUE, warn.conflicts = FALSE)
  # TODO: allow other readers to read non-comma-delimited files
  if (reader == "arrow") {
    # TODO: if gzipped and the arrow csv reader version doesn't support that, unzip?
    return(function(...) arrow::read_delim_arrow(...))
  } else if (reader == "readr") {
    return(function(..., as_data_frame) readr::read_delim(...))
  } else if (reader == "data.table") {
    sep <- force(delim)
    return(function(..., as_data_frame, delim) data.table::fread(..., sep = sep))
  } else if (reader == "vroom") {
    # altrep = FALSE because otherwise you aren't getting the data
    # TODO: maybe we do want to compare, esp. later when we do altrep
    return(function(..., as_data_frame) vroom::vroom(..., altrep = FALSE))
  } else {
    stop("Unsupported reader: ", reader, call. = FALSE)
  }
}
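# Editor's note: a usage sketch, not part of the original file. The point of
# the wrappers above is that every reader gets one uniform call signature:
# arguments a given backend doesn't understand (`as_data_frame`, `delim`) are
# absorbed by the wrapper, so the benchmark's `run` block can call any reader
# identically.
if (FALSE) {
  f <- get_csv_reader("data.table", ",")
  # data.table::fread() never sees `delim` or `as_data_frame`; it gets the
  # captured `sep` instead. The file path here is hypothetical.
  df <- f("some_file.csv", delim = ",", as_data_frame = TRUE)
}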
--------------------------------------------------------------------------------
/tests/testthat/test-ensure-format.R:
--------------------------------------------------------------------------------
# create a temporary directory to be used as the data directory
temp_dir <- tempfile()
dir.create(temp_dir)

withr::with_envvar(
  list(ARROWBENCH_DATA_DIR = temp_dir), {
    test_that("ensure_format", {
      # there are no temp files yet
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.uncompressed.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.snappy.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv")))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv.gz")))

      # we can transform from one format to another
      ensure_format("nyctaxi_sample", "parquet")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.uncompressed.parquet")))

      ensure_format("nyctaxi_sample", "parquet", compression = "snappy")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.snappy.parquet")))

      ensure_format("nyctaxi_sample", "parquet", compression = "snappy", chunk_size = 100000)
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.chunk_size_1e+05.snappy.parquet")))

      ensure_format("nyctaxi_sample", "feather", compression = "lz4")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.lz4.feather")))

      # note: this is slightly bigger than the chunk_size above, but we get the same rounded value
      ensure_format("nyctaxi_sample", "feather", chunk_size = 100010)
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.chunk_size_1e+05.uncompressed.feather")))

      # but if the difference is bigger, we get that value reflected
      ensure_format("nyctaxi_sample", "feather", chunk_size = 100100)
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.chunk_size_1.001e+05.uncompressed.feather")))

      ensure_format("nyctaxi_sample", "csv", compression = "gzip")
      expect_true(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv.gz")))

      # but because the source is already a csv, this doesn't create a new file
      # in temp; instead it references the original in situ
      out <- ensure_format("nyctaxi_sample", "csv")
      expect_identical(out, ensure_source("nyctaxi_sample"))
      expect_false(file.exists(file.path(temp_dir, "temp", "nyctaxi_sample.csv")))
    })

    test_that("ensure_format with tpch", {
      # don't test if we are not already trying to install the custom duckdb
      skip_if(Sys.getenv("ARROWBENCH_TEST_CUSTOM_DUCKDB", "") == "")

      # there are no temp files yet
      expect_false(file.exists(file.path(temp_dir, "lineitem_0.0001.parquet")))
      expect_false(file.exists(file.path(temp_dir, "temp", "lineitem_0.0001.uncompressed.parquet")))

      # we can generate
      tpch_files <- ensure_tpch(0.0001)
      expect_true(file.exists(file.path(temp_dir, "lineitem_0.0001.parquet")))

      # and we can ensure format
      lineitem <- ensure_format(tpch_files[["lineitem"]], "parquet")
      expect_equal(lineitem, file.path(temp_dir, "temp", "lineitem_0.0001.uncompressed.parquet"))
      expect_true(file.exists(file.path(temp_dir, "temp", "lineitem_0.0001.uncompressed.parquet")))
    })
  })

test_that("format + compression validation with a df", {
  df <- expand.grid(
    source = "a source",
    lib_path = "some/path",
    format = c("csv", "parquet", "fst"),
    compression = c("gzip", "zstd", "snappy"),
    stringsAsFactors = FALSE
  )

  expect_identical(
    validate_format(df$format, df$compression),
    c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE)
  )
})

test_that("format + compression validation", {
  expect_true(stop_if_not_valid_format("csv", "gzip"))

  expect_error(
    stop_if_not_valid_format("csv", "snappy"),
    "The format csv does not support snappy compression"
  )
})
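# Editor's note: a worked example, not part of the original file, of the
# rounding behavior the chunk_size expectations above depend on. The file label
# presumably comes from rounding to 4 significant digits and formatting in
# scientific notation, so 100010 collapses to the same "1e+05" label as 100000,
# while 100100 does not:
if (FALSE) {
  format(signif(100010, 4), scientific = TRUE)  # "1e+05"
  format(signif(100100, 4), scientific = TRUE)  # "1.001e+05"
}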
--------------------------------------------------------------------------------
/tests/testthat/test-publish.R:
--------------------------------------------------------------------------------
test_that("call benchconnect works", {
  expect_match(call_benchconnect("--help"), "Command line utilities for interacting with a Conbench API")
})

test_that("augment_run() works", {
  reason <- "test"
  host_name <- "fake-computer"
  github <- list(
    commit = "fake-commit",
    repository = "https://github.com/conchair/conchair",
    pr_number = "47"
  )

  unaugmented_run <- BenchmarkRun$new(reason = reason, github = NULL)
  withr::with_envvar(
    c(
      "CONBENCH_MACHINE_INFO_NAME" = host_name,
      "CONBENCH_PROJECT_REPOSITORY" = github$repository,
      "CONBENCH_PROJECT_COMMIT" = github$commit,
      "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number
    ),
    { augmented_run <- augment_run(unaugmented_run) }
  )

  expect_equal(unaugmented_run$reason, reason)
  expect_equal(augmented_run$reason, reason)

  expect_null(unaugmented_run$id)
  expect_type(augmented_run$id, "character")

  expect_null(unaugmented_run$machine_info)
  expect_type(augmented_run$machine_info, "list")
  expect_type(augmented_run$machine_info$name, "character")
  expect_equal(augmented_run$machine_info$name, host_name)

  expect_null(unaugmented_run$github)
  expect_equal(augmented_run$github, github)
})


test_that("augment_result() works", {
  stats <- list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3)
  host_name <- "fake-computer"
  github <- list(
    commit = "fake-commit",
    repository = "conchair/conchair",
    pr_number = "47"
  )

  unaugmented_result <- BenchmarkResult$new(stats = stats, github = NULL)
  withr::with_envvar(
    c(
      "CONBENCH_MACHINE_INFO_NAME" = host_name,
      "CONBENCH_PROJECT_REPOSITORY" = github$repository,
      "CONBENCH_PROJECT_COMMIT" = github$commit,
      "CONBENCH_PROJECT_PR_NUMBER" = github$pr_number
    ),
    { augmented_result <- augment_result(unaugmented_result) }
  )

  expect_equal(unaugmented_result$timestamp, augmented_result$timestamp)

  expect_equal(unaugmented_result$stats, stats)
  expect_equal(augmented_result$stats, stats)

  expect_null(unaugmented_result$batch_id)
  expect_type(augmented_result$batch_id, "character")

  expect_null(unaugmented_result$machine_info)
  expect_type(augmented_result$machine_info, "list")
  expect_type(augmented_result$machine_info$name, "character")
  expect_equal(augmented_result$machine_info$name, host_name)

  expect_null(unaugmented_result$github)
  expect_equal(augmented_result$github, github)
})


test_that("start_run() works", {
  bm_run <- BenchmarkRun$new(
    name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5",
    reason = "arrowbench-unit-test",
    github = list(
      commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
      repository = "https://github.com/conchair/conchair",
      pr_number = "47"
    )
  )

  mockery::stub(
    where = start_run,
    what = "call_benchconnect",
    how = function(args) {
      expect_identical(args, c("start", "run", "--json", bm_run$json))
    }
  )
  start_run(run = bm_run)
})


test_that("submit_result() works", {
  bm_result <- BenchmarkResult$new(
    run_name = "arrowbench-unit-test: 2z8c9c49a5dc4a179243268e4bb6daa5",
    run_reason = "arrowbench-unit-test",
    github = list(
      commit = "2z8c9c49a5dc4a179243268e4bb6daa5",
      repository = "https://github.com/conchair/conchair",
      pr_number = "47"
    ),
    stats = list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3)
  )

  mockery::stub(
    where = submit_result,
    what = "call_benchconnect",
    how = function(args) {
      expect_identical(args, c("submit", "result", "--json", bm_result$json))
    }
  )
  submit_result(result = bm_result)
})


test_that("finish_run() works", {
  mockery::stub(
    where = finish_run,
    what = "call_benchconnect",
    how = function(args) {
      expect_identical(args, c("finish", "run", "--json", "{}"))
    }
  )
  finish_run()
})

unlink("benchconnect-state.json")
--------------------------------------------------------------------------------
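# Editor's note: a hedged end-to-end sketch, not part of the original sources,
# tying together the lifecycle the tests above exercise: a run is started,
# results are submitted against it, and the run is finished, with each helper
# shelling out to the benchconnect CLI via call_benchconnect().
if (FALSE) {
  run <- augment_run(BenchmarkRun$new(reason = "manual-test", github = NULL))
  start_run(run = run)

  result <- augment_result(BenchmarkResult$new(
    stats = list(data = list(1, 2, 3), unit = "s", times = NULL, time_unit = NULL, iterations = 3),
    github = NULL
  ))
  submit_result(result = result)

  finish_run()
}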