├── .asf.yaml ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── feature_request.md │ └── question.md ├── actions │ └── setup-builder │ │ └── action.yaml ├── dependabot.yml ├── pull_request_template.md └── workflows │ ├── README.md │ ├── arrow.yml │ ├── arrow_flight.yml │ ├── audit.yml │ ├── dev.yml │ ├── dev_pr.yml │ ├── dev_pr │ └── labeler.yml │ ├── docs.yml │ ├── integration.yml │ ├── miri.sh │ ├── miri.yaml │ ├── parquet.yml │ ├── parquet_derive.yml │ ├── release.yml │ ├── rust.yml │ └── take.yml ├── .github_changelog_generator ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── CHANGELOG-old.md ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── arrow-arith ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt └── src │ ├── aggregate.rs │ ├── arithmetic.rs │ ├── arity.rs │ ├── bitwise.rs │ ├── boolean.rs │ ├── lib.rs │ ├── numeric.rs │ └── temporal.rs ├── arrow-array ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ ├── decimal_overflow.rs │ ├── fixed_size_list_array.rs │ ├── gc_view_types.rs │ ├── occupancy.rs │ └── union_array.rs └── src │ ├── arithmetic.rs │ ├── array │ ├── binary_array.rs │ ├── boolean_array.rs │ ├── byte_array.rs │ ├── byte_view_array.rs │ ├── dictionary_array.rs │ ├── fixed_size_binary_array.rs │ ├── fixed_size_list_array.rs │ ├── list_array.rs │ ├── list_view_array.rs │ ├── map_array.rs │ ├── mod.rs │ ├── null_array.rs │ ├── primitive_array.rs │ ├── run_array.rs │ ├── string_array.rs │ ├── struct_array.rs │ └── union_array.rs │ ├── builder │ ├── boolean_builder.rs │ ├── buffer_builder.rs │ ├── fixed_size_binary_builder.rs │ ├── fixed_size_binary_dictionary_builder.rs │ ├── fixed_size_list_builder.rs │ ├── generic_byte_run_builder.rs │ ├── generic_bytes_builder.rs │ ├── generic_bytes_dictionary_builder.rs │ ├── generic_bytes_view_builder.rs │ ├── generic_list_builder.rs │ ├── generic_list_view_builder.rs │ ├── map_builder.rs │ 
├── mod.rs │ ├── null_builder.rs │ ├── primitive_builder.rs │ ├── primitive_dictionary_builder.rs │ ├── primitive_run_builder.rs │ ├── struct_builder.rs │ └── union_builder.rs │ ├── cast.rs │ ├── delta.rs │ ├── ffi.rs │ ├── ffi_stream.rs │ ├── iterator.rs │ ├── lib.rs │ ├── numeric.rs │ ├── record_batch.rs │ ├── run_iterator.rs │ ├── scalar.rs │ ├── temporal_conversions.rs │ ├── timezone.rs │ ├── trusted_len.rs │ └── types.rs ├── arrow-avro ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ └── avro_reader.rs ├── examples │ └── read_with_utf8view.rs └── src │ ├── codec.rs │ ├── compression.rs │ ├── lib.rs │ ├── reader │ ├── block.rs │ ├── cursor.rs │ ├── header.rs │ ├── mod.rs │ ├── record.rs │ └── vlq.rs │ └── schema.rs ├── arrow-buffer ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ ├── bit_mask.rs │ ├── i256.rs │ └── offset.rs └── src │ ├── alloc │ ├── alignment.rs │ └── mod.rs │ ├── arith.rs │ ├── bigint │ ├── div.rs │ └── mod.rs │ ├── buffer │ ├── boolean.rs │ ├── immutable.rs │ ├── mod.rs │ ├── mutable.rs │ ├── null.rs │ ├── offset.rs │ ├── ops.rs │ ├── run.rs │ └── scalar.rs │ ├── builder │ ├── boolean.rs │ ├── mod.rs │ ├── null.rs │ └── offset.rs │ ├── bytes.rs │ ├── interval.rs │ ├── lib.rs │ ├── native.rs │ └── util │ ├── bit_chunk_iterator.rs │ ├── bit_iterator.rs │ ├── bit_mask.rs │ ├── bit_util.rs │ └── mod.rs ├── arrow-cast ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ ├── parse_date.rs │ ├── parse_decimal.rs │ ├── parse_time.rs │ └── parse_timestamp.rs └── src │ ├── base64.rs │ ├── cast │ ├── decimal.rs │ ├── dictionary.rs │ ├── list.rs │ ├── map.rs │ ├── mod.rs │ └── string.rs │ ├── display.rs │ ├── lib.rs │ ├── parse.rs │ └── pretty.rs ├── arrow-csv ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── examples │ ├── README.md │ └── csv_calculation.rs ├── src │ ├── lib.rs │ ├── reader │ │ ├── mod.rs │ │ └── records.rs │ └── writer.rs └── test │ └── data │ ├── custom_null_test.csv │ ├── decimal_test.csv │ ├── 
dictionary_nullable_test.csv │ ├── example.csv │ ├── init_null_test.csv │ ├── null_test.csv │ ├── scientific_notation_test.csv │ ├── truncated_rows.csv │ ├── uk_cities.csv │ ├── uk_cities_with_headers.csv │ ├── various_invalid_types │ ├── invalid_bool.csv │ ├── invalid_float.csv │ ├── invalid_int.csv │ └── null_in_non_nullable.csv │ └── various_types.csv ├── arrow-data ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt └── src │ ├── byte_view.rs │ ├── data.rs │ ├── decimal.rs │ ├── equal │ ├── boolean.rs │ ├── byte_view.rs │ ├── dictionary.rs │ ├── fixed_binary.rs │ ├── fixed_list.rs │ ├── list.rs │ ├── mod.rs │ ├── null.rs │ ├── primitive.rs │ ├── run.rs │ ├── structure.rs │ ├── union.rs │ ├── utils.rs │ └── variable_size.rs │ ├── ffi.rs │ ├── lib.rs │ └── transform │ ├── boolean.rs │ ├── fixed_binary.rs │ ├── fixed_size_list.rs │ ├── list.rs │ ├── mod.rs │ ├── null.rs │ ├── primitive.rs │ ├── structure.rs │ ├── union.rs │ ├── utils.rs │ └── variable_size.rs ├── arrow-flight ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── examples │ ├── data │ │ ├── ca_root.key │ │ ├── ca_root.pem │ │ ├── client.key │ │ ├── client.pem │ │ ├── gen_certs.sh │ │ ├── server.key │ │ └── server.pem │ ├── flight_sql_server.rs │ └── server.rs ├── gen │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── regen.sh ├── src │ ├── arrow.flight.protocol.rs │ ├── bin │ │ └── flight_sql_client.rs │ ├── client.rs │ ├── decode.rs │ ├── encode.rs │ ├── error.rs │ ├── lib.rs │ ├── sql │ │ ├── arrow.flight.protocol.sql.rs │ │ ├── client.rs │ │ ├── metadata │ │ │ ├── catalogs.rs │ │ │ ├── db_schemas.rs │ │ │ ├── mod.rs │ │ │ ├── sql_info.rs │ │ │ ├── table_types.rs │ │ │ ├── tables.rs │ │ │ └── xdbc_info.rs │ │ ├── mod.rs │ │ └── server.rs │ ├── streams.rs │ ├── trailers.rs │ └── utils.rs └── tests │ ├── client.rs │ ├── common │ ├── fixture.rs │ ├── mod.rs │ ├── server.rs │ ├── trailers_layer.rs │ └── utils.rs │ ├── encode_decode.rs │ ├── flight_sql_client.rs │ └── 
flight_sql_client_cli.rs ├── arrow-integration-test ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── data │ └── integration.json └── src │ ├── datatype.rs │ ├── field.rs │ ├── lib.rs │ └── schema.rs ├── arrow-integration-testing ├── Cargo.toml ├── README.md ├── src │ ├── bin │ │ ├── arrow-file-to-stream.rs │ │ ├── arrow-json-integration-test.rs │ │ ├── arrow-stream-to-file.rs │ │ ├── flight-test-integration-client.rs │ │ └── flight-test-integration-server.rs │ ├── flight_client_scenarios │ │ ├── auth_basic_proto.rs │ │ ├── integration_test.rs │ │ ├── middleware.rs │ │ └── mod.rs │ ├── flight_server_scenarios │ │ ├── auth_basic_proto.rs │ │ ├── integration_test.rs │ │ ├── middleware.rs │ │ └── mod.rs │ └── lib.rs └── tests │ ├── ipc_reader.rs │ └── ipc_writer.rs ├── arrow-ipc ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ ├── ipc_reader.rs │ └── ipc_writer.rs ├── regen.sh └── src │ ├── compression.rs │ ├── convert.rs │ ├── gen │ ├── File.rs │ ├── Message.rs │ ├── Schema.rs │ ├── SparseTensor.rs │ ├── Tensor.rs │ └── mod.rs │ ├── lib.rs │ ├── reader.rs │ ├── reader │ └── stream.rs │ └── writer.rs ├── arrow-json ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ └── serde.rs ├── src │ ├── lib.rs │ ├── reader │ │ ├── boolean_array.rs │ │ ├── decimal_array.rs │ │ ├── list_array.rs │ │ ├── map_array.rs │ │ ├── mod.rs │ │ ├── null_array.rs │ │ ├── primitive_array.rs │ │ ├── schema.rs │ │ ├── serializer.rs │ │ ├── string_array.rs │ │ ├── string_view_array.rs │ │ ├── struct_array.rs │ │ ├── tape.rs │ │ └── timestamp_array.rs │ └── writer │ │ ├── encoder.rs │ │ └── mod.rs └── test │ └── data │ ├── arrays.json │ ├── basic.json │ ├── basic_nulls.json │ ├── list_string_dict_nested.json │ ├── list_string_dict_nested_nulls.json │ ├── mixed_arrays.json │ ├── mixed_arrays.json.gz │ ├── nested_structs.json │ └── nested_with_nulls.json ├── arrow-ord ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt └── src │ ├── cmp.rs │ ├── comparison.rs │ ├── 
lib.rs │ ├── ord.rs │ ├── partition.rs │ ├── rank.rs │ └── sort.rs ├── arrow-pyarrow-integration-testing ├── .cargo │ └── config ├── .gitignore ├── Cargo.toml ├── README.md ├── pyproject.toml ├── src │ └── lib.rs └── tests │ └── test_sql.py ├── arrow-row ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt └── src │ ├── fixed.rs │ ├── lib.rs │ ├── list.rs │ └── variable.rs ├── arrow-schema ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── benches │ └── ffi.rs └── src │ ├── datatype.rs │ ├── datatype_parse.rs │ ├── error.rs │ ├── extension │ ├── canonical │ │ ├── bool8.rs │ │ ├── fixed_shape_tensor.rs │ │ ├── json.rs │ │ ├── mod.rs │ │ ├── opaque.rs │ │ ├── uuid.rs │ │ └── variable_shape_tensor.rs │ └── mod.rs │ ├── ffi.rs │ ├── field.rs │ ├── fields.rs │ ├── lib.rs │ └── schema.rs ├── arrow-select ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt └── src │ ├── concat.rs │ ├── dictionary.rs │ ├── filter.rs │ ├── interleave.rs │ ├── lib.rs │ ├── nullif.rs │ ├── take.rs │ ├── union_extract.rs │ ├── window.rs │ └── zip.rs ├── arrow-string ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt └── src │ ├── binary_like.rs │ ├── binary_predicate.rs │ ├── concat_elements.rs │ ├── length.rs │ ├── lib.rs │ ├── like.rs │ ├── predicate.rs │ ├── regexp.rs │ └── substring.rs ├── arrow ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── benches │ ├── aggregate_kernels.rs │ ├── arithmetic_kernels.rs │ ├── array_data_validate.rs │ ├── array_from_vec.rs │ ├── array_slice.rs │ ├── bit_length_kernel.rs │ ├── bitwise_kernel.rs │ ├── boolean_append_packed.rs │ ├── boolean_kernels.rs │ ├── buffer_bit_ops.rs │ ├── buffer_create.rs │ ├── builder.rs │ ├── cast_kernels.rs │ ├── comparison_kernels.rs │ ├── concatenate_kernel.rs │ ├── csv_reader.rs │ ├── csv_writer.rs │ ├── decimal_validate.rs │ ├── equal.rs │ ├── filter_kernels.rs │ ├── interleave_kernels.rs │ ├── json_reader.rs │ ├── json_writer.rs │ ├── length_kernel.rs │ ├── lexsort.rs │ ├── mutable_array.rs │ ├── 
partition_kernels.rs │ ├── primitive_run_accessor.rs │ ├── primitive_run_take.rs │ ├── regexp_kernels.rs │ ├── row_format.rs │ ├── sort_kernel.rs │ ├── string_dictionary_builder.rs │ ├── string_run_builder.rs │ ├── string_run_iterator.rs │ ├── substring_kernels.rs │ └── take_kernels.rs ├── examples │ ├── README.md │ ├── builders.rs │ ├── collect.rs │ ├── dynamic_types.rs │ ├── read_csv.rs │ ├── read_csv_infer_schema.rs │ ├── tensor_builder.rs │ ├── version.rs │ └── zero_copy_ipc.rs ├── src │ ├── array │ │ └── mod.rs │ ├── compute │ │ ├── README.md │ │ ├── kernels.rs │ │ └── mod.rs │ ├── datatypes │ │ └── mod.rs │ ├── error.rs │ ├── lib.rs │ ├── pyarrow.rs │ ├── tensor.rs │ └── util │ │ ├── bench_util.rs │ │ ├── data_gen.rs │ │ ├── mod.rs │ │ ├── string_writer.rs │ │ └── test_util.rs └── tests │ ├── arithmetic.rs │ ├── array_cast.rs │ ├── array_equal.rs │ ├── array_transform.rs │ ├── array_validation.rs │ ├── csv.rs │ ├── pyarrow.rs │ ├── schema.rs │ ├── shrink_to_fit.rs │ └── timezone.rs ├── dev ├── .gitignore └── release │ ├── README.md │ ├── check-rat-report.py │ ├── create-tarball.sh │ ├── label_issues.py │ ├── rat_exclude_files.txt │ ├── release-tarball.sh │ ├── remove-old-artifacts.sh │ ├── run-rat.sh │ ├── update_change_log.sh │ └── verify-release-candidate.sh ├── format ├── File.fbs ├── Flight.proto ├── FlightSql.proto ├── Message.fbs ├── README.rst ├── Schema.fbs ├── SparseTensor.fbs └── Tensor.fbs ├── header ├── parquet-variant ├── Cargo.toml ├── README.md └── src │ ├── decoder.rs │ ├── lib.rs │ ├── test_variant.rs │ ├── utils.rs │ └── variant.rs ├── parquet ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── README.md ├── benches │ ├── arrow_reader.rs │ ├── arrow_reader_clickbench.rs │ ├── arrow_reader_row_filter.rs │ ├── arrow_statistics.rs │ ├── arrow_writer.rs │ ├── compression.rs │ ├── encoding.rs │ ├── metadata.rs │ └── row_selector.rs ├── examples │ ├── async_read_parquet.rs │ ├── external_metadata.rs │ ├── read_parquet.rs │ ├── 
read_with_rowgroup.rs │ └── write_parquet.rs ├── pytest │ ├── requirements.in │ ├── requirements.txt │ └── test_parquet_integration.py ├── regen.sh ├── src │ ├── arrow │ │ ├── array_reader │ │ │ ├── builder.rs │ │ │ ├── byte_array.rs │ │ │ ├── byte_array_dictionary.rs │ │ │ ├── byte_view_array.rs │ │ │ ├── empty_array.rs │ │ │ ├── fixed_len_byte_array.rs │ │ │ ├── fixed_size_list_array.rs │ │ │ ├── list_array.rs │ │ │ ├── map_array.rs │ │ │ ├── mod.rs │ │ │ ├── null_array.rs │ │ │ ├── primitive_array.rs │ │ │ ├── struct_array.rs │ │ │ └── test_util.rs │ │ ├── arrow_reader │ │ │ ├── filter.rs │ │ │ ├── mod.rs │ │ │ ├── read_plan.rs │ │ │ ├── selection.rs │ │ │ └── statistics.rs │ │ ├── arrow_writer │ │ │ ├── byte_array.rs │ │ │ ├── levels.rs │ │ │ └── mod.rs │ │ ├── async_reader │ │ │ ├── metadata.rs │ │ │ ├── mod.rs │ │ │ └── store.rs │ │ ├── async_writer │ │ │ ├── mod.rs │ │ │ └── store.rs │ │ ├── buffer │ │ │ ├── bit_util.rs │ │ │ ├── dictionary_buffer.rs │ │ │ ├── mod.rs │ │ │ ├── offset_buffer.rs │ │ │ └── view_buffer.rs │ │ ├── decoder │ │ │ ├── delta_byte_array.rs │ │ │ ├── dictionary_index.rs │ │ │ └── mod.rs │ │ ├── mod.rs │ │ ├── record_reader │ │ │ ├── buffer.rs │ │ │ ├── definition_levels.rs │ │ │ └── mod.rs │ │ └── schema │ │ │ ├── complex.rs │ │ │ ├── mod.rs │ │ │ └── primitive.rs │ ├── basic.rs │ ├── bin │ │ ├── parquet-concat.rs │ │ ├── parquet-fromcsv-help.txt │ │ ├── parquet-fromcsv.rs │ │ ├── parquet-index.rs │ │ ├── parquet-layout.rs │ │ ├── parquet-read.rs │ │ ├── parquet-rewrite.rs │ │ ├── parquet-rowcount.rs │ │ ├── parquet-schema.rs │ │ └── parquet-show-bloom-filter.rs │ ├── bloom_filter │ │ └── mod.rs │ ├── column │ │ ├── mod.rs │ │ ├── page.rs │ │ ├── page_encryption.rs │ │ ├── page_encryption_disabled.rs │ │ ├── reader.rs │ │ ├── reader │ │ │ └── decoder.rs │ │ └── writer │ │ │ ├── encoder.rs │ │ │ └── mod.rs │ ├── compression.rs │ ├── data_type.rs │ ├── encodings │ │ ├── decoding.rs │ │ ├── decoding │ │ │ └── byte_stream_split_decoder.rs 
│ │ ├── encoding │ │ │ ├── byte_stream_split_encoder.rs │ │ │ ├── dict_encoder.rs │ │ │ └── mod.rs │ │ ├── levels.rs │ │ ├── mod.rs │ │ └── rle.rs │ ├── encryption │ │ ├── ciphers.rs │ │ ├── decrypt.rs │ │ ├── encrypt.rs │ │ ├── mod.rs │ │ └── modules.rs │ ├── errors.rs │ ├── file │ │ ├── column_crypto_metadata.rs │ │ ├── footer.rs │ │ ├── metadata │ │ │ ├── memory.rs │ │ │ ├── mod.rs │ │ │ ├── reader.rs │ │ │ └── writer.rs │ │ ├── mod.rs │ │ ├── page_encoding_stats.rs │ │ ├── page_index │ │ │ ├── index.rs │ │ │ ├── index_reader.rs │ │ │ ├── mod.rs │ │ │ └── offset_index.rs │ │ ├── properties.rs │ │ ├── reader.rs │ │ ├── serialized_reader.rs │ │ ├── statistics.rs │ │ └── writer.rs │ ├── format.rs │ ├── lib.rs │ ├── record │ │ ├── api.rs │ │ ├── mod.rs │ │ ├── reader.rs │ │ ├── record_reader.rs │ │ ├── record_writer.rs │ │ └── triplet.rs │ ├── schema │ │ ├── mod.rs │ │ ├── parser.rs │ │ ├── printer.rs │ │ ├── types.rs │ │ └── visitor.rs │ ├── thrift.rs │ └── util │ │ ├── bit_pack.rs │ │ ├── bit_util.rs │ │ ├── interner.rs │ │ ├── mod.rs │ │ ├── test_common │ │ ├── file_util.rs │ │ ├── mod.rs │ │ ├── page_util.rs │ │ └── rand_gen.rs │ │ └── utf8.rs └── tests │ ├── arrow_reader │ ├── bad_data.rs │ ├── bad_raw_metadata.bin │ ├── checksum.rs │ ├── mod.rs │ └── statistics.rs │ ├── arrow_writer_layout.rs │ └── encryption │ ├── encryption.rs │ ├── encryption_agnostic.rs │ ├── encryption_async.rs │ ├── encryption_disabled.rs │ ├── encryption_util.rs │ └── mod.rs ├── parquet_derive ├── Cargo.toml ├── LICENSE.txt ├── NOTICE.txt ├── README.md └── src │ ├── lib.rs │ └── parquet_field.rs ├── parquet_derive_test ├── Cargo.toml └── src │ └── lib.rs ├── pre-commit.sh └── rustfmt.toml /.asf.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Documentation can be found here: 19 | # https://github.com/apache/infrastructure-asfyaml/blob/main/README.md 20 | 21 | notifications: 22 | commits: commits@arrow.apache.org 23 | issues: github@arrow.apache.org 24 | pullrequests: github@arrow.apache.org 25 | discussions: github@arrow.apache.org 26 | jira_options: link label worklog 27 | github: 28 | description: "Official Rust implementation of Apache Arrow" 29 | homepage: https://arrow.apache.org/ 30 | labels: 31 | - arrow 32 | - parquet 33 | - rust 34 | enabled_merge_buttons: 35 | squash: true 36 | squash_commit_message: PR_TITLE_AND_DESC 37 | merge: false 38 | rebase: false 39 | features: 40 | issues: true 41 | discussions: true 42 | protected_branches: 43 | main: 44 | required_status_checks: 45 | # require branches to be up-to-date before merging 46 | strict: true 47 | # don't require any jobs to pass 48 | contexts: [] 49 | 50 | # publishes the content of the `asf-site` branch to 51 | # https://arrow.apache.org/rust/ 52 | publish: 53 | whoami: asf-site 54 | subdir: rust 55 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | parquet/src/format.rs 
linguist-generated 2 | arrow-flight/src/arrow.flight.protocol.rs linguist-generated 3 | arrow-flight/src/sql/arrow.flight.protocol.sql.rs linguist-generated 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | 14 | 15 | **To Reproduce** 16 | 19 | 20 | **Expected behavior** 21 | 24 | 25 | **Additional context** 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem or challenge? 
Please describe what you are trying to do.** 11 | 15 | 16 | **Describe the solution you'd like** 17 | 20 | 21 | **Describe alternatives you've considered** 22 | 25 | 26 | **Additional context** 27 | 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question about this project 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Which part is this question about** 11 | 14 | 15 | **Describe your question** 16 | 19 | 20 | **Additional context** 21 | 24 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | target-branch: main 9 | labels: [ auto-dependencies, arrow ] 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: "daily" 14 | open-pull-requests-limit: 10 15 | labels: [ auto-dependencies ] 16 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Which issue does this PR close? 2 | 3 | We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. 4 | 5 | Closes #NNN. 6 | 7 | # Rationale for this change 8 | 9 | Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. 10 | Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. 
11 | 12 | # What changes are included in this PR? 13 | 14 | There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. 15 | 16 | # Are there any user-facing changes? 17 | 18 | If there are user-facing changes then we may require documentation to be updated before approving the PR. 19 | 20 | If there are any breaking changes to public APIs, please call them out. 21 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | The CI is structured so most tests are run in specific workflows: 21 | `arrow.yml` for `arrow`, `parquet.yml` for `parquet` and so on. 22 | 23 | The basic idea is to run all tests on pushes to main (to ensure we 24 | keep main green) but run only the individual workflows on PRs that 25 | change files that could affect them. 26 | -------------------------------------------------------------------------------- /.github/workflows/audit.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: audit 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | # trigger for all PRs that touch certain files and changes to main 25 | on: 26 | push: 27 | branches: 28 | - main 29 | pull_request: 30 | paths: 31 | - '**/Cargo.toml' 32 | - '**/Cargo.lock' 33 | 34 | jobs: 35 | cargo-audit: 36 | name: Audit 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v4 40 | - name: Install cargo-audit 41 | run: cargo install cargo-audit 42 | - name: Run audit check 43 | run: cargo audit 44 | -------------------------------------------------------------------------------- /.github/workflows/dev.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | name: dev 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | # trigger for all PRs and changes to main 25 | on: 26 | push: 27 | branches: 28 | - main 29 | pull_request: 30 | 31 | env: 32 | ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} 33 | ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} 34 | 35 | jobs: 36 | 37 | rat: 38 | name: Release Audit Tool (RAT) 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: Setup Python 43 | uses: actions/setup-python@v5 44 | with: 45 | python-version: 3.8 46 | - name: Audit licenses 47 | run: ./dev/release/run-rat.sh . 48 | 49 | prettier: 50 | name: Markdown format 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v4 54 | - uses: actions/setup-node@v4 55 | with: 56 | node-version: "14" 57 | - name: Prettier check 58 | run: | 59 | # if you encounter an error, run the command below and commit the changes 60 | npx prettier@2.3.2 --write {arrow,arrow-flight,dev,arrow-integration-testing,parquet}/**/*.md README.md CODE_OF_CONDUCT.md CONTRIBUTING.md 61 | git diff --exit-code 62 | -------------------------------------------------------------------------------- /.github/workflows/dev_pr.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: dev_pr 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | # Trigger whenever a PR is changed (title as well as new / changed commits) 25 | on: 26 | pull_request_target: 27 | types: 28 | - opened 29 | - edited 30 | - synchronize 31 | 32 | jobs: 33 | process: 34 | name: Process 35 | runs-on: ubuntu-latest 36 | permissions: 37 | contents: read 38 | pull-requests: write 39 | steps: 40 | - uses: actions/checkout@v4 41 | 42 | - name: Assign GitHub labels 43 | if: | 44 | github.event_name == 'pull_request_target' && 45 | (github.event.action == 'opened' || 46 | github.event.action == 'synchronize') 47 | uses: actions/labeler@v5.0.0 48 | with: 49 | repo-token: ${{ secrets.GITHUB_TOKEN }} 50 | configuration-path: .github/workflows/dev_pr/labeler.yml 51 | sync-labels: true 52 | -------------------------------------------------------------------------------- /.github/workflows/dev_pr/labeler.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | arrow: 19 | - changed-files: 20 | - any-glob-to-any-file: 21 | - 'arrow-arith/**/*' 22 | - 'arrow-array/**/*' 23 | - 'arrow-buffer/**/*' 24 | - 'arrow-cast/**/*' 25 | - 'arrow-csv/**/*' 26 | - 'arrow-data/**/*' 27 | - 'arrow-flight/**/*' 28 | - 'arrow-integration-test/**/*' 29 | - 'arrow-integration-testing/**/*' 30 | - 'arrow-ipc/**/*' 31 | - 'arrow-json/**/*' 32 | - 'arrow-avro/**/*' 33 | - 'arrow-ord/**/*' 34 | - 'arrow-row/**/*' 35 | - 'arrow-schema/**/*' 36 | - 'arrow-select/**/*' 37 | - 'arrow-string/**/*' 38 | - 'arrow/**/*' 39 | 40 | arrow-flight: 41 | - changed-files: 42 | - any-glob-to-any-file: 43 | - 'arrow-flight/**/*' 44 | 45 | parquet: 46 | - changed-files: 47 | - any-glob-to-any-file: [ 'parquet/**/*' ] 48 | 49 | parquet-derive: 50 | - changed-files: 51 | - any-glob-to-any-file: [ 'parquet_derive/**/*' ] 52 | -------------------------------------------------------------------------------- /.github/workflows/miri.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Script to run the Arrow crates' tests under MIRI 4 | # 5 | # Must be run with nightly Rust, for example: 6 | # rustup default nightly 7 | 8 | set -e 9 | 10 | export MIRIFLAGS="-Zmiri-disable-isolation" 11 | cargo miri setup 12 | cargo clean 13 | 14 | echo "Starting Arrow MIRI run..." 
15 | cargo miri test -p arrow-buffer 16 | cargo miri test -p arrow-data --features ffi 17 | cargo miri test -p arrow-schema --features ffi 18 | cargo miri test -p arrow-ord 19 | cargo miri test -p arrow-array 20 | cargo miri test -p arrow-arith -------------------------------------------------------------------------------- /.github/workflows/miri.yaml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | name: miri 19 | 20 | concurrency: 21 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 22 | cancel-in-progress: true 23 | 24 | # trigger for all PRs that touch certain files and changes to main 25 | on: 26 | push: 27 | branches: 28 | - main 29 | pull_request: 30 | paths: 31 | - .github/** 32 | - arrow-array/** 33 | - arrow-buffer/** 34 | - arrow-cast/** 35 | - arrow-csv/** 36 | - arrow-data/** 37 | - arrow-ipc/** 38 | - arrow-json/** 39 | - arrow-avro/** 40 | - arrow-schema/** 41 | - arrow-select/** 42 | - arrow-string/** 43 | - arrow/** 44 | 45 | jobs: 46 | miri-checks: 47 | name: MIRI 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v4 51 | with: 52 | submodules: true 53 | - name: Setup Rust toolchain 54 | run: | 55 | rustup toolchain install nightly --component miri 56 | rustup override set nightly 57 | cargo miri setup 58 | - name: Run Miri Checks 59 | env: 60 | RUST_BACKTRACE: full 61 | RUST_LOG: "trace" 62 | run: bash .github/workflows/miri.sh 63 | -------------------------------------------------------------------------------- /.github/workflows/parquet_derive.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | --- 19 | # tests for parquet_derive crate 20 | name: parquet_derive 21 | 22 | concurrency: 23 | group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} 24 | cancel-in-progress: true 25 | 26 | # trigger for all PRs that touch certain files and changes to main 27 | on: 28 | push: 29 | branches: 30 | - main 31 | pull_request: 32 | paths: 33 | - parquet/** 34 | - parquet_derive/** 35 | - parquet_derive_test/** 36 | - .github/** 37 | 38 | jobs: 39 | # test the crate 40 | linux-test: 41 | name: Test 42 | runs-on: ubuntu-latest 43 | container: 44 | image: amd64/rust 45 | steps: 46 | - uses: actions/checkout@v4 47 | with: 48 | submodules: true 49 | - name: Setup Rust toolchain 50 | uses: ./.github/actions/setup-builder 51 | - name: Test 52 | run: cargo test -p parquet_derive 53 | 54 | clippy: 55 | name: Clippy 56 | runs-on: ubuntu-latest 57 | container: 58 | image: amd64/rust 59 | steps: 60 | - uses: actions/checkout@v4 61 | - name: Setup Rust toolchain 62 | uses: ./.github/actions/setup-builder 63 | - name: Setup Clippy 64 | run: rustup component add clippy 65 | - name: Run clippy 66 | run: cargo clippy -p parquet_derive --all-features -- -D warnings 67 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Creates a github release on https://github.com/apache/arrow-rs/releases 19 | # when a tag is pushed to the repository 20 | name: Release 21 | on: 22 | push: 23 | tags: 24 | - '*' 25 | - '!*-rc*' 26 | permissions: 27 | contents: write 28 | env: 29 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | jobs: 31 | publish: 32 | name: Publish 33 | runs-on: ubuntu-latest 34 | timeout-minutes: 5 35 | steps: 36 | - uses: actions/checkout@v4 37 | - name: Create GitHub Releases 38 | run: | 39 | version=${GITHUB_REF_NAME} 40 | title="arrow ${version}" 41 | notes_file=CHANGELOG.md 42 | gh release create ${GITHUB_REF_NAME} \ 43 | --title "${title}" \ 44 | --notes-file ${notes_file} \ 45 | --verify-tag 46 | -------------------------------------------------------------------------------- /.github/workflows/take.yml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | name: Assign the issue via a `take` comment 19 | on: 20 | issue_comment: 21 | types: created 22 | 23 | permissions: 24 | issues: write 25 | 26 | jobs: 27 | issue_assign: 28 | if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/github-script@v7 32 | with: 33 | script: | 34 | github.rest.issues.addAssignees({ 35 | owner: context.repo.owner, 36 | repo: context.repo.repo, 37 | issue_number: context.issue.number, 38 | assignees: [context.payload.comment.user.login], 39 | }) 40 | -------------------------------------------------------------------------------- /.github_changelog_generator: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. 
See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | # Add special sections for documentation, security and performance 22 | add-sections={"documentation":{"prefix":"**Documentation updates:**","labels":["documentation"]},"security":{"prefix":"**Security updates:**","labels":["security"]},"performance":{"prefix":"**Performance improvements:**","labels":["performance"]}} 23 | # uncomment to not show PRs. TBD if we shown them or not. 24 | #pull-requests=false 25 | # so that the component is shown associated with the issue 26 | issue-line-labels=arrow,parquet,arrow-flight 27 | exclude-labels=development-process,invalid,object-store,question 28 | breaking_labels=api-change 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | target 3 | rusty-tags.vi 4 | .history 5 | .flatbuffers/ 6 | .idea/ 7 | .vscode 8 | .devcontainer 9 | venv/* 10 | # created by doctests 11 | parquet/data.parquet 12 | # release notes cache 13 | .githubchangeloggenerator.cache 14 | .githubchangeloggenerator.cache.log 15 | justfile 16 | .prettierignore 17 | .env 18 | .editorconfig 19 | # local azurite file 20 | __azurite* 21 | __blobstorage__ 22 | 23 | # .bak files 24 | *.bak 25 | *.bak2 26 | # OS-specific .gitignores 27 | 28 | # Mac .gitignore 29 | # General 30 | .DS_Store 31 | .AppleDouble 32 | .LSOverride 33 | 34 | # Icon must end with two \r 35 | Icon 36 | 37 | # Thumbnails 38 | ._* 39 | 40 | # Files that might appear in the root of a volume 41 | .DocumentRevisions-V100 42 | .fseventsd 43 | .Spotlight-V100 44 | .TemporaryItems 45 | .Trashes 46 | .VolumeIcon.icns 47 | .com.apple.timemachine.donotpresent 48 | 49 | # Directories potentially created on remote AFP share 50 | .AppleDB 51 | .AppleDesktop 52 | Network Trash Folder 53 | Temporary Items 54 | .apdisk 55 | 56 | # Linux 
.gitignore 57 | *~ 58 | 59 | # temporary files which can be created if a process still has a handle open of a deleted file 60 | .fuse_hidden* 61 | 62 | # KDE directory preferences 63 | .directory 64 | 65 | # Linux trash folder which might appear on any partition or disk 66 | .Trash-* 67 | 68 | # .nfs files are created when an open file is removed but is still being accessed 69 | .nfs* 70 | 71 | # Windows .gitignore 72 | # Windows thumbnail cache files 73 | Thumbs.db 74 | Thumbs.db:encryptable 75 | ehthumbs.db 76 | ehthumbs_vista.db 77 | 78 | # Dump file 79 | *.stackdump 80 | 81 | # Folder config file 82 | [Dd]esktop.ini 83 | 84 | # Recycle Bin used on file shares 85 | $RECYCLE.BIN/ 86 | 87 | # Windows Installer files 88 | *.cab 89 | *.msi 90 | *.msix 91 | *.msm 92 | *.msp 93 | 94 | # Windows shortcuts 95 | *.lnk 96 | 97 | # Python virtual env in parquet crate 98 | parquet/pytest/venv/ 99 | __pycache__/ 100 | 101 | # Parquet file from arrow_reader_clickbench 102 | hits_1.parquet -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "testing"] 2 | path = testing 3 | url = https://github.com/apache/arrow-testing 4 | [submodule "parquet-testing"] 5 | path = parquet-testing 6 | url = https://github.com/apache/parquet-testing.git 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Code of Conduct 21 | 22 | - [Code of Conduct for The Apache Software Foundation][1] 23 | 24 | [1]: https://www.apache.org/foundation/policies/conduct.html 25 | -------------------------------------------------------------------------------- /arrow-arith/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | 
# or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-arith" 20 | version = { workspace = true } 21 | description = "Arrow arithmetic kernels" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_arith" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-array = { workspace = true } 40 | arrow-buffer = { workspace = true } 41 | arrow-data = { workspace = true } 42 | arrow-schema = { workspace = true } 43 | chrono = { workspace = true } 44 | num = { version = "0.4", default-features = false, features = ["std"] } 45 | -------------------------------------------------------------------------------- /arrow-arith/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-arith/NOTICE.txt: 
-------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-arith/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Arrow arithmetic and aggregation kernels 19 | 20 | #![doc( 21 | html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg", 22 | html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg" 23 | )] 24 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 25 | #![warn(missing_docs)] 26 | pub mod aggregate; 27 | #[doc(hidden)] // Kernels to be removed in a future release 28 | pub mod arithmetic; 29 | pub mod arity; 30 | pub mod bitwise; 31 | pub mod boolean; 32 | pub mod numeric; 33 | pub mod temporal; 34 | -------------------------------------------------------------------------------- /arrow-array/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-array/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-array/benches/fixed_size_list_array.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use arrow_array::{Array, FixedSizeListArray, Int32Array}; 19 | use arrow_schema::Field; 20 | use criterion::*; 21 | use rand::{rng, Rng}; 22 | use std::{hint, sync::Arc}; 23 | 24 | fn gen_fsl(len: usize, value_len: usize) -> FixedSizeListArray { 25 | let mut rng = rng(); 26 | let values = Arc::new(Int32Array::from( 27 | (0..len).map(|_| rng.random::<i32>()).collect::<Vec<_>>(), 28 | )); 29 | let field = Arc::new(Field::new_list_field(values.data_type().clone(), true)); 30 | FixedSizeListArray::new(field, value_len as i32, values, None) 31 | } 32 | 33 | fn criterion_benchmark(c: &mut Criterion) { 34 | let len = 4096; 35 | for value_len in [1, 32, 1024] { 36 | let fsl = gen_fsl(len, value_len); 37 | c.bench_function( 38 | &format!("fixed_size_list_array(len: {len}, value_len: {value_len})"), 39 | |b| { 40 | b.iter(|| { 41 | for i in 0..len / value_len { 42 | hint::black_box(fsl.value(i)); 43 | } 44 | }); 45 | }, 46 | ); 47 | } 48 | } 49 | 50 | criterion_group!(benches, criterion_benchmark); 51 | criterion_main!(benches); 52 | -------------------------------------------------------------------------------- /arrow-array/benches/gc_view_types.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use arrow_array::StringViewArray; 19 | use criterion::*; 20 | use std::hint; 21 | 22 | fn gen_view_array(size: usize) -> StringViewArray { 23 | StringViewArray::from_iter((0..size).map(|v| match v % 3 { 24 | 0 => Some("small"), 25 | 1 => Some("larger than 12 bytes array"), 26 | 2 => None, 27 | _ => unreachable!("unreachable"), 28 | })) 29 | } 30 | 31 | fn criterion_benchmark(c: &mut Criterion) { 32 | let array = gen_view_array(100_000); 33 | 34 | c.bench_function("gc view types all", |b| { 35 | b.iter(|| { 36 | hint::black_box(array.gc()); 37 | }); 38 | }); 39 | 40 | let sliced = array.slice(0, 100_000 / 2); 41 | c.bench_function("gc view types slice half", |b| { 42 | b.iter(|| { 43 | hint::black_box(sliced.gc()); 44 | }); 45 | }); 46 | } 47 | 48 | criterion_group!(benches, criterion_benchmark); 49 | criterion_main!(benches); 50 | -------------------------------------------------------------------------------- /arrow-array/src/numeric.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::ArrowPrimitiveType; 19 | 20 | /// A subtype of primitive type that represents numeric values. 21 | pub trait ArrowNumericType: ArrowPrimitiveType {} 22 | 23 | impl<T: ArrowPrimitiveType> ArrowNumericType for T {} 24 | -------------------------------------------------------------------------------- /arrow-avro/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-avro/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-buffer/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-buffer" 20 | version = { workspace = true } 21 | description = "Buffer abstractions for Apache Arrow" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_buffer" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | bytes = { version = "1.4" } 40 | num = { version = "0.4", default-features = false, features = ["std"] } 41 | half = { version = "2.1", default-features = false } 42 | 43 | [dev-dependencies] 44 | criterion = { version = "0.5", default-features = false } 45 | rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } 46 | 47 | [[bench]] 48 | name = "bit_mask" 49 | harness = false 50 | 51 | [[bench]] 52 | name = "i256" 53 | harness = false 54 | 55 | [[bench]] 56 | name = "offset" 57 | harness = false 58 | -------------------------------------------------------------------------------- /arrow-buffer/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-buffer/NOTICE.txt: -------------------------------------------------------------------------------- 1 | 
../NOTICE.txt -------------------------------------------------------------------------------- /arrow-buffer/benches/offset.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_buffer::{OffsetBuffer, OffsetBufferBuilder}; 19 | use criterion::*; 20 | use rand::rngs::StdRng; 21 | use rand::{Rng, SeedableRng}; 22 | use std::hint; 23 | 24 | const SIZE: usize = 1024; 25 | 26 | fn criterion_benchmark(c: &mut Criterion) { 27 | let mut rng = StdRng::seed_from_u64(42); 28 | let lengths: Vec<usize> = hint::black_box((0..SIZE).map(|_| rng.random_range(0..40)).collect()); 29 | 30 | c.bench_function("OffsetBuffer::from_lengths", |b| { 31 | b.iter(|| OffsetBuffer::<i32>::from_lengths(lengths.iter().copied())); 32 | }); 33 | 34 | c.bench_function("OffsetBufferBuilder::push_length", |b| { 35 | b.iter(|| { 36 | let mut builder = OffsetBufferBuilder::<i32>::new(lengths.len()); 37 | lengths.iter().for_each(|x| builder.push_length(*x)); 38 | builder.finish() 39 | }); 40 | }); 41 | 42 | let offsets = OffsetBuffer::<i32>::from_lengths(lengths.iter().copied()).into_inner(); 43 | 44 | c.bench_function("OffsetBuffer::new", |b| { 45 | b.iter(|| OffsetBuffer::new(hint::black_box(offsets.clone()))); 46 | }); 47 | } 48 | 49 | criterion_group!(benches, criterion_benchmark); 50 | criterion_main!(benches); 51 | -------------------------------------------------------------------------------- /arrow-buffer/src/buffer/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Types of shared memory region 19 | 20 | mod offset; 21 | pub use offset::*; 22 | mod immutable; 23 | pub use immutable::*; 24 | mod mutable; 25 | pub use mutable::*; 26 | mod ops; 27 | pub use ops::*; 28 | mod scalar; 29 | pub use scalar::*; 30 | mod boolean; 31 | pub use boolean::*; 32 | mod null; 33 | pub use null::*; 34 | mod run; 35 | pub use run::*; 36 | -------------------------------------------------------------------------------- /arrow-buffer/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Low-level buffer abstractions for [Apache Arrow Rust](https://docs.rs/arrow) 19 | 20 | #![doc( 21 | html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg", 22 | html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg" 23 | )] 24 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 25 | // used by [`buffer::mutable::dangling_ptr`] 26 | #![cfg_attr(miri, feature(strict_provenance))] 27 | #![warn(missing_docs)] 28 | 29 | pub mod alloc; 30 | pub mod buffer; 31 | pub use buffer::*; 32 | 33 | pub mod builder; 34 | pub use builder::*; 35 | 36 | mod bigint; 37 | pub use bigint::i256; 38 | 39 | mod bytes; 40 | 41 | mod native; 42 | pub use native::*; 43 | 44 | mod util; 45 | pub use util::*; 46 | 47 | mod interval; 48 | pub use interval::*; 49 | 50 | mod arith; 51 | -------------------------------------------------------------------------------- /arrow-buffer/src/util/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | pub mod bit_chunk_iterator; 19 | pub mod bit_iterator; 20 | pub mod bit_mask; 21 | pub mod bit_util; 22 | -------------------------------------------------------------------------------- /arrow-cast/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-cast/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-cast/benches/parse_date.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_array::types::Date32Type; 19 | use arrow_cast::parse::Parser; 20 | use criterion::*; 21 | use std::hint; 22 | 23 | fn criterion_benchmark(c: &mut Criterion) { 24 | let timestamps = ["2020-09-08", "2020-9-8", "2020-09-8", "2020-9-08"]; 25 | 26 | for timestamp in timestamps { 27 | let t = hint::black_box(timestamp); 28 | c.bench_function(t, |b| { 29 | b.iter(|| Date32Type::parse(t).unwrap()); 30 | }); 31 | } 32 | } 33 | 34 | criterion_group!(benches, criterion_benchmark); 35 | criterion_main!(benches); 36 | -------------------------------------------------------------------------------- /arrow-cast/benches/parse_decimal.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_array::types::Decimal256Type; 19 | use arrow_cast::parse::parse_decimal; 20 | use criterion::*; 21 | use std::hint; 22 | 23 | fn criterion_benchmark(c: &mut Criterion) { 24 | let decimals = [ 25 | "123.123", 26 | "123.1234", 27 | "123.1", 28 | "123", 29 | "-123.123", 30 | "-123.1234", 31 | "-123.1", 32 | "-123", 33 | "0.0000123", 34 | "12.", 35 | "-12.", 36 | "00.1", 37 | "-00.1", 38 | "12345678912345678.1234", 39 | "-12345678912345678.1234", 40 | "99999999999999999.999", 41 | "-99999999999999999.999", 42 | ".123", 43 | "-.123", 44 | "123.", 45 | "-123.", 46 | ]; 47 | 48 | for decimal in decimals { 49 | let d = hint::black_box(decimal); 50 | c.bench_function(d, |b| { 51 | b.iter(|| parse_decimal::<Decimal256Type>(d, 20, 3).unwrap()); 52 | }); 53 | } 54 | } 55 | 56 | criterion_group!(benches, criterion_benchmark); 57 | criterion_main!(benches); 58 | -------------------------------------------------------------------------------- /arrow-cast/benches/parse_time.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_cast::parse::string_to_time_nanoseconds; 19 | use criterion::*; 20 | use std::hint; 21 | 22 | fn criterion_benchmark(c: &mut Criterion) { 23 | let timestamps = [ 24 | "9:50", 25 | "09:50", 26 | "09:50 PM", 27 | "9:50:12 AM", 28 | "09:50:12 PM", 29 | "09:50:12.123456789", 30 | "9:50:12.123456789", 31 | "09:50:12.123456789 PM", 32 | ]; 33 | 34 | for timestamp in timestamps { 35 | let t = hint::black_box(timestamp); 36 | c.bench_function(t, |b| { 37 | b.iter(|| string_to_time_nanoseconds(t).unwrap()); 38 | }); 39 | } 40 | } 41 | 42 | criterion_group!(benches, criterion_benchmark); 43 | criterion_main!(benches); 44 | -------------------------------------------------------------------------------- /arrow-cast/benches/parse_timestamp.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_cast::parse::string_to_timestamp_nanos; 19 | use criterion::*; 20 | use std::hint; 21 | 22 | fn criterion_benchmark(c: &mut Criterion) { 23 | let timestamps = [ 24 | "2020-09-08", 25 | "2020-09-08T13:42:29", 26 | "2020-09-08T13:42:29.190", 27 | "2020-09-08T13:42:29.190855", 28 | "2020-09-08T13:42:29.190855999", 29 | "2020-09-08T13:42:29+00:00", 30 | "2020-09-08T13:42:29.190+00:00", 31 | "2020-09-08T13:42:29.190855+00:00", 32 | "2020-09-08T13:42:29.190855999-05:00", 33 | "2020-09-08T13:42:29.190855Z", 34 | ]; 35 | 36 | for timestamp in timestamps { 37 | let t = hint::black_box(timestamp); 38 | c.bench_function(t, |b| { 39 | b.iter(|| string_to_timestamp_nanos(t).unwrap()); 40 | }); 41 | } 42 | } 43 | 44 | criterion_group!(benches, criterion_benchmark); 45 | criterion_main!(benches); 46 | -------------------------------------------------------------------------------- /arrow-cast/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Functions for converting from one data type to another in [Apache Arrow](https://docs.rs/arrow) 19 | 20 | #![doc( 21 | html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg", 22 | html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg" 23 | )] 24 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 25 | #![warn(missing_docs)] 26 | pub mod cast; 27 | pub use cast::*; 28 | pub mod display; 29 | pub mod parse; 30 | #[cfg(feature = "prettyprint")] 31 | pub mod pretty; 32 | 33 | pub mod base64; 34 | -------------------------------------------------------------------------------- /arrow-csv/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | name = "arrow-csv" 20 | version = { workspace = true } 21 | description = "Support for parsing CSV format to and from the Arrow format" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_csv" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-array = { workspace = true } 40 | arrow-cast = { workspace = true } 41 | arrow-schema = { workspace = true } 42 | chrono = { workspace = true } 43 | csv = { version = "1.1", default-features = false } 44 | csv-core = { version = "0.1" } 45 | lazy_static = { version = "1.4", default-features = false } 46 | regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] } 47 | 48 | [dev-dependencies] 49 | arrow-buffer = { workspace = true } 50 | tempfile = "3.3" 51 | futures = "0.3" 52 | tokio = { version = "1.27", default-features = false, features = ["io-util"] } 53 | bytes = "1.4" 54 | -------------------------------------------------------------------------------- /arrow-csv/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-csv/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-csv/examples/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Examples 21 | - [`csv_calculation.rs`](csv_calculation.rs): performs a simple calculation using the CSV reader 
-------------------------------------------------------------------------------- /arrow-csv/examples/csv_calculation.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_array::cast::AsArray; 19 | use arrow_array::types::Int16Type; 20 | use arrow_csv::ReaderBuilder; 21 | 22 | use arrow_schema::{DataType, Field, Schema}; 23 | use std::fs::File; 24 | use std::sync::Arc; 25 | 26 | fn main() { 27 | // read csv from file 28 | let file = File::open("arrow-csv/test/data/example.csv").unwrap(); 29 | let csv_schema = Schema::new(vec![ 30 | Field::new("c1", DataType::Int16, true), 31 | Field::new("c2", DataType::Float32, true), 32 | Field::new("c3", DataType::Utf8, true), 33 | Field::new("c4", DataType::Boolean, true), 34 | ]); 35 | let mut reader = ReaderBuilder::new(Arc::new(csv_schema)) 36 | .with_header(true) 37 | .build(file) 38 | .unwrap(); 39 | 40 | match reader.next() { 41 | Some(r) => match r { 42 | Ok(r) => { 43 | // get the column(0) max value 44 | let col = r.column(0).as_primitive::(); 45 | let max = col.iter().max().flatten(); 46 | println!("max value column(0): {max:?}") 47 | } 48 | Err(e) => { 49 | println!("{e:?}"); 50 | } 51 | }, 52 | None => { 53 | println!("csv is empty"); 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /arrow-csv/test/data/custom_null_test.csv: -------------------------------------------------------------------------------- 1 | c_int,c_float,c_string,c_bool 2 | 1,1.1,"1.11",True 3 | nil,2.2,"2.22",TRUE 4 | 3,nil,"3.33",true 5 | 4,4.4,nil,False 6 | 5,6.6,"",nil 7 | -------------------------------------------------------------------------------- /arrow-csv/test/data/decimal_test.csv: -------------------------------------------------------------------------------- 1 | "Elgin, Scotland, the UK",57.653484,-3.335724 2 | "Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404 3 | "Solihull, Birmingham, UK",52.412811,-1.778197 4 | "Cardiff, Cardiff county, UK",51.481583,-3.179090 5 | "Cardiff, Cardiff county, UK",12.12345678,-3.179090 6 | "Eastbourne, East Sussex, UK",50.76,0.290472 7 | "Eastbourne, East Sussex, UK",.123,0.290472 
8 | "Eastbourne, East Sussex, UK",123.,0.290472 9 | "Eastbourne, East Sussex, UK",123,0.290472 10 | "Eastbourne, East Sussex, UK",-50.76,0.290472 -------------------------------------------------------------------------------- /arrow-csv/test/data/dictionary_nullable_test.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1, 3 | 2,bob 4 | -------------------------------------------------------------------------------- /arrow-csv/test/data/example.csv: -------------------------------------------------------------------------------- 1 | c1,c2,c3,c4 2 | 1,1.1,"hong kong",true 3 | 3,323.12,"XiAn",false 4 | 10,131323.12,"cheng du",false -------------------------------------------------------------------------------- /arrow-csv/test/data/init_null_test.csv: -------------------------------------------------------------------------------- 1 | c_int,c_float,c_string,c_bool,c_null 2 | ,,,, 3 | 2,2.2,"a",TRUE, 4 | 3,,"b",true, 5 | 4,4.4,,False, 6 | 5,6.6,"",FALSE, -------------------------------------------------------------------------------- /arrow-csv/test/data/null_test.csv: -------------------------------------------------------------------------------- 1 | c_int,c_float,c_string,c_bool 2 | 1,1.1,"1.11",True 3 | 2,2.2,"2.22",TRUE 4 | 3,,"3.33",true 5 | 4,4.4,,False 6 | 5,6.6,"",FALSE -------------------------------------------------------------------------------- /arrow-csv/test/data/scientific_notation_test.csv: -------------------------------------------------------------------------------- 1 | 1.439e+04, positive_exponent 2 | 1.31e+04, positive_exponent 3 | 1.2711e+0, positive_exponent 4 | 1.44e+04, positive_exponent 5 | 2.22e+04, positive_exponent 6 | 1.149e+04, positive_exponent 7 | 2.139e+04, positive_exponent 8 | 7.322e+04, positive_exponent 9 | 1.531e+04, positive_exponent 10 | 2.206e-04, negative_exponent 11 | 1.517e-04, negative_exponent 12 | 2.332e-04, negative_exponent 13 | 2.19e-04, negative_exponent 14 | 
2.087e-04, negative_exponent 15 | 12683.18, no_exponent 16 | 7134.6, no_exponent 17 | 8540.17, no_exponent 18 | 21462.36, no_exponent 19 | 1120.76, no_exponent -------------------------------------------------------------------------------- /arrow-csv/test/data/truncated_rows.csv: -------------------------------------------------------------------------------- 1 | Name,Age,Occupation,DOB 2 | A1,34,Engineer,1985-07-16 3 | B2,29,Doctor 4 | , 5 | C3,45,Artist,1964-11-30 6 | 7 | D4 8 | E5,31,, 9 | -------------------------------------------------------------------------------- /arrow-csv/test/data/uk_cities.csv: -------------------------------------------------------------------------------- 1 | "Elgin, Scotland, the UK",57.653484,-3.335724 2 | "Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404 3 | "Solihull, Birmingham, UK",52.412811,-1.778197 4 | "Cardiff, Cardiff county, UK",51.481583,-3.179090 5 | "Eastbourne, East Sussex, UK",50.768036,0.290472 6 | "Oxford, Oxfordshire, UK",51.752022,-1.257677 7 | "London, UK",51.509865,-0.118092 8 | "Swindon, Swindon, UK",51.568535,-1.772232 9 | "Gravesend, Kent, UK",51.441883,0.370759 10 | "Northampton, Northamptonshire, UK",52.240479,-0.902656 11 | "Rugby, Warwickshire, UK",52.370876,-1.265032 12 | "Sutton Coldfield, West Midlands, UK",52.570385,-1.824042 13 | "Harlow, Essex, UK",51.772938,0.102310 14 | "Aberdeen, Aberdeen City, UK",57.149651,-2.099075 15 | "Swansea, Swansea, UK",51.621441,-3.943646 16 | "Chesterfield, Derbyshire, UK",53.235046,-1.421629 17 | "Londonderry, Derry, UK",55.006763,-7.318268 18 | "Salisbury, Wiltshire, UK",51.068787,-1.794472 19 | "Weymouth, Dorset, UK",50.614429,-2.457621 20 | "Wolverhampton, West Midlands, UK",52.591370,-2.110748 21 | "Preston, Lancashire, UK",53.765762,-2.692337 22 | "Bournemouth, UK",50.720806,-1.904755 23 | "Doncaster, South Yorkshire, UK",53.522820,-1.128462 24 | "Ayr, South Ayrshire, UK",55.458565,-4.629179 25 | "Hastings, East Sussex, UK",50.854259,0.573453 26 | 
"Bedford, UK",52.136436,-0.460739 27 | "Basildon, Essex, UK",51.572376,0.470009 28 | "Chippenham, Wiltshire, UK",51.458057,-2.116074 29 | "Belfast, UK",54.607868,-5.926437 30 | "Uckfield, East Sussex, UK",50.967941,0.085831 31 | "Worthing, West Sussex, UK",50.825024,-0.383835 32 | "Leeds, West Yorkshire, UK",53.801277,-1.548567 33 | "Kendal, Cumbria, UK",54.328506,-2.743870 34 | "Plymouth, UK",50.376289,-4.143841 35 | "Haverhill, Suffolk, UK",52.080875,0.444517 36 | "Frankton, Warwickshire, UK",52.328415,-1.377561 37 | "Inverness, the UK",57.477772,-4.224721 -------------------------------------------------------------------------------- /arrow-csv/test/data/uk_cities_with_headers.csv: -------------------------------------------------------------------------------- 1 | city,lat,lng 2 | "Elgin, Scotland, the UK",57.653484,-3.335724 3 | "Stoke-on-Trent, Staffordshire, the UK",53.002666,-2.179404 4 | "Solihull, Birmingham, UK",52.412811,-1.778197 5 | "Cardiff, Cardiff county, UK",51.481583,-3.179090 6 | "Eastbourne, East Sussex, UK",50.768036,0.290472 7 | "Oxford, Oxfordshire, UK",51.752022,-1.257677 8 | "London, UK",51.509865,-0.118092 9 | "Swindon, Swindon, UK",51.568535,-1.772232 10 | "Gravesend, Kent, UK",51.441883,0.370759 11 | "Northampton, Northamptonshire, UK",52.240479,-0.902656 12 | "Rugby, Warwickshire, UK",52.370876,-1.265032 13 | "Sutton Coldfield, West Midlands, UK",52.570385,-1.824042 14 | "Harlow, Essex, UK",51.772938,0.102310 15 | "Aberdeen, Aberdeen City, UK",57.149651,-2.099075 16 | "Swansea, Swansea, UK",51.621441,-3.943646 17 | "Chesterfield, Derbyshire, UK",53.235046,-1.421629 18 | "Londonderry, Derry, UK",55.006763,-7.318268 19 | "Salisbury, Wiltshire, UK",51.068787,-1.794472 20 | "Weymouth, Dorset, UK",50.614429,-2.457621 21 | "Wolverhampton, West Midlands, UK",52.591370,-2.110748 22 | "Preston, Lancashire, UK",53.765762,-2.692337 23 | "Bournemouth, UK",50.720806,-1.904755 24 | "Doncaster, South Yorkshire, UK",53.522820,-1.128462 25 | "Ayr, 
South Ayrshire, UK",55.458565,-4.629179 26 | "Hastings, East Sussex, UK",50.854259,0.573453 27 | "Bedford, UK",52.136436,-0.460739 28 | "Basildon, Essex, UK",51.572376,0.470009 29 | "Chippenham, Wiltshire, UK",51.458057,-2.116074 30 | "Belfast, UK",54.607868,-5.926437 31 | "Uckfield, East Sussex, UK",50.967941,0.085831 32 | "Worthing, West Sussex, UK",50.825024,-0.383835 33 | "Leeds, West Yorkshire, UK",53.801277,-1.548567 34 | "Kendal, Cumbria, UK",54.328506,-2.743870 35 | "Plymouth, UK",50.376289,-4.143841 36 | "Haverhill, Suffolk, UK",52.080875,0.444517 37 | "Frankton, Warwickshire, UK",52.328415,-1.377561 38 | "Inverness, the UK",57.477772,-4.224721 -------------------------------------------------------------------------------- /arrow-csv/test/data/various_invalid_types/invalid_bool.csv: -------------------------------------------------------------------------------- 1 | c_int|c_float|c_string|c_bool 2 | 1|1.1|"1.11"|true 3 | 2|2.2|"2.22"|none 4 | 3|3.3|"3.33"|true 5 | 4|4.4|"4.4"|false 6 | 5|6.6|""|false 7 | -------------------------------------------------------------------------------- /arrow-csv/test/data/various_invalid_types/invalid_float.csv: -------------------------------------------------------------------------------- 1 | c_int|c_float|c_string|c_bool 2 | 1|1.1|"1.11"|true 3 | 2|2.2|"2.22"|true 4 | 3||"3.33"|true 5 | 4|4.x4||false 6 | 5|6.6|""|false -------------------------------------------------------------------------------- /arrow-csv/test/data/various_invalid_types/invalid_int.csv: -------------------------------------------------------------------------------- 1 | c_int|c_float|c_string|c_bool 2 | 1|1.1|"1.11"|true 3 | 2.3|2.2|"2.22"|false 4 | 3|3.3|"3.33"|true 5 | 4|4.4|"4.4"|false 6 | 5|6.6|""|false 7 | -------------------------------------------------------------------------------- /arrow-csv/test/data/various_invalid_types/null_in_non_nullable.csv: -------------------------------------------------------------------------------- 1 | 
c_int|c_float|c_string|c_bool 2 | 1|1.1|"1.11"|true 3 | 2|2.2|"2.22"|true 4 | 3|3.3|"3.33"|true 5 | 4|4.4||false 6 | 5|6.6|""|false 7 | -------------------------------------------------------------------------------- /arrow-csv/test/data/various_types.csv: -------------------------------------------------------------------------------- 1 | c_int|c_float|c_string|c_bool|c_date|c_datetime 2 | 1|1.1|"1.11"|true|1970-01-01|1970-01-01T00:00:00 3 | 2|2.2|"2.22"|true|2020-11-08|2020-11-08T01:00:00 4 | 3||"3.33"|true|1969-12-31|1969-11-08T02:00:00 5 | 4|4.4||false|| 6 | 5|6.6|""|false|1990-01-01|1990-01-01T03:00:00 7 | 4|4e6||false|| 8 | 4|4.0e-6||false|| 9 | 6|NaN||false|| 10 | 7|inf||false|| 11 | 8|-inf||false|| 12 | -------------------------------------------------------------------------------- /arrow-data/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | name = "arrow-data" 20 | version = { workspace = true } 21 | description = "Array data abstractions for Apache Arrow" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_data" 33 | bench = false 34 | 35 | [features] 36 | # force_validate runs full data validation for all arrays that are created 37 | # this is not enabled by default as it is too computationally expensive 38 | # but is run as part of our CI checks 39 | force_validate = [] 40 | # Enable ffi support 41 | ffi = ["arrow-schema/ffi"] 42 | 43 | [package.metadata.docs.rs] 44 | all-features = true 45 | 46 | [dependencies] 47 | 48 | arrow-buffer = { workspace = true } 49 | arrow-schema = { workspace = true } 50 | 51 | num = { version = "0.4", default-features = false, features = ["std"] } 52 | half = { version = "2.1", default-features = false } 53 | 54 | [dev-dependencies] 55 | 56 | [build-dependencies] 57 | -------------------------------------------------------------------------------- /arrow-data/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-data/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-data/src/equal/null.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.

use crate::data::ArrayData;

/// Range equality for null-typed array data: unconditionally `true`.
///
/// None of the arguments are inspected. Every slot in such data is null, so
/// any two ranges compare equal; the only thing left to compare is the data
/// type, and that is not done here.
#[inline]
pub(super) fn null_equal(
    _lhs: &ArrayData,
    _rhs: &ArrayData,
    _lhs_start: usize,
    _rhs_start: usize,
    _len: usize,
) -> bool {
    true
}
-------------------------------------------------------------------------------- /arrow-data/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License.
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Low-level array data abstractions for [Apache Arrow Rust](https://docs.rs/arrow) 19 | //! 20 | //! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array) 21 | 22 | #![doc( 23 | html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg", 24 | html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg" 25 | )] 26 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 27 | #![warn(missing_docs)] 28 | mod data; 29 | pub use data::*; 30 | 31 | mod equal; 32 | pub mod transform; 33 | 34 | pub use arrow_buffer::{bit_iterator, bit_mask}; 35 | pub mod decimal; 36 | 37 | #[cfg(feature = "ffi")] 38 | pub mod ffi; 39 | 40 | mod byte_view; 41 | pub use byte_view::*; 42 | -------------------------------------------------------------------------------- /arrow-data/src/transform/boolean.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.

use super::{Extend, _MutableArrayData, utils::resize_for_bits};
use crate::bit_mask::set_bits;
use crate::ArrayData;

/// Builds the closure that copies `len` bits, starting at `start` (relative to
/// `array`'s own offset), from `array`'s first buffer into `mutable.buffer1`.
///
/// The bits are written at bit position `mutable.len` of the destination.
pub(super) fn build_extend(array: &ArrayData) -> Extend {
    let src_bits = array.buffers()[0].as_slice();
    Box::new(
        move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
            let write_at = mutable.len;
            // NOTE(review): `resize_for_bits` presumably grows the buffer so it
            // can hold `write_at + len` bits; confirm against `utils`.
            resize_for_bits(&mut mutable.buffer1, write_at + len);
            set_bits(
                mutable.buffer1.as_slice_mut(),
                src_bits,
                write_at,
                array.offset() + start,
                len,
            );
        },
    )
}

/// Makes room for `len` additional slots in `mutable.buffer1`; no bits are set here.
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
    let total_bits = mutable.len + len;
    resize_for_bits(&mut mutable.buffer1, total_bits);
}
-------------------------------------------------------------------------------- /arrow-data/src/transform/fixed_binary.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License.
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.

use super::{Extend, _MutableArrayData};
use crate::ArrayData;
use arrow_schema::DataType;

/// Builds the closure that appends `len` fixed-width values, starting at
/// element `start`, from `array`'s first buffer onto `mutable.buffer1`.
///
/// Hits `unreachable!()` if `array` is not `DataType::FixedSizeBinary`.
pub(super) fn build_extend(array: &ArrayData) -> Extend {
    let width = match array.data_type() {
        DataType::FixedSizeBinary(byte_width) => *byte_width as usize,
        _ => unreachable!(),
    };

    // Skip the bytes that belong to elements before the array's offset, so
    // that `start` below can be used directly as an element index.
    let bytes = &array.buffers()[0].as_slice()[array.offset() * width..];
    Box::new(
        move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
            let from = start * width;
            let to = (start + len) * width;
            mutable.buffer1.extend_from_slice(&bytes[from..to]);
        },
    )
}

/// Appends `len` zero-filled values (`len * width` zero bytes) to `mutable.buffer1`.
///
/// Hits `unreachable!()` if `mutable.data_type` is not `DataType::FixedSizeBinary`.
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
    let width = match mutable.data_type {
        DataType::FixedSizeBinary(byte_width) => byte_width as usize,
        _ => unreachable!(),
    };

    mutable.buffer1.extend_zeros(len * width);
}
-------------------------------------------------------------------------------- /arrow-data/src/transform/fixed_size_list.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.

use crate::ArrayData;
use arrow_schema::DataType;

use super::{Extend, _MutableArrayData};

/// Builds the closure that extends every child of `mutable` with the child
/// range corresponding to list slots `start..start + len`.
///
/// Each list slot covers `size` child slots, so both bounds are scaled by
/// `size`. Hits `unreachable!()` if `array` is not `DataType::FixedSizeList`.
pub(super) fn build_extend(array: &ArrayData) -> Extend {
    let list_len = match array.data_type() {
        DataType::FixedSizeList(_, n) => *n as usize,
        _ => unreachable!(),
    };

    Box::new(
        move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
            for child in mutable.child_data.iter_mut() {
                child.extend(index, start * list_len, (start + len) * list_len);
            }
        },
    )
}

/// Extends every child of `mutable` with `len * size` nulls.
///
/// Hits `unreachable!()` if `mutable.data_type` is not `DataType::FixedSizeList`.
pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
    let list_len = match mutable.data_type {
        DataType::FixedSizeList(_, n) => n as usize,
        _ => unreachable!(),
    };

    for child in mutable.child_data.iter_mut() {
        child.extend_nulls(len * list_len);
    }
}
-------------------------------------------------------------------------------- /arrow-data/src/transform/null.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership.
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.

use super::{Extend, _MutableArrayData};
use crate::ArrayData;

/// Returns an extend closure that ignores all of its arguments and does nothing.
pub(super) fn build_extend(_: &ArrayData) -> Extend {
    Box::new(|_, _, _, _| ())
}

/// No-op: nothing is written to the mutable data.
pub(super) fn extend_nulls(_: &mut _MutableArrayData, _: usize) {}
-------------------------------------------------------------------------------- /arrow-data/src/transform/primitive.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License.
17 | 18 | use crate::ArrayData; 19 | use arrow_buffer::ArrowNativeType; 20 | use std::mem::size_of; 21 | use std::ops::Add; 22 | 23 | use super::{Extend, _MutableArrayData}; 24 | 25 | pub(super) fn build_extend<T: ArrowNativeType>(array: &ArrayData) -> Extend { 26 | let values = array.buffer::<T>(0); 27 | Box::new( 28 | move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| { 29 | mutable 30 | .buffer1 31 | .extend_from_slice(&values[start..start + len]); 32 | }, 33 | ) 34 | } 35 | 36 | pub(super) fn build_extend_with_offset<T>(array: &ArrayData, offset: T) -> Extend 37 | where 38 | T: ArrowNativeType + Add<Output = T>, 39 | { 40 | let values = array.buffer::<T>(0); 41 | Box::new( 42 | move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| { 43 | mutable 44 | .buffer1 45 | .extend(values[start..start + len].iter().map(|x| *x + offset)); 46 | }, 47 | ) 48 | } 49 | 50 | pub(super) fn extend_nulls<T: ArrowNativeType>(mutable: &mut _MutableArrayData, len: usize) { 51 | mutable.buffer1.extend_zeros(len * size_of::<T>()); 52 | } 53 | -------------------------------------------------------------------------------- /arrow-data/src/transform/structure.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied.
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use super::{Extend, _MutableArrayData}; 19 | use crate::ArrayData; 20 | 21 | pub(super) fn build_extend(_: &ArrayData) -> Extend { 22 | Box::new( 23 | move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| { 24 | mutable 25 | .child_data 26 | .iter_mut() 27 | .for_each(|child| child.extend(index, start, start + len)) 28 | }, 29 | ) 30 | } 31 | 32 | pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) { 33 | mutable 34 | .child_data 35 | .iter_mut() 36 | .for_each(|child| child.extend_nulls(len)) 37 | } 38 | -------------------------------------------------------------------------------- /arrow-flight/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Flight 21 | 22 | ## Generated Code 23 | 24 | The prost/tonic code can be generated by running `./regen.sh`, which in turn invokes the Rust binary located in [gen](./gen). 25 | 26 | This is necessary after modifying the protobuf definitions or altering the dependencies of [gen](./gen), and requires a 27 | valid installation of [protoc](https://github.com/protocolbuffers/protobuf#protocol-compiler-installation). 28 | 29 | ```bash 30 | ./regen.sh 31 | ``` 32 | 33 | ### Why Vendor 34 | 35 | The standard approach to integrating `prost-build` / `tonic-build` is to use a `build.rs` script that automatically generates the code as part of the standard build process.
36 | 37 | Unfortunately this caused a lot of friction for users: 38 | 39 | - Requires all users to have a protoc install in order to compile the crate - [#2616](https://github.com/apache/arrow-rs/issues/2616) 40 | - Some distributions have very old versions of protoc that don't support required functionality - [#1574](https://github.com/apache/arrow-rs/issues/1574) 41 | - Inconsistent support within IDEs for code completion of automatically generated code 42 | -------------------------------------------------------------------------------- /arrow-flight/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-flight/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-flight/examples/data/ca_root.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCPOUfaBhf+grVf 3 | DTBMpCX/sD6T4sPSIV/e/lwXw05gVT2Uh3vW4iuKZnxXkEwn66h8FZCeRXdkFUPd 4 | Nid2losqwJX4RlZHJvcLK5abFFuncyumU3/szsG+oA2m6i7r1a+msa53puJgfvF+ 5 | qIP8U8OSyTPl4qKgDMq+Q8Gk2UWXcFSjOz4pe0LAaR2Sopp5tU0TBopBgZWp5OX+ 6 | Nz23zrPwYn9EHKa2npLDqZQRMPGgg5H02inD7RrGhOEamacRSsGsrArUd3Qfk3V2 7 | mUkSPhixrhneWdXaAUYYbcoVBpMMoHn0bVOT+feRnAHqKxEjQOfhq6av+aAYdaaw 8 | 2aeyxFSPAgMBAAECggEAIBMu8TfOJdR18+yLNY7JZ8K7sPjm6+dHhjQmwbeelNpi 9 | KSJSbKRIJf2fuSh6tbIEEI3vzlI2U7ajdSYMnnVGg3OP4oM2DUc/qkO/vclIYW7c 10 | hqpKiuke4KcNh6UIjw5TARznuqooJn4nbTsmG9tSgEwJEGG6E+fpe8TG2DlEaY6L 11 | seXzFxEKbS8Jac8hex3lCp16FPgyP9I3u1ukqsj2S4+VcmzFT4L9QxK0tgjOotUb 12 | Mem3cO5OYDfkzMj+JKz+58G+n/5TILCEq86O+JG7fFzjA9BPp6wZgWHKG6bDDHoV 13 | CwNExOooVPYSyHebVg+K0AgRTwwAouLWboF5k4GtqQKBgQDKFt+kd1qUx3iqIREi 14 | 
jQtC8/ADGTyvMLJkA8fCxZYsn+hVp3G5uZxbOWWNU+j9B7X5gpljS5MxSvSAXLap 15 | tYCm1wkCB9qkZ5RxPPlE7fOCbmpJcZn39lC6q32I2U2IFM3VGzXvS6Eq2TJcdomb 16 | Q6WaNAuGoHtmdHsTF6IQQH3PhQKBgQC1bliMVbLoJ+c2rXAhmTJLsgesJQCpkJRH 17 | hOuiQOjH+ytSywIJd3K0vVHcnvCguS4wsw10N/V07KMuKjBztcWK0XB65MVczxQU 18 | mQNEOO5DnqNtD73ewwZJpHO52zMb5nuiBW1TtZJR6Ejb+/PccJJFQtQh0fgDNIgv 19 | SDeJr00uAwKBgQCkTya0zQoL74bvJIWPBwiB/l5F1q4dbsWMRfqUJRxt2fIIOmVf 20 | 8UbscOVdnDzSBgPyAzp9nkPmDyBugXNO1rDsdefwC7y8cS8jRsURQzr68JVgc4oR 21 | LucCb05ZBv1fyDJ2s3INxvsaIJAu134YjE7H6sr4QeEzMz+iDVQKTLU+FQKBgQCP 22 | VSixXXbkv7mDUcvXT08TzAcRsX3vd+AdDF6XpY4Bh2V3mBf44JwHO9XzZMGXSBS8 23 | tof3e8n34IceDoiOsVjpzYJ+JEPjiVlE2IljGVLqquZTX+9RkGNkRH6Eoy6iaRjd 24 | LEptqnF5Mfh1invieDCmdSBfw6Ca76JzMCtoI8zmfwKBgQDJE2UZJBfsFpXceqfV 25 | fYQHb4t5tu+kch4JrDpJPgMtTvZ0nQtD7aI/gPQIryl4K2MKwIO3vPs/rNNJuVWV 26 | K9ZTTnqqbiLII94ulYR3YRuotysEMkO2eJ6UsfnPjgLpJmaAQrHD4XNcpE60bmJf 27 | VlViy6Cnn8Xj+CC3HnF1XppyWw== 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /arrow-flight/examples/data/ca_root.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDGzCCAgOgAwIBAgIUFd5IwYARWN1WGmiqnpzxQ3pE3vAwDQYJKoZIhvcNAQEL 3 | BQAwHTELMAkGA1UEBhMCRkkxDjAMBgNVBAMMBXZhaGlkMB4XDTI0MTAxODE4MTcy 4 | NVoXDTI5MTAxNzE4MTcyNVowHTELMAkGA1UEBhMCRkkxDjAMBgNVBAMMBXZhaGlk 5 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAjzlH2gYX/oK1Xw0wTKQl 6 | /7A+k+LD0iFf3v5cF8NOYFU9lId71uIrimZ8V5BMJ+uofBWQnkV3ZBVD3TYndpaL 7 | KsCV+EZWRyb3CyuWmxRbp3MrplN/7M7BvqANpuou69WvprGud6biYH7xfqiD/FPD 8 | kskz5eKioAzKvkPBpNlFl3BUozs+KXtCwGkdkqKaebVNEwaKQYGVqeTl/jc9t86z 9 | 8GJ/RBymtp6Sw6mUETDxoIOR9Nopw+0axoThGpmnEUrBrKwK1Hd0H5N1dplJEj4Y 10 | sa4Z3lnV2gFGGG3KFQaTDKB59G1Tk/n3kZwB6isRI0Dn4aumr/mgGHWmsNmnssRU 11 | jwIDAQABo1MwUTAdBgNVHQ4EFgQUu2qQpBe9cOo0HndIR9VvpbCJnyEwHwYDVR0j 12 | BBgwFoAUu2qQpBe9cOo0HndIR9VvpbCJnyEwDwYDVR0TAQH/BAUwAwEB/zANBgkq 13 | 
hkiG9w0BAQsFAAOCAQEAFqbmJn8kPwKGqTiD2pUHzdUJz1QnqMOc0oZ+VzUyCVuq 14 | VFB3HX6Bjjc/NEDVSIRh0hsCB0s6bSwb23nmtlpZQlHF3JiuH2T0l1gGhVQBnbGj 15 | CBeldH7/VdcB9r/RASX2wkL8all0FswFx5dbiyrGOYeVUt5tV7mxe40LQLisWuu0 16 | SY6cPs1TjCVy7oJ61/++OfyffGyBiANXRNYSROeNxxOtXwVqKNP4eWX7UVxXgDKF 17 | iaGG5g9S6e3AefEgUTSWz3LWyMqT+VbTuqLw1NG9O112pFsPAEr4FTNh6ljIEVeH 18 | kvoBrHgP8VDV30GmganXu9LNOIIuFqhOoPxZYW1JGQ== 19 | -----END CERTIFICATE----- 20 | -------------------------------------------------------------------------------- /arrow-flight/examples/data/client.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDO9UXCYgahDzPB 3 | WsSlGRDEw8a0lhW/AP8dHygEiWcHJ6Fa0Q7/t7/SMJQzJsnM7qMKDqCxJfutDFFO 4 | 8WykoIgX06ENcTOei+DRo9NUmr2VRv5Uk3PN/AChwwm+/VCyRcx45ImumL7dWznp 5 | /a5GRWS2LtJ+j7zUVurIivV2TOT84zMWxvDTMfwyV93DMbKYdzVDIhjLO+AS8tFM 6 | rd2fsLAhDOb2iJ4HpR5Bfl47LuCyH6/2NyB8h53HI5bm/eo14gR3UtEY2UBTgJcC 7 | 8pPXmsYqZNOhmIYxA8rgpAFe+7IEGyRQU1zPLHgKtGSJXXEwfWd2Tspamy1/4nbx 8 | sMOjyTD1AgMBAAECggEAFOomjZlYcChYI58npkOdNTVPvGo6cl4Iagn/Ak5cchWo 9 | McRUzrTkFrrAD1dJJ4OqSFboGF7tSzSmduLhBtYOStIhQXmCLPCX3LfAoJkutYpC 10 | I4vvNQ7VhYNvVbTF1e5F1qXJ4DIhf787YBE57nIO1nZu4UlnXNIX0gJxAe6NaZzh 11 | w6eWZRNi/BOat3lCGxo5Q5PBsoRwEILsF9SlMimwSesKqivG4WVySZzygpsnp+rv 12 | TV2WPyw0m05tpr0Y+z5fUayPi5Vtxs8DoLKfnKD+/HOLniPaaDhHcD4R0DU/O8rb 13 | 0f6zfNW0S3A5esrKsIXZ02ljjEIjeGs6ZnJUXf5kwQKBgQDn8n61UEm42bDPmwv2 14 | 8b935Ghp5TAYsHH5Ti0YCK6aeOrCCPoRmb9cG3+zAo6nzad3QtULIJhc/I/wTbJ2 15 | +EXSamMw3Pieub+b8lbUwe2js3AOxx/hoIkjFK4GQALfCjEejoVSQgx84c+Xh3Xx 16 | EBjPi7Bf1EzetSQgVr/7310tUQKBgQDka2Px49ufA4P3WTMB3sCJ2mBEjZoxfV4m 17 | AX1Kwerp9Im2VT8JdzNFLi4hUuxIFJHq13NqQDPo38vG3iVPb0QLZUNg/eD3F53d 18 | 5JKcwDqyi+K3GuNN8JVDFcTP23svKZvNrUxrH/X/rTjf6H+o0PCU7hbcaRihBj4O 19 | LodhXM1QZQKBgQCcnLz3ehW9LafYFe2XORY6QsxJrEm0cfcTCbkjAEG85aZFi8Am 20 | ADUYYSzxVumx3geUXFjzYXffWV1rOVsmP28Nq238QUfSDMOW7aO5lpP5s6AXKlaZ 21 | 
h0bxm/YBHFmT9hLCeiMFLrm13NbJjtqRAXhvMvfNTND2k0KoX0rUK7aDcQKBgANc 22 | IQg51c0s/A/zW/C3GhXtBGV9v7rioTPl6bXjzFTMqAnqjxbQjgyCdWNAGQGma88M 23 | hEIX16uhPxoZwmq8tD+GqcsPcPOxgTz2U8HCghqGPDt9RcKru0ZVV6PHErTK4+PO 24 | nMUmnzO3lJKWlOoiyKcoKWXsldQoh1kmDg+8kbiRAoGBALLmrQwJX+j28ZFZAN/2 25 | SNBryhxtwDcpTSf1Qb4rbtCxJVwCTq9yUdcYA2SsvkA68pNo5mOe/ft6T0GYho1/ 26 | LiJJXduKiTsHV0xpeqA+imuIZDAc6Dgc6GWqskVn6XKUCGcFFvdxoBxAlBVmsbhS 27 | ajeci0ZCJpUvebBrNmaLXLrX 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /arrow-flight/examples/data/client.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDKzCCAhOgAwIBAgIURGJB6QyNfnGJiavpS6DB4+o4YPkwDQYJKoZIhvcNAQEL 3 | BQAwHTELMAkGA1UEBhMCRkkxDjAMBgNVBAMMBXZhaGlkMB4XDTI0MTAxODE4MTcy 4 | NVoXDTI5MTAxNzE4MTcyNVowHTELMAkGA1UEBhMCRkkxDjAMBgNVBAMMBXZhaGlk 5 | MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAzvVFwmIGoQ8zwVrEpRkQ 6 | xMPGtJYVvwD/HR8oBIlnByehWtEO/7e/0jCUMybJzO6jCg6gsSX7rQxRTvFspKCI 7 | F9OhDXEznovg0aPTVJq9lUb+VJNzzfwAocMJvv1QskXMeOSJrpi+3Vs56f2uRkVk 8 | ti7Sfo+81FbqyIr1dkzk/OMzFsbw0zH8MlfdwzGymHc1QyIYyzvgEvLRTK3dn7Cw 9 | IQzm9oieB6UeQX5eOy7gsh+v9jcgfIedxyOW5v3qNeIEd1LRGNlAU4CXAvKT15rG 10 | KmTToZiGMQPK4KQBXvuyBBskUFNczyx4CrRkiV1xMH1ndk7KWpstf+J28bDDo8kw 11 | 9QIDAQABo2MwYTAfBgNVHSMEGDAWgBS7apCkF71w6jQed0hH1W+lsImfITAJBgNV 12 | HRMEAjAAMBQGA1UdEQQNMAuCCWxvY2FsaG9zdDAdBgNVHQ4EFgQUX3kjlcnwAErq 13 | /sdYXJAAGQevoS0wDQYJKoZIhvcNAQELBQADggEBAEiM/+/xI60HDz9ArPiJQJgS 14 | lj7Q+F+FLcQVHfmjTbfuaPQD18BSOI+Oj9qKsxD8LAtkY/Go+Ppq/wGtN72KdLfR 15 | sfBiPrES/vRZaF1UriEq7hJTsSmiQwocbobFZ7T+eBRjwPG5XgqgOg6O3LJQPDVU 16 | lrzW1EkruBysdZj2ThN7yPoDQl9m1zWNvyBRDUvDsIiUkYZOwP6vfCXsImUGzR0A 17 | q6DxVXM2WVoCILRLjYD+nlSmcuxbzOuXXHarIHPC3zJtaiNAboEuF2C7fuGimP2Z 18 | xT67aVWgVx2DW/IycIGslbdnMeMOHxkEE2o7ubHb2T2/ZRmTR/USgnbAu3vXDCc= 19 | -----END CERTIFICATE----- 20 | 
-------------------------------------------------------------------------------- /arrow-flight/examples/data/gen_certs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # source: https://users.rust-lang.org/t/use-tokio-tungstenite-with-rustls-instead-of-native-tls-for-secure-websockets/90130 4 | 5 | make_and_sign() { 6 | # Create unencrypted private key and a CSR (certificate signing request) 7 | openssl req -newkey rsa:2048 -nodes -subj "/C=FI/CN=vahid" -keyout "$1.key" -out "$1.csr" 8 | 9 | # Create self-signed certificate (`$1.pem`) with the private key and CSR 10 | openssl x509 -signkey "$1.key" -in "$1.csr" -req -days 365 -out "$1.pem" 11 | 12 | # Sign the CSR (`$1.pem`) with the root CA certificate and private key 13 | # => this overwrites `$1.pem` because it gets signed 14 | openssl x509 -req -CA ca_root.pem -CAkey ca_root.key -in "$1.csr" -out "$1.pem" -days 1825 -CAcreateserial -extfile localhost.ext 15 | } 16 | 17 | # Create a self-signed root CA 18 | openssl req -x509 -sha256 -nodes -subj "/C=FI/CN=vahid" -days 1825 -newkey rsa:2048 -keyout ca_root.key -out ca_root.pem 19 | 20 | # Create file localhost.ext with the following content: 21 | cat <<'EOF' > localhost.ext 22 | authorityKeyIdentifier=keyid,issuer 23 | basicConstraints=CA:FALSE 24 | subjectAltName = @alt_names 25 | [alt_names] 26 | DNS.1 = localhost 27 | EOF 28 | 29 | make_and_sign client 30 | make_and_sign server 31 | -------------------------------------------------------------------------------- /arrow-flight/examples/data/server.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCJqUDUQQfum3u/ 3 | b9qN1QXvdd2riDljQGIWXe3PGoc8wP1nCkJukxZEJr3Wii8x9v/iJ/ZwkwIV9OEm 4 | y2ER6x0Xh7aYKVF6B2w0R2Q8fWhZx4/kes6mEa6pXutolvqLF1NIUgvpw+yagGPc 5 | Te6RM02AZb3pS/gqqbh3Ayj1q1oiCHZH3p+kpQIMvxumMU+t2Aa7blbr8O2pUOfu 6 | 
faYUwDg/EPTCsRltiFvGz953uxe1ZbibuI1mjVEpAs4nI3VfKSxChSdlGZtyMsMS 7 | UA5NEQ4MO6di0Xa4cW7uK1/q0yb3M4zl1am4iibCoPb5JsAbSVIqtpc4QqKekNNK 8 | XLYNiYkdAgMBAAECggEADX4c4BD1rZHoyaU1dzRbbhKP5Qmnkk5TrtyK/AMeITrL 9 | O4e6ZisdghWIAtitcojCoNCI65TFHhDP5qUFgSOZyN4g6/4UwdCrbt5WeUVTboVM 10 | WbkduxI9m9U1Xp15thtVqt970nBFJwmRgBzVVr8ACCcwu4aaLthOv798CsQ+Q9qK 11 | qBhqr6QCQMzmG9+RDFN62n0wN5GlQsSrgmX+taP2S2EBqpWS0p4q4xv5UWAPh1Q/ 12 | sHWQ+5W0h6rR+X/NGQ3OY1yvlWAQlmu6HSY5QuRJZdAilJRZOI6Z1kge134oIvHx 13 | TSgzIZ+UEYz8l15ENu55xJxTYkSJLJu3ZtOBlVGo0QKBgQDBlXmJ+n4InHXewCId 14 | e1ai1VA9QQvc6hl19PEQ0m7A4wiqM+PWIixX2s3zjDyHiD3pJWBu9aR4VDST8Mhe 15 | XIncsJRzqXpdSnEHlSsws9GQT13htchO8k31pBBNJsm2VG7CeDr21Ty3wWMYzPL5 16 | esJPXNjAk9/92L6cMDk+rY8S2QKBgQC2C+IiWxMmwoSfmvfTNdoeBEj+mL82EsXF 17 | gV7JcSjMVXbnPSbq2fnlOTrAe1TCsf8jlcdQHNStLToV7smewfGSMt3LLzEWuH4j 18 | oaLyPPwgfwMZcJShlyh/49KxspnYNh/d0B9cMXysJ7fasPXhPc1icxOAaj317dGT 19 | wpqIYST15QKBgH8ZeewKyDc1KvquYXfD5DUTIxSkoXpRRZg50xD9aPRB3A2L9jpA 20 | w6aHJZOTbqZ54FQ0UyVnKK5McxzzCUtTLLz2PvHVw15/IskcunfGiIBca8JnDPYw 21 | htkR+BX/so6AzxvlvqkThkMLuNmurpT4cZ2T00hVjspnRXbc/qjVYUu5AoGBAJGn 22 | qGL8o7IbaKmbADaJm97wZsbtx++nzX1dKU3bsFzs9JnHn9/AT+VLiOqAwyyT5iO+ 23 | rx42MyPsT9FEoSTY4EOtVzA+rfQT4Tj5FrrieIg3l28/WGyB2PGagvWShO0tQQnk 24 | f3LwVWQY002y0MhK/o2KrUvNtY2Tnl+vEJ2VvFDRAoGBALkegS4yvT1MqB9a/WLk 25 | ROXQ94CJTDM751Lp6OnyJy9kVWlQvFQXL7FN836LJN22JFCANnPUrbVXht3hXadb 26 | 3KHe19MouNEMG+CZK9cuk8SKIlt18Kvnv3fVxhksaQAYJOcPMFbvwiv8UoochR5u 27 | UuUQ9LfFpj9eL/gsgjICQTiy 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /arrow-flight/examples/data/server.pem: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDKzCCAhOgAwIBAgIURGJB6QyNfnGJiavpS6DB4+o4YPowDQYJKoZIhvcNAQEL 3 | BQAwHTELMAkGA1UEBhMCRkkxDjAMBgNVBAMMBXZhaGlkMB4XDTI0MTAxODE4MTcy 4 | NVoXDTI5MTAxNzE4MTcyNVowHTELMAkGA1UEBhMCRkkxDjAMBgNVBAMMBXZhaGlk 5 | 
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAialA1EEH7pt7v2/ajdUF 6 | 73Xdq4g5Y0BiFl3tzxqHPMD9ZwpCbpMWRCa91oovMfb/4if2cJMCFfThJsthEesd 7 | F4e2mClRegdsNEdkPH1oWceP5HrOphGuqV7raJb6ixdTSFIL6cPsmoBj3E3ukTNN 8 | gGW96Uv4Kqm4dwMo9ataIgh2R96fpKUCDL8bpjFPrdgGu25W6/DtqVDn7n2mFMA4 9 | PxD0wrEZbYhbxs/ed7sXtWW4m7iNZo1RKQLOJyN1XyksQoUnZRmbcjLDElAOTREO 10 | DDunYtF2uHFu7itf6tMm9zOM5dWpuIomwqD2+SbAG0lSKraXOEKinpDTSly2DYmJ 11 | HQIDAQABo2MwYTAfBgNVHSMEGDAWgBS7apCkF71w6jQed0hH1W+lsImfITAJBgNV 12 | HRMEAjAAMBQGA1UdEQQNMAuCCWxvY2FsaG9zdDAdBgNVHQ4EFgQUTVRUyJuulyjb 13 | PNY60pZnbn3YZM0wDQYJKoZIhvcNAQELBQADggEBAFjMnkZ8uh53x/vvACI/QiVG 14 | u8+2bzUllXDp5cS4Fb/AxQABvNROgfrfxcUkcjoRoyim2V8KyYQmmmIGrBXpevMs 15 | /gIPKo5haj5Ijnttz+9IbRY0h9XqEEevZbuM8o7zCpxxA305qbrmGkApRsRbiuZP 16 | JglsBdR6z9HhURfhfQbiaxLhu1jmdWk/XZyghQDXJpzTDE8k1kDjBSHmklQ5JQ2J 17 | o2NmXD78RQ5ZIRu9pmH47rs7CeTy6CjRlHK3lGOb4PfE+eHGPUXbwOl+iP6WuHG6 18 | K5erpWgHWvPXH4MMTNOgaJN7HNLyxVbC9W1l0JpqN8UmHwj6bXC262Soz+STbfE= 19 | -----END CERTIFICATE----- 20 | -------------------------------------------------------------------------------- /arrow-flight/gen/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "gen" 20 | description = "Code generation for arrow-flight" 21 | version = "0.1.0" 22 | edition = { workspace = true } 23 | rust-version = { workspace = true } 24 | authors = { workspace = true } 25 | homepage = { workspace = true } 26 | repository = { workspace = true } 27 | license = { workspace = true } 28 | publish = false 29 | 30 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 31 | 32 | [dependencies] 33 | # Pin specific version of the tonic-build dependencies to avoid auto-generated 34 | # (and checked in) arrow.flight.protocol.rs from changing 35 | prost-build = { version = "=0.13.5", default-features = false } 36 | tonic-build = { version = "=0.12.3", default-features = false, features = ["transport", "prost"] } 37 | -------------------------------------------------------------------------------- /arrow-flight/regen.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 
19 | 20 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) 21 | cd $SCRIPT_DIR && cargo run --manifest-path gen/Cargo.toml 22 | -------------------------------------------------------------------------------- /arrow-flight/tests/common/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | pub mod fixture; 19 | pub mod server; 20 | pub mod trailers_layer; 21 | pub mod utils; 22 | -------------------------------------------------------------------------------- /arrow-integration-test/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-integration-test" 20 | version = { workspace = true } 21 | description = "Support for the Apache Arrow JSON test data format" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_integration_test" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow = { workspace = true } 40 | arrow-buffer = { workspace = true } 41 | hex = { version = "0.4", default-features = false, features = ["std"] } 42 | serde = { version = "1.0", default-features = false, features = ["rc", "derive"] } 43 | serde_json = { version = "1.0", default-features = false, features = ["std"] } 44 | num = { version = "0.4", default-features = false, features = ["std"] } 45 | -------------------------------------------------------------------------------- /arrow-integration-test/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-integration-test/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- 
/arrow-integration-testing/src/bin/arrow-file-to-stream.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | // The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets 19 | #![allow(unused_crate_dependencies)] 20 | 21 | use arrow::error::Result; 22 | use arrow::ipc::reader::FileReader; 23 | use arrow::ipc::writer::StreamWriter; 24 | use clap::Parser; 25 | use std::fs::File; 26 | use std::io::{self, BufReader}; 27 | 28 | #[derive(Debug, Parser)] 29 | #[clap(author, version, about("Read an arrow file and stream to stdout"), long_about = None)] 30 | struct Args { 31 | file_name: String, 32 | } 33 | 34 | fn main() -> Result<()> { 35 | let args = Args::parse(); 36 | let f = File::open(args.file_name)?; 37 | let reader = BufReader::new(f); 38 | let mut reader = FileReader::try_new(reader, None)?; 39 | let schema = reader.schema(); 40 | 41 | let mut writer = StreamWriter::try_new(io::stdout(), &schema)?; 42 | 43 | reader.try_for_each(|batch| { 44 | let batch = batch?; 45 | writer.write(&batch) 46 | })?; 47 | writer.finish()?; 48 | 49 | Ok(()) 50 | } 51 | -------------------------------------------------------------------------------- /arrow-integration-testing/src/bin/arrow-stream-to-file.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | // The unused_crate_dependencies lint does not work well for crates defining additional examples/bin targets 19 | #![allow(unused_crate_dependencies)] 20 | 21 | use std::io; 22 | 23 | use arrow::error::Result; 24 | use arrow::ipc::reader::StreamReader; 25 | use arrow::ipc::writer::FileWriter; 26 | 27 | fn main() -> Result<()> { 28 | let mut arrow_stream_reader = StreamReader::try_new(io::stdin(), None)?; 29 | let schema = arrow_stream_reader.schema(); 30 | 31 | let mut writer = FileWriter::try_new(io::stdout(), &schema)?; 32 | 33 | arrow_stream_reader.try_for_each(|batch| writer.write(&batch?))?; 34 | writer.finish()?; 35 | 36 | Ok(()) 37 | } 38 | -------------------------------------------------------------------------------- /arrow-integration-testing/src/flight_client_scenarios/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Collection of utilities for testing the Flight client. 
19 | 20 | pub mod auth_basic_proto; 21 | pub mod integration_test; 22 | pub mod middleware; 23 | -------------------------------------------------------------------------------- /arrow-integration-testing/src/flight_server_scenarios/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Collection of utilities for testing the Flight server. 
19 | use std::net::SocketAddr; 20 | 21 | use arrow_flight::{FlightEndpoint, Location, Ticket}; 22 | use tokio::net::TcpListener; 23 | 24 | pub mod auth_basic_proto; 25 | pub mod integration_test; 26 | pub mod middleware; 27 | 28 | type Error = Box<dyn std::error::Error + Send + Sync + 'static>; 29 | type Result<T = (), E = Error> = std::result::Result<T, E>; 30 | 31 | /// Listen on a port and return the address 32 | pub async fn listen_on(port: u16) -> Result<SocketAddr> { 33 | let addr: SocketAddr = format!("0.0.0.0:{port}").parse()?; 34 | 35 | let listener = TcpListener::bind(addr).await?; 36 | let addr = listener.local_addr()?; 37 | 38 | Ok(addr) 39 | } 40 | 41 | /// Create a FlightEndpoint with a ticket and location 42 | pub fn endpoint(ticket: &str, location_uri: impl Into<String>) -> FlightEndpoint { 43 | FlightEndpoint { 44 | ticket: Some(Ticket { 45 | ticket: ticket.as_bytes().to_vec().into(), 46 | }), 47 | location: vec![Location { 48 | uri: location_uri.into(), 49 | }], 50 | expiration_time: None, 51 | app_metadata: vec![].into(), 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /arrow-ipc/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | ## Developer's guide 21 | 22 | # IPC 23 | 24 | The expected flatc version is 1.12.0+, built from [flatbuffers](https://github.com/google/flatbuffers) 25 | master at fixed commit ID, by regen.sh.
26 | 27 | The IPC flatbuffer code was generated by running this command from the root of the project: 28 | 29 | ```bash 30 | ./regen.sh 31 | ``` 32 | 33 | The above script will run the `flatc` compiler and perform some adjustments to the source code: 34 | 35 | - Replace `type__` with `type_` 36 | - Remove `org::apache::arrow::flatbuffers` namespace 37 | - Add includes to each generated file 38 | -------------------------------------------------------------------------------- /arrow-ipc/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | name = "arrow-ipc" 20 | version = { workspace = true } 21 | description = "Support for the Arrow IPC format" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_ipc" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-array = { workspace = true } 40 | arrow-buffer = { workspace = true } 41 | arrow-data = { workspace = true } 42 | arrow-schema = { workspace = true } 43 | flatbuffers = { version = "25.2.10", default-features = false } 44 | lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true } 45 | zstd = { version = "0.13.0", default-features = false, optional = true } 46 | 47 | [features] 48 | default = [] 49 | lz4 = ["lz4_flex"] 50 | 51 | [dev-dependencies] 52 | criterion = "0.5.1" 53 | tempfile = "3.3" 54 | tokio = "1.43.0" 55 | # used in benches 56 | memmap2 = "0.9.3" 57 | bytes = "1.9" 58 | 59 | [[bench]] 60 | name = "ipc_writer" 61 | harness = false 62 | 63 | [[bench]] 64 | name = "ipc_reader" 65 | harness = false 66 | -------------------------------------------------------------------------------- /arrow-ipc/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-ipc/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-ipc/src/gen/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software 
Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Generated code 19 | 20 | #![allow(non_snake_case)] 21 | 22 | #[allow(clippy::all)] 23 | pub mod File; 24 | #[allow(clippy::all)] 25 | pub mod Message; 26 | #[allow(clippy::all)] 27 | pub mod Schema; 28 | #[allow(clippy::all)] 29 | pub mod SparseTensor; 30 | #[allow(clippy::all)] 31 | pub mod Tensor; 32 | -------------------------------------------------------------------------------- /arrow-json/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-json/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-json/src/reader/boolean_array.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use arrow_array::builder::BooleanBuilder; 19 | use arrow_array::Array; 20 | use arrow_data::ArrayData; 21 | use arrow_schema::ArrowError; 22 | 23 | use crate::reader::tape::{Tape, TapeElement}; 24 | use crate::reader::ArrayDecoder; 25 | /// Decoder that turns `Null` / `True` / `False` tape elements into boolean array data 26 | #[derive(Default)] 27 | pub struct BooleanArrayDecoder {} 28 | 29 | impl ArrayDecoder for BooleanArrayDecoder { 30 | fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> { 31 | let mut builder = BooleanBuilder::with_capacity(pos.len()); 32 | for p in pos { 33 | match tape.get(*p) { 34 | TapeElement::Null => builder.append_null(), 35 | TapeElement::True => builder.append_value(true), 36 | TapeElement::False => builder.append_value(false), 37 | _ => return Err(tape.error(*p, "boolean")), 38 | } 39 | } 40 | 41 | Ok(builder.finish().into_data()) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /arrow-json/src/reader/null_array.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::reader::tape::{Tape, TapeElement}; 19 | use crate::reader::ArrayDecoder; 20 | use arrow_data::{ArrayData, ArrayDataBuilder}; 21 | use arrow_schema::{ArrowError, DataType}; 22 | /// Decoder that accepts only `Null` tape elements and produces `DataType::Null` array data of matching length 23 | #[derive(Default)] 24 | pub struct NullArrayDecoder {} 25 | 26 | impl ArrayDecoder for NullArrayDecoder { 27 | fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> { 28 | for p in pos { 29 | if !matches!(tape.get(*p), TapeElement::Null) { 30 | return Err(tape.error(*p, "null")); 31 | } 32 | } 33 | ArrayDataBuilder::new(DataType::Null).len(pos.len()).build() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /arrow-json/test/data/arrays.json: -------------------------------------------------------------------------------- 1 | {"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":"4"} 2 | {"a":-10, "b":[2.0, 1.3, -6.1], "c":[true, true], "d":"4"} 3 | {"a":1627668684594000000, "b":[2.0, null, -6.1], "c":[false, null], "d":"text"} 4 | -------------------------------------------------------------------------------- /arrow-json/test/data/basic.json: -------------------------------------------------------------------------------- 1 | {"a":1, "b":2.0, "c":false, "d":"4", "e":"1970-1-2", "f": "1.02", "g": "2012-04-23T18:25:43.511", "h": 1.125} 2 | {"a":-10, "b":-3.5, "c":true, "d":"4", "e": 
"1969-12-31", "f": "-0.3", "g": "2016-04-23T18:25:43.511", "h": 3.5} 3 | {"a":2, "b":0.6, "c":false, "d":"text", "e": "1970-01-02 11:11:11", "f": "1377.223"} 4 | {"a":1, "b":2.0, "c":false, "d":"4", "f": "1337.009"} 5 | {"a":7, "b":-3.5, "c":true, "d":"4", "f": "1"} 6 | {"a":1, "b":0.6, "c":false, "d":"text", "f": "1338", "g": "2018-10-23T18:33:16.481"} 7 | {"a":1, "b":2.0, "c":false, "d":"4", "f": "12345829100000"} 8 | {"a":5, "b":-3.5, "c":true, "d":"4", "f": "99999999.99"} 9 | {"a":1, "b":0.6, "c":false, "d":"text", "f": "1"} 10 | {"a":1, "b":2.0, "c":false, "d":"4", "f": "1"} 11 | {"a":1, "b":-3.5, "c":true, "d":"4", "f": "1"} 12 | {"a":100000000000000, "b":0.6, "c":false, "d":"text", "f": "1"} -------------------------------------------------------------------------------- /arrow-json/test/data/basic_nulls.json: -------------------------------------------------------------------------------- 1 | {"a":1, "b":2.0, "c":false} 2 | {"a":null, "b":-3.5, "c":true, "d":"4"} 3 | {"c":false, "d":"text"} 4 | {"a":1, "b":2.0, "c":false, "d":"4"} 5 | {"a":7, "b":-3.5, "c":null, "d":null} 6 | {"a":1, "b":0.6, "c":false} 7 | {"a":1, "b":2.0, "d":"4"} 8 | {"a":5, "c":true} 9 | {"a":1, "b":0.6, "c":false, "d":"text"} 10 | {"a":1, "b":2.0, "c":false, "d":"4"} 11 | {"a":1, "b":-3.5, "c":true, "d":"4"} 12 | {} -------------------------------------------------------------------------------- /arrow-json/test/data/list_string_dict_nested.json: -------------------------------------------------------------------------------- 1 | {"machine": "a", "events": ["Elect Leader", "Do Ballot"]} 2 | {"machine": "b", "events": ["Do Ballot", "Send Data", "Elect Leader"]} 3 | {"machine": "c", "events": ["Send Data"]} 4 | -------------------------------------------------------------------------------- /arrow-json/test/data/list_string_dict_nested_nulls.json: -------------------------------------------------------------------------------- 1 | {"machine": "a", "events": [null, "Elect Leader", "Do 
Ballot"]} 2 | {"machine": "b", "events": ["Do Ballot", null, "Send Data", "Elect Leader"]} 3 | {"machine": "c", "events": ["Send Data"]} 4 | -------------------------------------------------------------------------------- /arrow-json/test/data/mixed_arrays.json: -------------------------------------------------------------------------------- 1 | {"a":1, "b":[2.0, 1.3, -6.1], "c":[false, true], "d":4.1} 2 | {"a":-10, "b":[2.0, 1.3, -6.1], "c":null, "d":null} 3 | {"a":2, "b":[2.0, null, -6.1], "c":[false, null], "d":"text"} 4 | {"a":3, "b":4, "c": true, "d":[1, false, "array", 2.4]} 5 | -------------------------------------------------------------------------------- /arrow-json/test/data/mixed_arrays.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/arrow-rs/3681540e072857746597aa65f083886dbf37f2d5/arrow-json/test/data/mixed_arrays.json.gz -------------------------------------------------------------------------------- /arrow-json/test/data/nested_structs.json: -------------------------------------------------------------------------------- 1 | {"a": {"b": true, "c": {"d": "text"}}} 2 | {"a": {"b": false, "c": null}} 3 | {"a": {"b": true, "c": {"d": "text"}}} 4 | {"a": 1} -------------------------------------------------------------------------------- /arrow-json/test/data/nested_with_nulls.json: -------------------------------------------------------------------------------- 1 | {"a": null, "b": null, "c": null, "d": {"d1": null, "d2": [null, 1, 2, null]}} 2 | {"a": null, "b": -3.5, "c": true, "d": {"d1": null, "d2": null}} 3 | {"a": null, "b": null, "c": false, "d": {"d1": "1970-01-01", "d2": null}} 4 | {"a": 1, "b": 2.0, "c": false, "d": {"d1": null, "d2": null}} 5 | -------------------------------------------------------------------------------- /arrow-ord/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache 
Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-ord" 20 | version = { workspace = true } 21 | description = "Ordering kernels for arrow arrays" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_ord" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-array = { workspace = true } 40 | arrow-buffer = { workspace = true } 41 | arrow-data = { workspace = true } 42 | arrow-schema = { workspace = true } 43 | arrow-select = { workspace = true } 44 | 45 | [dev-dependencies] 46 | half = { version = "2.1", default-features = false, features = ["num-traits"] } 47 | rand = { version = "0.9", default-features = false, features = ["std", "std_rng"] } 48 | -------------------------------------------------------------------------------- /arrow-ord/LICENSE.txt: -------------------------------------------------------------------------------- 1 | 
../LICENSE.txt -------------------------------------------------------------------------------- /arrow-ord/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-pyarrow-integration-testing/.cargo/config: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [target.x86_64-apple-darwin] 19 | rustflags = [ 20 | "-C", "link-arg=-undefined", 21 | "-C", "link-arg=dynamic_lookup", 22 | ] -------------------------------------------------------------------------------- /arrow-pyarrow-integration-testing/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | venv 3 | -------------------------------------------------------------------------------- /arrow-pyarrow-integration-testing/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-pyarrow-integration-testing" 20 | description = "" 21 | version = "0.1.0" 22 | homepage = "https://github.com/apache/arrow-rs" 23 | repository = "https://github.com/apache/arrow-rs" 24 | authors = ["Apache Arrow <dev@arrow.apache.org>"] 25 | license = "Apache-2.0" 26 | keywords = [ "arrow" ] 27 | edition = "2021" 28 | rust-version = "1.81" 29 | publish = false 30 | 31 | [lib] 32 | name = "arrow_pyarrow_integration_testing" 33 | crate-type = ["cdylib"] 34 | 35 | [dependencies] 36 | arrow = { path = "../arrow", features = ["pyarrow"] } 37 | pyo3 = { version = "0.24.1", features = ["extension-module"] } 38 | -------------------------------------------------------------------------------- /arrow-pyarrow-integration-testing/pyproject.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [build-system] 19 | requires = ["maturin>=1.0,<2.0"] 20 | build-backend = "maturin" 21 | 22 | dependencies = ["pyarrow>=1"] 23 | -------------------------------------------------------------------------------- /arrow-row/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | name = "arrow-row" 20 | version = { workspace = true } 21 | description = "Arrow row format" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_row" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-array = { workspace = true } 40 | arrow-buffer = { workspace = true } 41 | arrow-data = { workspace = true } 42 | arrow-schema = { workspace = true } 43 | 44 | half = { version = "2.1", default-features = false } 45 | 46 | [dev-dependencies] 47 | arrow-cast = { workspace = true } 48 | arrow-ord = { workspace = true } 49 | rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } 50 | 51 | -------------------------------------------------------------------------------- /arrow-row/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-row/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-schema/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. 
The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-schema" 20 | version = { workspace = true } 21 | description = "Defines the logical types for arrow arrays" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_schema" 33 | bench = false 34 | 35 | [dependencies] 36 | serde = { version = "1.0", default-features = false, features = [ 37 | "derive", 38 | "std", 39 | "rc", 40 | ], optional = true } 41 | bitflags = { version = "2.0.0", default-features = false, optional = true } 42 | serde_json = { version = "1.0", optional = true } 43 | 44 | [features] 45 | canonical_extension_types = ["dep:serde", "dep:serde_json"] 46 | # Enable ffi support 47 | ffi = ["bitflags"] 48 | serde = ["dep:serde"] 49 | 50 | [package.metadata.docs.rs] 51 | all-features = true 52 | 53 | [dev-dependencies] 54 | bincode = { version = "1.3.3", default-features = false } 55 | criterion = { version = "0.5", default-features = false } 56 | 57 | [[bench]] 58 | name = "ffi" 59 | harness = false 60 | required-features = ["ffi"] 61 | -------------------------------------------------------------------------------- 
/arrow-schema/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-schema/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-schema/benches/ffi.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow_schema::ffi::FFI_ArrowSchema; 19 | use arrow_schema::{DataType, Field}; 20 | use criterion::*; 21 | use std::sync::Arc; 22 | 23 | /// Benchmarks `FFI_ArrowSchema::try_from` on a struct type made of five non-nullable Utf8 fields (c1..c5) 24 | fn criterion_benchmark(c: &mut Criterion) { 25 | let columns: Vec<Arc<Field>> = ["c1", "c2", "c3", "c4", "c5"] 26 | .into_iter() 27 | .map(|name| Arc::new(Field::new(name, DataType::Utf8, false))) 28 | .collect(); 29 | let struct_type = DataType::Struct(columns.into()); 30 | c.bench_function("ffi_arrow_schema_try_from", |bencher| { 31 | bencher.iter(|| FFI_ArrowSchema::try_from(&struct_type)); 32 | }); 33 | } 34 | 35 | criterion_group!(benches, criterion_benchmark); 36 | criterion_main!(benches); 37 | -------------------------------------------------------------------------------- /arrow-select/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | name = "arrow-select" 20 | version = { workspace = true } 21 | description = "Selection kernels for arrow arrays" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_select" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-buffer = { workspace = true } 40 | arrow-data = { workspace = true } 41 | arrow-schema = { workspace = true } 42 | arrow-array = { workspace = true } 43 | num = { version = "0.4", default-features = false, features = ["std"] } 44 | ahash = { version = "0.8", default-features = false} 45 | 46 | [dev-dependencies] 47 | rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } 48 | -------------------------------------------------------------------------------- /arrow-select/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-select/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow-select/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. 
The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Arrow selection kernels 19 | 20 | #![doc( 21 | html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg", 22 | html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg" 23 | )] 24 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 25 | #![warn(missing_docs)] 26 | 27 | pub mod concat; 28 | mod dictionary; 29 | pub mod filter; 30 | pub mod interleave; 31 | pub mod nullif; 32 | pub mod take; 33 | pub mod union_extract; 34 | pub mod window; 35 | pub mod zip; 36 | -------------------------------------------------------------------------------- /arrow-string/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "arrow-string" 20 | version = { workspace = true } 21 | description = "String kernels for arrow arrays" 22 | homepage = { workspace = true } 23 | repository = { workspace = true } 24 | authors = { workspace = true } 25 | license = { workspace = true } 26 | keywords = { workspace = true } 27 | include = { workspace = true } 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | name = "arrow_string" 33 | bench = false 34 | 35 | [package.metadata.docs.rs] 36 | all-features = true 37 | 38 | [dependencies] 39 | arrow-buffer = { workspace = true } 40 | arrow-data = { workspace = true } 41 | arrow-schema = { workspace = true } 42 | arrow-array = { workspace = true } 43 | arrow-select = { workspace = true } 44 | regex = { version = "1.7.0", default-features = false, features = ["std", "unicode", "perf"] } 45 | regex-syntax = { version = "0.8.0", default-features = false, features = ["unicode"] } 46 | num = { version = "0.4", default-features = false, features = ["std"] } 47 | memchr = "2.7.4" 48 | -------------------------------------------------------------------------------- /arrow-string/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow-string/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt 
-------------------------------------------------------------------------------- /arrow-string/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Arrow string kernels 19 | 20 | #![doc( 21 | html_logo_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_white-bg.svg", 22 | html_favicon_url = "https://arrow.apache.org/img/arrow-logo_chevrons_black-txt_transparent-bg.svg" 23 | )] 24 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 25 | #![warn(missing_docs)] 26 | 27 | mod binary_like; 28 | mod binary_predicate; 29 | pub mod concat_elements; 30 | pub mod length; 31 | pub mod like; 32 | mod predicate; 33 | pub mod regexp; 34 | pub mod substring; 35 | -------------------------------------------------------------------------------- /arrow/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /arrow/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /arrow/benches/array_slice.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | #[macro_use] 19 | extern crate criterion; 20 | use criterion::Criterion; 21 | 22 | extern crate arrow; 23 | 24 | use arrow::array::*; 25 | use std::sync::Arc; 26 | 27 | fn create_array_slice(array: &ArrayRef, length: usize) -> ArrayRef { 28 | array.slice(0, length) 29 | } 30 | 31 | fn create_array_with_nulls(size: usize) -> ArrayRef { 32 | let array: Float64Array = (0..size) 33 | .map(|i| if i % 2 == 0 { Some(1.0) } else { None }) 34 | .collect(); 35 | Arc::new(array) 36 | } 37 | 38 | fn array_slice_benchmark(c: &mut Criterion) { 39 | let array = create_array_with_nulls(4096); 40 | c.bench_function("array_slice 128", |b| { 41 | b.iter(|| create_array_slice(&array, 128)) 42 | }); 43 | c.bench_function("array_slice 512", |b| { 44 | b.iter(|| create_array_slice(&array, 512)) 45 | }); 46 | c.bench_function("array_slice 2048", |b| { 47 | b.iter(|| create_array_slice(&array, 2048)) 48 | }); 49 | } 50 | 51 | criterion_group!(benches, array_slice_benchmark); 52 | criterion_main!(benches); 53 | -------------------------------------------------------------------------------- /arrow/benches/bit_length_kernel.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | #[macro_use] 19 | extern crate criterion; 20 | use criterion::Criterion; 21 | 22 | extern crate arrow; 23 | 24 | use arrow::{array::*, compute::kernels::length::bit_length}; 25 | use std::hint; 26 | 27 | fn bench_bit_length(array: &StringArray) { 28 | hint::black_box(bit_length(array).unwrap()); 29 | } 30 | 31 | fn add_benchmark(c: &mut Criterion) { 32 | fn double_vec(v: Vec) -> Vec { 33 | [&v[..], &v[..]].concat() 34 | } 35 | 36 | // double ["one", "on", "o", ""] 10 times 37 | let mut values = vec!["one", "on", "o", ""]; 38 | for _ in 0..10 { 39 | values = double_vec(values); 40 | } 41 | let array = StringArray::from(values); 42 | 43 | c.bench_function("bit_length", |b| b.iter(|| bench_bit_length(&array))); 44 | } 45 | 46 | criterion_group!(benches, add_benchmark); 47 | criterion_main!(benches); 48 | -------------------------------------------------------------------------------- /arrow/benches/boolean_append_packed.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use arrow::array::BooleanBufferBuilder; 19 | use criterion::{criterion_group, criterion_main, Criterion}; 20 | use rand::{rng, Rng}; 21 | 22 | fn rand_bytes(len: usize) -> Vec { 23 | let mut rng = rng(); 24 | let mut buf = vec![0_u8; len]; 25 | rng.fill(buf.as_mut_slice()); 26 | buf 27 | } 28 | 29 | fn boolean_append_packed(c: &mut Criterion) { 30 | let mut rng = rng(); 31 | let source = rand_bytes(1024); 32 | let ranges: Vec<_> = (0..100) 33 | .map(|_| { 34 | let start: usize = rng.random_range(0..1024 * 8); 35 | let end: usize = rng.random_range(start..1024 * 8); 36 | start..end 37 | }) 38 | .collect(); 39 | 40 | let total_bits: usize = ranges.iter().map(|x| x.end - x.start).sum(); 41 | 42 | c.bench_function("boolean_append_packed", |b| { 43 | b.iter(|| { 44 | let mut buffer = BooleanBufferBuilder::new(total_bits); 45 | for range in &ranges { 46 | buffer.append_packed_range(range.clone(), &source); 47 | } 48 | assert_eq!(buffer.len(), total_bits); 49 | }) 50 | }); 51 | } 52 | 53 | criterion_group!(benches, boolean_append_packed); 54 | criterion_main!(benches); 55 | -------------------------------------------------------------------------------- /arrow/benches/length_kernel.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | #[macro_use] 19 | extern crate criterion; 20 | use criterion::Criterion; 21 | 22 | extern crate arrow; 23 | 24 | use arrow::array::*; 25 | use arrow::compute::kernels::length::length; 26 | use std::hint; 27 | 28 | fn bench_length(array: &StringArray) { 29 | hint::black_box(length(array).unwrap()); 30 | } 31 | 32 | fn add_benchmark(c: &mut Criterion) { 33 | fn double_vec(v: Vec) -> Vec { 34 | [&v[..], &v[..]].concat() 35 | } 36 | 37 | // double ["one", "on", "o", ""] 10 times 38 | let mut values = vec!["one", "on", "o", ""]; 39 | for _ in 0..10 { 40 | values = double_vec(values); 41 | } 42 | let array = StringArray::from(values); 43 | 44 | c.bench_function("length", |b| b.iter(|| bench_length(&array))); 45 | } 46 | 47 | criterion_group!(benches, add_benchmark); 48 | criterion_main!(benches); 49 | -------------------------------------------------------------------------------- /arrow/benches/mutable_array.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | #[macro_use] 19 | extern crate criterion; 20 | use criterion::Criterion; 21 | 22 | use rand::Rng; 23 | 24 | extern crate arrow; 25 | 26 | use arrow::util::test_util::seedable_rng; 27 | use arrow::{array::*, util::bench_util::create_string_array}; 28 | 29 | fn create_slices(size: usize) -> Vec<(usize, usize)> { 30 | let rng = &mut seedable_rng(); 31 | 32 | (0..size) 33 | .map(|_| { 34 | let start = rng.random_range(0..size / 2); 35 | let end = rng.random_range(start + 1..size); 36 | (start, end) 37 | }) 38 | .collect() 39 | } 40 | 41 | fn bench(v1: &T, slices: &[(usize, usize)]) { 42 | let data = v1.to_data(); 43 | let mut mutable = MutableArrayData::new(vec![&data], false, 5); 44 | for (start, end) in slices { 45 | mutable.extend(0, *start, *end) 46 | } 47 | mutable.freeze(); 48 | } 49 | 50 | fn add_benchmark(c: &mut Criterion) { 51 | let v1 = create_string_array::(1024, 0.0); 52 | let v2 = create_slices(1024); 53 | c.bench_function("mutable str 1024", |b| b.iter(|| bench(&v1, &v2))); 54 | 55 | let v1 = create_string_array::(1024, 0.5); 56 | let v2 = create_slices(1024); 57 | c.bench_function("mutable str nulls 1024", |b| b.iter(|| bench(&v1, &v2))); 58 | } 59 | 60 | criterion_group!(benches, add_benchmark); 61 | criterion_main!(benches); 62 | -------------------------------------------------------------------------------- /arrow/benches/regexp_kernels.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | 
// or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | #[macro_use] 19 | extern crate criterion; 20 | use criterion::Criterion; 21 | 22 | extern crate arrow; 23 | 24 | use arrow::array::*; 25 | use arrow::compute::kernels::regexp::*; 26 | use arrow::util::bench_util::*; 27 | use std::hint; 28 | 29 | fn bench_regexp(arr: &GenericStringArray, regex_array: &dyn Datum) { 30 | regexp_match(hint::black_box(arr), regex_array, None).unwrap(); 31 | } 32 | 33 | fn add_benchmark(c: &mut Criterion) { 34 | let size = 65536; 35 | let val_len = 1000; 36 | 37 | let arr_string = create_string_array_with_len::(size, 0.0, val_len); 38 | let pattern_values = vec![r".*-(\d*)-.*"; size]; 39 | let pattern = GenericStringArray::::from(pattern_values); 40 | 41 | c.bench_function("regexp", |b| b.iter(|| bench_regexp(&arr_string, &pattern))); 42 | 43 | let pattern_values = vec![r".*-(\d*)-.*"]; 44 | let pattern = Scalar::new(GenericStringArray::::from(pattern_values)); 45 | 46 | c.bench_function("regexp scalar", |b| { 47 | b.iter(|| bench_regexp(&arr_string, &pattern)) 48 | }); 49 | } 50 | 51 | criterion_group!(benches, add_benchmark); 52 | criterion_main!(benches); 53 | -------------------------------------------------------------------------------- 
/arrow/examples/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Examples 21 | 22 | - [`builders.rs`](builders.rs): Using the Builder API 23 | - [`collect.rs`](collect.rs): Using the `FromIterator` API 24 | - [`dynamic_types.rs`](dynamic_types.rs): Dealing with mixed types dynamically at runtime 25 | - [`read_csv.rs`](read_csv.rs): Reading CSV files with explicit schema, pretty printing Arrays 26 | - [`read_csv_infer_schema.rs`](read_csv_infer_schema.rs): Reading CSV files with an inferred schema, pretty printing Arrays 27 | - [`zero_copy_ipc.rs`](zero_copy_ipc.rs): Zero-copy read of an Arrow IPC file using `mmap` 28 | - [`tensor_builder.rs`](tensor_builder.rs): Using the tensor builder 29 | - [`version.rs`](version.rs): Print the arrow version and exit 30 | -------------------------------------------------------------------------------- /arrow/examples/read_csv.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | extern crate arrow; 19 | 20 | use std::fs::File; 21 | use std::sync::Arc; 22 | 23 | use arrow::csv; 24 | use arrow::datatypes::{DataType, Field, Schema}; 25 | use arrow::util::pretty::print_batches; 26 | 27 | fn main() { 28 | let schema = Schema::new(vec![ 29 | Field::new("city", DataType::Utf8, false), 30 | Field::new("lat", DataType::Float64, false), 31 | Field::new("lng", DataType::Float64, false), 32 | ]); 33 | 34 | let path = format!( 35 | "{}/../arrow-csv/test/data/uk_cities.csv", 36 | env!("CARGO_MANIFEST_DIR") 37 | ); 38 | let file = File::open(path).unwrap(); 39 | 40 | let mut csv = csv::ReaderBuilder::new(Arc::new(schema)) 41 | .build(file) 42 | .unwrap(); 43 | 44 | let batch = csv.next().unwrap().unwrap(); 45 | print_batches(&[batch]).unwrap(); 46 | } 47 | -------------------------------------------------------------------------------- /arrow/examples/read_csv_infer_schema.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | extern crate arrow; 19 | 20 | use arrow::csv; 21 | use arrow::util::pretty::print_batches; 22 | use arrow_csv::reader::Format; 23 | use std::fs::File; 24 | use std::io::Seek; 25 | use std::sync::Arc; 26 | 27 | fn main() { 28 | let path = format!( 29 | "{}/../arrow-csv/test/data/uk_cities_with_headers.csv", 30 | env!("CARGO_MANIFEST_DIR") 31 | ); 32 | let mut file = File::open(path).unwrap(); 33 | let format = Format::default().with_header(true); 34 | let (schema, _) = format.infer_schema(&mut file, Some(100)).unwrap(); 35 | file.rewind().unwrap(); 36 | 37 | let builder = csv::ReaderBuilder::new(Arc::new(schema)).with_format(format); 38 | let mut csv = builder.build(file).unwrap(); 39 | let batch = csv.next().unwrap().unwrap(); 40 | print_batches(&[batch]).unwrap(); 41 | } 42 | -------------------------------------------------------------------------------- /arrow/examples/version.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Print the arrow version and exit 19 | 20 | use arrow::ARROW_VERSION; 21 | 22 | fn main() { 23 | println!("arrow version: {ARROW_VERSION}"); 24 | } 25 | -------------------------------------------------------------------------------- /arrow/src/array/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Statically typed implementations of Arrow Arrays 19 | //! 20 | //! 
**See [arrow_array] for examples and usage instructions** 21 | 22 | // --------------------- Array & ArrayData --------------------- 23 | pub use arrow_array::builder::*; 24 | pub use arrow_array::cast::*; 25 | pub use arrow_array::iterator::*; 26 | pub use arrow_array::*; 27 | pub use arrow_data::{ 28 | layout, ArrayData, ArrayDataBuilder, ArrayDataRef, BufferSpec, ByteView, DataTypeLayout, 29 | }; 30 | 31 | pub use arrow_data::transform::{Capacities, MutableArrayData}; 32 | 33 | #[cfg(feature = "ffi")] 34 | #[allow(deprecated)] 35 | pub use arrow_array::ffi::export_array_into_raw; 36 | 37 | // --------------------- Array's values comparison --------------------- 38 | 39 | #[allow(deprecated)] 40 | pub use arrow_ord::ord::{build_compare, make_comparator, DynComparator}; 41 | -------------------------------------------------------------------------------- /arrow/src/compute/kernels.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Computation kernels on Arrow Arrays 19 | 20 | pub use arrow_arith::{aggregate, arithmetic, arity, bitwise, boolean, numeric, temporal}; 21 | pub use arrow_cast::cast; 22 | pub use arrow_cast::parse as cast_utils; 23 | pub use arrow_ord::{cmp, partition, rank, sort}; 24 | pub use arrow_select::{concat, filter, interleave, nullif, take, union_extract, window, zip}; 25 | pub use arrow_string::{concat_elements, length, regexp, substring}; 26 | 27 | /// Comparison kernels for `Array`s. 28 | pub mod comparison { 29 | pub use arrow_ord::comparison::*; 30 | pub use arrow_string::like::*; 31 | // continue to export deprecated methods until they are removed 32 | pub use arrow_string::regexp::{regexp_is_match, regexp_is_match_scalar}; 33 | #[allow(deprecated)] 34 | pub use arrow_string::regexp::{regexp_is_match_utf8, regexp_is_match_utf8_scalar}; 35 | } 36 | -------------------------------------------------------------------------------- /arrow/src/compute/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! 
Computation kernels on Arrow Arrays 19 | 20 | pub mod kernels; 21 | 22 | pub use self::kernels::aggregate::*; 23 | pub use self::kernels::arithmetic::*; 24 | pub use self::kernels::arity::*; 25 | pub use self::kernels::boolean::*; 26 | pub use self::kernels::cast::*; 27 | pub use self::kernels::comparison::*; 28 | pub use self::kernels::concat::*; 29 | pub use self::kernels::filter::*; 30 | pub use self::kernels::interleave::*; 31 | pub use self::kernels::nullif::*; 32 | pub use self::kernels::partition::*; 33 | pub use self::kernels::rank::*; 34 | pub use self::kernels::regexp::*; 35 | pub use self::kernels::sort::*; 36 | pub use self::kernels::take::*; 37 | pub use self::kernels::temporal::*; 38 | pub use self::kernels::union_extract::*; 39 | pub use self::kernels::window::*; 40 | -------------------------------------------------------------------------------- /arrow/src/datatypes/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Defines the logical data types of Arrow arrays. 19 | //! 20 | //! The most important things you might be looking for are: 21 | //! 
* [`Schema`] to describe a schema. 22 | //! * [`Field`] to describe one field within a schema. 23 | //! * [`DataType`] to describe the type of a field. 24 | 25 | pub use arrow_array::types::*; 26 | pub use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType}; 27 | pub use arrow_buffer::{i256, ArrowNativeType, ToByteSlice}; 28 | pub use arrow_data::decimal::*; 29 | pub use arrow_schema::{ 30 | DataType, Field, FieldRef, Fields, IntervalUnit, Schema, SchemaBuilder, SchemaRef, TimeUnit, 31 | UnionFields, UnionMode, 32 | }; 33 | -------------------------------------------------------------------------------- /arrow/src/error.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Defines `ArrowError` for representing failures in various Arrow operations. 19 | 20 | pub use arrow_schema::ArrowError; 21 | 22 | /// A specialized `Result` type for Arrow operations. 
23 | pub type Result = std::result::Result; 24 | -------------------------------------------------------------------------------- /arrow/src/util/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Utility functions for working with Arrow data 19 | pub use arrow_buffer::{bit_chunk_iterator, bit_util}; 20 | 21 | pub use arrow_data::bit_iterator; 22 | pub use arrow_data::bit_mask; 23 | 24 | #[cfg(feature = "test_utils")] 25 | pub mod bench_util; 26 | #[cfg(feature = "test_utils")] 27 | pub mod data_gen; 28 | #[cfg(feature = "prettyprint")] 29 | pub use arrow_cast::pretty; 30 | pub mod string_writer; 31 | #[cfg(any(test, feature = "test_utils"))] 32 | pub mod test_util; 33 | 34 | pub use arrow_cast::display; 35 | -------------------------------------------------------------------------------- /arrow/tests/schema.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use arrow::datatypes::{DataType, Field, Schema}; 19 | use std::collections::HashMap; 20 | /// The tests in this file ensure a `Schema` can be manipulated 21 | /// outside of the arrow crate 22 | 23 | #[test] 24 | fn schema_destructure() { 25 | let meta = [("foo".to_string(), "baz".to_string())] 26 | .into_iter() 27 | .collect::>(); 28 | 29 | let field = Field::new("c1", DataType::Utf8, false); 30 | let schema = Schema::new(vec![field]).with_metadata(meta); 31 | 32 | // Destructuring a Schema allows rewriting metadata 33 | // without copying 34 | // 35 | // Model this use case below: 36 | 37 | let Schema { 38 | fields, 39 | mut metadata, 40 | } = schema; 41 | 42 | metadata.insert("foo".to_string(), "bar".to_string()); 43 | 44 | let new_schema = Schema::new(fields).with_metadata(metadata); 45 | 46 | assert_eq!(new_schema.metadata.get("foo").unwrap(), "bar"); 47 | } 48 | -------------------------------------------------------------------------------- /dev/.gitignore: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. 
See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | # Python virtual environments for dev tools 19 | .venv*/ 20 | 21 | __pycache__ 22 | *.egg-info 23 | -------------------------------------------------------------------------------- /dev/release/check-rat-report.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | ############################################################################## 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. 
import fnmatch
import re
import sys
import xml.etree.ElementTree as ET


def load_globs(path):
    """Return the exclusion globs stored in ``path``.

    Each line of the file becomes one entry with surrounding whitespace
    removed. The file is read inside a ``with`` block so the handle is closed
    as soon as reading finishes.
    """
    with open(path, "r") as handle:
        return [line.strip() for line in handle]


def unapproved_resources(root, globs):
    """Collect the resources of a RAT report that are not approved or excluded.

    ``root`` is the root element of the parsed report; only its direct
    ``resource`` children are inspected. A resource is ignored when it has no
    ``license-approval`` child or when the first such child has
    ``name == 'true'``. Otherwise the resource name, with its leading path
    component removed, is matched against every pattern in ``globs``.

    Returns a list of ``(clean_name, raw_name, approval_name)`` tuples in
    document order. Raises ``KeyError`` if a ``name`` attribute is missing.
    """
    failures = []
    for resource in root.findall('resource'):
        approvals = resource.findall('license-approval')
        if not approvals or approvals[0].attrib['name'] == 'true':
            continue
        raw_name = resource.attrib['name']
        # Drop the first path component (the directory RAT was pointed at) so
        # the globs can be written relative to the repository root.
        clean_name = re.sub('^[^/]+/', '', raw_name)
        if any(fnmatch.fnmatch(clean_name, g) for g in globs):
            continue
        failures.append((clean_name, raw_name, approvals[0].attrib['name']))
    return failures


def main(argv):
    """Run the check for ``argv`` (``sys.argv`` layout) and return the exit code.

    Returns 1 on a usage error or when any unapproved resource is reported,
    0 (after printing ``OK``) otherwise.
    """
    if len(argv) != 3:
        sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" %
                         argv[0])
        return 1

    globs = load_globs(argv[1])
    root = ET.parse(argv[2]).getroot()

    failures = unapproved_resources(root, globs)
    for failure in failures:
        sys.stdout.write("NOT APPROVED: %s (%s): %s\n" % failure)
    if failures:
        return 1

    print('OK')
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
parquet/src/bin/parquet-fromcsv-help.txt 22 | arrow-flight/examples/data/* 23 | -------------------------------------------------------------------------------- /dev/release/remove-old-artifacts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Licensed to the Apache Software Foundation (ASF) under one 4 | # or more contributor license agreements. See the NOTICE file 5 | # distributed with this work for additional information 6 | # regarding copyright ownership. The ASF licenses this file 7 | # to you under the Apache License, Version 2.0 (the 8 | # "License"); you may not use this file except in compliance 9 | # with the License. You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, 14 | # software distributed under the License is distributed on an 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | # KIND, either express or implied. See the License for the 17 | # specific language governing permissions and limitations 18 | # under the License. 19 | # 20 | 21 | # This script removes all RCs and all but the most recent versions of 22 | # arrow-rs from svn. 
#
# The older versions are in SVN history as well as available on the
# archive page https://archive.apache.org/dist/
#
# See
# https://infra.apache.org/release-download-pages.html

set -e
set -u
set -o pipefail

echo "Remove all RCs"
dev_base_url=https://dist.apache.org/repos/dist/dev/arrow
# grep exits with status 1 when nothing matches. Combined with `set -e` and
# `pipefail` that would abort the whole script when there is simply nothing
# to delete, so status 1 is mapped to success while genuine grep failures
# (status > 1) still stop the script.
old_rcs=$(
  svn ls "${dev_base_url}/" | \
  { grep -E '^apache-arrow-rs-[0-9]' || [ "$?" -eq 1 ]; } | \
  sort --version-sort
)
# Intentionally unquoted: one loop iteration per listed entry.
for old_rc in $old_rcs; do
  echo "Remove RC: ${old_rc}"
  svn \
    delete \
    -m "Remove old Apache Arrow Rust RC: ${old_rc}" \
    "${dev_base_url}/${old_rc}"
done

echo "Remove all but the most recent version"
release_base_url="https://dist.apache.org/repos/dist/release/arrow"
# Newest first, then `tail -n +2` drops the first (most recent) entry so that
# only older releases remain in the list.
old_releases=$(
  svn ls "${release_base_url}" | \
  { grep -E '^arrow-rs-[0-9\.]+' || [ "$?" -eq 1 ]; } | \
  sort --version-sort --reverse | \
  tail -n +2
)
for old_release_version in $old_releases; do
  echo "Remove old release: ${old_release_version}"
  svn \
    delete \
    -m "Remove Apache Arrow Rust release: ${old_release_version}" \
    "${release_base_url}/${old_release_version}"
done
# Runs Apache RAT over the path given as the first argument and fails when
# check-rat-report.py reports files that are neither approved nor excluded.

RAT_VERSION=0.13
RAT_JAR="apache-rat-${RAT_VERSION}.jar"

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <path-to-check>" >&2
  exit 1
fi

# download apache rat
if [ ! -f "${RAT_JAR}" ]; then
  # -f makes curl fail on HTTP errors instead of saving the error page, and
  # the download goes to a temporary name first so that a failed or partial
  # transfer is never picked up as a valid jar by a later run.
  if ! curl -sSf "https://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/${RAT_JAR}" > "${RAT_JAR}.tmp"; then
    rm -f "${RAT_JAR}.tmp"
    echo "Failed to download ${RAT_JAR}" >&2
    exit 1
  fi
  mv "${RAT_JAR}.tmp" "${RAT_JAR}"
fi

RELEASE_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# generate the rat report (the checker below parses rat.txt as XML)
java -jar "${RAT_JAR}" -x "$1" > rat.txt
python "${RELEASE_DIR}/check-rat-report.py" "${RELEASE_DIR}/rat_exclude_files.txt" rat.txt > filtered_rat.txt
cat filtered_rat.txt
# grep -c prints 0 (and exits 1) when nothing matches; only the count is used.
UNAPPROVED=$(grep -c "NOT APPROVED" filtered_rat.txt)

if [ "${UNAPPROVED}" -gt 0 ]; then
  echo "${UNAPPROVED} unapproved licences. Check rat report: rat.txt"
  exit 1
elif ! grep -qx "OK" filtered_rat.txt; then
  # The checker prints "OK" only when it ran to completion without findings.
  # Its absence means RAT or the checker itself failed, which must not be
  # reported as a clean result.
  echo "License check did not complete. Check rat report: rat.txt" >&2
  exit 1
else
  echo "No unapproved licenses"
fi
include "Schema.fbs";

namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// Arrow File metadata
///

// Root table of this schema (see `root_type Footer` at the bottom).
// MetadataVersion, Schema and KeyValue are not declared in this file;
// presumably they come from the included Schema.fbs -- TODO confirm.
table Footer {
  version: org.apache.arrow.flatbuf.MetadataVersion;

  schema: org.apache.arrow.flatbuf.Schema;

  // Both lists below hold `Block` entries (offset + lengths, defined further
  // down). NOTE(review): presumably one entry per dictionary / record batch
  // written to the file -- confirm against the Arrow IPC file format docs.
  dictionaries: [ Block ];

  recordBatches: [ Block ];

  /// User-defined metadata
  custom_metadata: [ KeyValue ];
}

// Declared as a `struct` rather than a `table`; it is the element type of
// the `dictionaries` and `recordBatches` vectors in Footer.
struct Block {

  /// Index to the start of the RecordBlock (note this is past the Message header)
  offset: long;

  /// Length of the metadata
  metaDataLength: int;

  /// Length of the data (this is aligned so there can be a gap between this and
  /// the metadata).
  bodyLength: long;
}

root_type Footer;
http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | .. Unless required by applicable law or agreed to in writing, 12 | .. software distributed under the License is distributed on an 13 | .. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | .. KIND, either express or implied. See the License for the 15 | .. specific language governing permissions and limitations 16 | .. under the License. 17 | 18 | Arrow Protocol Files 19 | ==================== 20 | 21 | This folder contains binary protocol definitions for the Arrow columnar format 22 | and other parts of the project, like the Flight RPC framework. 23 | 24 | For documentation about the Arrow format, see the `docs/source/format` 25 | directory. 26 | -------------------------------------------------------------------------------- /format/Tensor.fbs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | /// EXPERIMENTAL: Metadata for n-dimensional arrays, aka "tensors" or 19 | /// "ndarrays". 
include "Schema.fbs";

namespace org.apache.arrow.flatbuf;

/// ----------------------------------------------------------------------
/// Data structures for dense tensors

/// Shape data for a single axis in a tensor
table TensorDim {
  /// Length of dimension
  size: long;

  /// Name of the dimension, optional
  name: string;
}

// Root table of this schema (see `root_type Tensor` at the bottom).
// `Type` and `Buffer` are not declared in this file; presumably they come
// from the included Schema.fbs -- TODO confirm.
// `type`, `shape` and `data` are marked `(required)`; `strides` is the only
// field that may be left out.
table Tensor {
  /// The type of data contained in a value cell. Currently only fixed-width
  /// value types are supported, no strings or nested types
  type: Type (required);

  /// The dimensions of the tensor, optionally named
  shape: [TensorDim] (required);

  /// Non-negative byte offsets to advance one value cell along each dimension
  /// If omitted, default to row-major order (C-like).
  strides: [long];

  /// The location and size of the tensor's data
  data: Buffer (required);
}

root_type Tensor;
14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | 17 | -------------------------------------------------------------------------------- /parquet-variant/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "parquet-variant" 20 | # This package is still in development and thus the version does 21 | # not follow the versions of the rest of the crates in this repo. 
22 | version = "0.1.0" 23 | license = { workspace = true } 24 | description = "Apache Parquet Variant implementation in Rust" 25 | homepage = { workspace = true } 26 | repository = { workspace = true } 27 | authors = { workspace = true } 28 | keywords = ["arrow", "parquet", "variant"] 29 | readme = "README.md" 30 | edition = { workspace = true } 31 | rust-version = { workspace = true } 32 | 33 | [dependencies] 34 | arrow-schema = "55.1.0" 35 | 36 | [lib] 37 | 38 | 39 | -------------------------------------------------------------------------------- /parquet-variant/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | # Apache Parquet Variant Rust Implementation 21 | 22 | [![crates.io](https://img.shields.io/crates/v/parquet-variant.svg)](https://crates.io/crates/parquet-variant) 23 | [![docs.rs](https://img.shields.io/docsrs/parquet-variant.svg)](https://docs.rs/parquet-variant/latest/parquet_variant/) 24 | 25 | This crate contains an implementation of [Variant Binary Encoding] from 26 | [Apache Parquet]. This software is developed as part of the [Apache Arrow] project. 27 | 28 | [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md 29 | [Apache Parquet]: https://parquet.apache.org/ 30 | [Apache Arrow]: https://arrow.apache.org/ 31 | 32 | Please see the [API documentation](https://docs.rs/parquet-variant/latest) for more details. 33 | 34 | ## 🚧 Work In Progress 35 | 36 | NOTE: This crate is under active development and is not yet ready for production use. 37 | If you are interested in helping, you can find more information on the GitHub [Variant issue]. 38 | 39 | [Variant issue]: https://github.com/apache/arrow-rs/issues/6736 40 | 41 | 42 | ## License 43 | 44 | Licensed under the Apache License, Version 2.0: <http://www.apache.org/licenses/LICENSE-2.0>.
45 | -------------------------------------------------------------------------------- /parquet-variant/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Implementation of [Variant Binary Encoding] from [Apache Parquet]. 19 | //! 20 | //! [Variant Binary Encoding]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md 21 | //! [Apache Parquet]: https://parquet.apache.org/ 22 | //! 23 | //! ## 🚧 Work In Progress 24 | //! 25 | //! This crate is under active development and is not yet ready for production use. 26 | //! If you are interested in helping, you can find more information on the GitHub [Variant issue] 27 | //! 28 | //! 
[Variant issue]: https://github.com/apache/arrow-rs/issues/6736 29 | 30 | // TODO: dead code removal 31 | #[allow(dead_code)] 32 | mod decoder; 33 | // TODO: dead code removal 34 | #[allow(dead_code)] 35 | mod variant; 36 | // TODO: dead code removal 37 | #[allow(dead_code)] 38 | mod utils; 39 | 40 | #[cfg(test)] 41 | mod test_variant; 42 | -------------------------------------------------------------------------------- /parquet/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /parquet/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /parquet/benches/metadata.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use bytes::Bytes; 19 | use criterion::*; 20 | use parquet::file::reader::SerializedFileReader; 21 | use parquet::file::serialized_reader::ReadOptionsBuilder; 22 | 23 | fn criterion_benchmark(c: &mut Criterion) { 24 | // Read file into memory to isolate filesystem performance 25 | let file = "../parquet-testing/data/alltypes_tiny_pages.parquet"; 26 | let data = std::fs::read(file).unwrap(); 27 | let data = Bytes::from(data); 28 | 29 | c.bench_function("open(default)", |b| { 30 | b.iter(|| SerializedFileReader::new(data.clone()).unwrap()) 31 | }); 32 | 33 | c.bench_function("open(page index)", |b| { 34 | b.iter(|| { 35 | let options = ReadOptionsBuilder::new().with_page_index().build(); 36 | SerializedFileReader::new_with_options(data.clone(), options).unwrap() 37 | }) 38 | }); 39 | } 40 | 41 | criterion_group!(benches, criterion_benchmark); 42 | criterion_main!(benches); 43 | -------------------------------------------------------------------------------- /parquet/examples/read_parquet.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use arrow::util::pretty::print_batches; 19 | use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; 20 | use parquet::errors::Result; 21 | use std::fs::File; 22 | 23 | fn main() -> Result<()> { 24 | // Create parquet file that will be read. 25 | let testdata = arrow::util::test_util::parquet_test_data(); 26 | let path = format!("{testdata}/alltypes_plain.parquet"); 27 | let file = File::open(path).unwrap(); 28 | 29 | // Create a sync parquet reader with batch_size. 30 | // batch_size is the number of rows to read up to buffer once from pages, defaults to 1024 31 | let parquet_reader = ParquetRecordBatchReaderBuilder::try_new(file)? 32 | .with_batch_size(8192) 33 | .build()?; 34 | 35 | let mut batches = Vec::new(); 36 | 37 | for batch in parquet_reader { 38 | batches.push(batch?); 39 | } 40 | 41 | print_batches(&batches).unwrap(); 42 | Ok(()) 43 | } 44 | -------------------------------------------------------------------------------- /parquet/pytest/requirements.in: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | pytest 18 | pyspark 19 | black 20 | pandas 21 | -------------------------------------------------------------------------------- /parquet/src/arrow/buffer/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Logic for reading data into arrow buffers 19 | 20 | pub mod bit_util; 21 | pub mod dictionary_buffer; 22 | pub mod offset_buffer; 23 | pub mod view_buffer; 24 | -------------------------------------------------------------------------------- /parquet/src/arrow/decoder/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Specialized decoders optimised for decoding to arrow format 19 | 20 | mod delta_byte_array; 21 | mod dictionary_index; 22 | 23 | pub use delta_byte_array::DeltaByteArrayDecoder; 24 | pub use dictionary_index::DictIndexDecoder; 25 | -------------------------------------------------------------------------------- /parquet/src/bin/parquet-fromcsv-help.txt: -------------------------------------------------------------------------------- 1 | 2 | Usage: parquet [OPTIONS] --schema --input-file --output-file 3 | 4 | Options: 5 | -s, --schema 6 | message schema for output Parquet 7 | 8 | -i, --input-file 9 | input CSV file 10 | 11 | -o, --output-file 12 | output Parquet file 13 | 14 | -f, --input-format 15 | input file format 16 | 17 | [default: csv] 18 | [possible values: csv, tsv] 19 | 20 | -b, --batch-size 21 | batch size 22 | 23 | [env: PARQUET_FROM_CSV_BATCHSIZE=] 24 | [default: 1000] 25 | 26 | -h, --has-header 27 | has header 28 | 29 | -d, --delimiter 30 | field delimiter 31 | 32 | default value: when input_format==CSV: ',' when input_format==TSV: 'TAB' 33 | 34 | -r, --record-terminator 35 | record terminator 36 | 37 | [possible values: lf, crlf, cr] 38 | 39 | -e, --escape-char 40 | escape character 41 | 42 | -q, --quote-char 43 | quote character 44 | 45 | -D, --double-quote 46 | double quote 47 | 48 | [possible values: true, false] 49 | 50 | -C, --csv-compression 51 | compression mode of csv 52 | 53 | [default: UNCOMPRESSED] 54 | 55 | -c, --parquet-compression 56 | compression mode of parquet 57 | 58 | 
[default: SNAPPY] 59 | 60 | -w, --writer-version 61 | writer version 62 | 63 | -m, --max-row-group-size 64 | max row group size 65 | 66 | --enable-bloom-filter 67 | whether to enable bloom filter writing 68 | 69 | [possible values: true, false] 70 | 71 | --help 72 | display usage help 73 | 74 | -V, --version 75 | Print version 76 | -------------------------------------------------------------------------------- /parquet/src/column/page_encryption_disabled.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::column::page::CompressedPage; 19 | use crate::errors::Result; 20 | use crate::format::PageHeader; 21 | use std::io::Write; 22 | 23 | #[derive(Debug)] 24 | /// Dummy PageEncryptor struct that can never be instantiated, 25 | /// provided to support compilation without the encryption feature enabled. 
26 | pub(crate) struct PageEncryptor { 27 | _empty: (), 28 | } 29 | 30 | impl PageEncryptor { 31 | pub fn increment_page(&mut self) {} 32 | 33 | pub fn encrypt_compressed_page(&mut self, _page: CompressedPage) -> Result { 34 | unreachable!("The encryption feature is disabled") 35 | } 36 | 37 | pub fn encrypt_page_header( 38 | &mut self, 39 | _page_header: &PageHeader, 40 | _sink: &mut W, 41 | ) -> Result<()> { 42 | unreachable!("The encryption feature is disabled") 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /parquet/src/encodings/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | pub mod decoding; 19 | pub mod encoding; 20 | pub mod levels; 21 | experimental!(pub(crate) mod rle); 22 | -------------------------------------------------------------------------------- /parquet/src/file/page_index/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. 
See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Page Index of "[Column Index] Layout to Support Page Skipping" 19 | //! 20 | //! [Column Index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md 21 | 22 | pub mod index; 23 | pub mod index_reader; 24 | pub mod offset_index; 25 | -------------------------------------------------------------------------------- /parquet/src/record/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | //! Contains record-based API for reading Parquet files. 19 | 20 | mod api; 21 | pub mod reader; 22 | mod record_reader; 23 | mod record_writer; 24 | mod triplet; 25 | 26 | pub use self::{ 27 | api::{ 28 | Field, List, ListAccessor, Map, MapAccessor, Row, RowAccessor, RowColumnIter, RowFormatter, 29 | }, 30 | record_reader::RecordReader, 31 | record_writer::RecordWriter, 32 | }; 33 | -------------------------------------------------------------------------------- /parquet/src/record/record_reader.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use super::super::errors::ParquetError; 19 | use super::super::file::reader::RowGroupReader; 20 | 21 | /// Read up to `num_records` records from `row_group_reader` into `self`. 22 | /// 23 | /// The type parameter `T` is used to work around the rust orphan rule 24 | /// when implementing on types such as `Vec<T>`.
25 | pub trait RecordReader { 26 | /// Read up to `num_records` records from `row_group_reader` into `self`. 27 | fn read_from_row_group( 28 | &mut self, 29 | row_group_reader: &mut dyn RowGroupReader, 30 | num_records: usize, 31 | ) -> Result<(), ParquetError>; 32 | } 33 | -------------------------------------------------------------------------------- /parquet/src/record/record_writer.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | use crate::schema::types::TypePtr; 19 | 20 | use super::super::errors::ParquetError; 21 | use super::super::file::writer::SerializedRowGroupWriter; 22 | 23 | /// Trait describing how to write a record (the implementer) to a row group writer. 24 | /// 25 | /// [`parquet_derive`] crate provides a derive macro [`ParquetRecordWriter`] for this trait 26 | /// for unnested structs. 27 | /// 28 | /// The type parameter `T` is used to work around the rust orphan rule 29 | /// when implementing on types such as `&[T]`.
30 | /// 31 | /// [`parquet_derive`]: https://crates.io/crates/parquet_derive 32 | /// [`ParquetRecordWriter`]: https://docs.rs/parquet_derive/53.0.0/parquet_derive/derive.ParquetRecordWriter.html 33 | pub trait RecordWriter { 34 | /// Writes from `self` into `row_group_writer`. 35 | fn write_to_row_group( 36 | &self, 37 | row_group_writer: &mut SerializedRowGroupWriter, 38 | ) -> Result<(), ParquetError>; 39 | 40 | /// Generated schema used by `row_group_writer` 41 | fn schema(&self) -> Result; 42 | } 43 | -------------------------------------------------------------------------------- /parquet/src/util/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | #[macro_use] 19 | pub mod bit_util; 20 | mod bit_pack; 21 | pub(crate) mod interner; 22 | 23 | #[cfg(any(test, feature = "test_common"))] 24 | pub(crate) mod test_common; 25 | pub mod utf8; 26 | 27 | #[cfg(any(test, feature = "test_common"))] 28 | pub use self::test_common::page_util::{ 29 | DataPageBuilder, DataPageBuilderImpl, InMemoryPageIterator, 30 | }; 31 | -------------------------------------------------------------------------------- /parquet/src/util/test_common/file_util.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::{fs, path::PathBuf, str::FromStr}; 19 | 20 | /// Returns path to the test parquet file in 'data' directory 21 | pub fn get_test_path(file_name: &str) -> PathBuf { 22 | let mut pathbuf = PathBuf::from_str(&arrow::util::test_util::parquet_test_data()).unwrap(); 23 | pathbuf.push(file_name); 24 | pathbuf 25 | } 26 | 27 | /// Returns file handle for a test parquet file from 'data' directory 28 | pub fn get_test_file(file_name: &str) -> fs::File { 29 | let path = get_test_path(file_name); 30 | fs::File::open(path.as_path()).unwrap_or_else(|err| { 31 | panic!( 32 | "Test file {} could not be opened, did you do `git submodule update`?: {}", 33 | path.display(), 34 | err 35 | ) 36 | }) 37 | } 38 | -------------------------------------------------------------------------------- /parquet/src/util/test_common/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | pub mod page_util; 19 | 20 | #[cfg(test)] 21 | pub mod file_util; 22 | 23 | #[cfg(test)] 24 | pub mod rand_gen; 25 | -------------------------------------------------------------------------------- /parquet/tests/arrow_reader/bad_raw_metadata.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/apache/arrow-rs/3681540e072857746597aa65f083886dbf37f2d5/parquet/tests/arrow_reader/bad_raw_metadata.bin -------------------------------------------------------------------------------- /parquet/tests/encryption/mod.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | #[cfg(feature = "encryption")] 19 | mod encryption; 20 | mod encryption_agnostic; 21 | #[cfg(all(feature = "encryption", feature = "async"))] 22 | mod encryption_async; 23 | #[cfg(not(feature = "encryption"))] 24 | mod encryption_disabled; 25 | #[cfg(feature = "encryption")] 26 | mod encryption_util; 27 | -------------------------------------------------------------------------------- /parquet_derive/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
17 | 18 | [package] 19 | name = "parquet_derive" 20 | version = { workspace = true } 21 | license = { workspace = true } 22 | description = "Derive macros for the Rust implementation of Apache Parquet" 23 | homepage = { workspace = true } 24 | repository = { workspace = true } 25 | authors = { workspace = true } 26 | keywords = ["parquet"] 27 | readme = "README.md" 28 | edition = { workspace = true } 29 | rust-version = { workspace = true } 30 | 31 | [lib] 32 | proc-macro = true 33 | 34 | [dependencies] 35 | proc-macro2 = { version = "1.0", default-features = false } 36 | quote = { version = "1.0", default-features = false } 37 | syn = { version = "2.0", features = ["extra-traits"] } 38 | parquet = { workspace = true } 39 | 40 | [package.metadata.docs.rs] 41 | all-features = true 42 | -------------------------------------------------------------------------------- /parquet_derive/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ../LICENSE.txt -------------------------------------------------------------------------------- /parquet_derive/NOTICE.txt: -------------------------------------------------------------------------------- 1 | ../NOTICE.txt -------------------------------------------------------------------------------- /parquet_derive_test/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "parquet_derive_test" 20 | version = { workspace = true } 21 | license = { workspace = true } 22 | description = "Integration test package for parquet-derive" 23 | homepage = { workspace = true } 24 | repository = { workspace = true } 25 | authors = { workspace = true } 26 | keywords = [ "parquet" ] 27 | edition = { workspace = true } 28 | publish = false 29 | rust-version = { workspace = true } 30 | 31 | [dependencies] 32 | parquet = { workspace = true } 33 | parquet_derive = { path = "../parquet_derive", default-features = false } 34 | chrono = { workspace = true } 35 | uuid = { version = "1", features = ["v4"] } 36 | 37 | [package.metadata.docs.rs] 38 | all-features = true 39 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | edition = "2021" 19 | --------------------------------------------------------------------------------