├── .github └── workflows │ ├── release.yml │ └── test.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── Changes.md ├── Development.md ├── License.md ├── Readme.md ├── example ├── Cargo.toml └── src │ └── main.rs ├── integration_tests ├── Cargo.toml └── tests │ ├── root.rs │ └── tests │ ├── mod.rs │ ├── tensors.rs │ └── utils.rs ├── pyproject.toml ├── serde_arrow ├── Cargo.toml ├── Status.md ├── benches │ ├── groups │ │ ├── complex_common.rs │ │ ├── impls.rs │ │ ├── json_to_arrow.rs │ │ ├── mod.rs │ │ └── primitives.rs │ └── serde_arrow_bench.rs ├── build.rs └── src │ ├── _impl │ └── docs │ │ ├── defs.rs │ │ └── quickstart.rs │ ├── arrow2_impl.rs │ ├── arrow_impl.rs │ ├── internal │ ├── array_builder.rs │ ├── chrono.rs │ ├── deserialization │ │ ├── array_deserializer.rs │ │ ├── binary_deserializer.rs │ │ ├── bool_deserializer.rs │ │ ├── date_deserializer.rs │ │ ├── decimal_deserializer.rs │ │ ├── dictionary_deserializer.rs │ │ ├── duration_deserializer.rs │ │ ├── enum_deserializer.rs │ │ ├── enums_as_string_impl.rs │ │ ├── fixed_size_binary_deserializer.rs │ │ ├── fixed_size_list_deserializer.rs │ │ ├── float_deserializer.rs │ │ ├── float_impls.rs │ │ ├── integer_deserializer.rs │ │ ├── integer_impls.rs │ │ ├── list_deserializer.rs │ │ ├── map_deserializer.rs │ │ ├── mod.rs │ │ ├── null_deserializer.rs │ │ ├── random_access_deserializer.rs │ │ ├── string_deserializer.rs │ │ ├── struct_deserializer.rs │ │ ├── time_deserializer.rs │ │ ├── timestamp_deserializer.rs │ │ └── utils.rs │ ├── deserializer.rs │ ├── error.rs │ ├── mod.rs │ ├── schema │ │ ├── extensions │ │ │ ├── bool8_field.rs │ │ │ ├── fixed_shape_tensor_field.rs │ │ │ ├── mod.rs │ │ │ ├── utils.rs │ │ │ └── variable_shape_tensor_field.rs │ │ ├── from_samples │ │ │ ├── mod.rs │ │ │ └── test_error_messages.rs │ │ ├── from_type │ │ │ ├── mod.rs │ │ │ └── test_error_messages.rs │ │ ├── mod.rs │ │ ├── serde │ │ │ ├── deserialize.rs │ │ │ ├── mod.rs │ │ │ └── serialize.rs │ │ ├── strategy.rs │ │ ├── test.rs │ 
│ ├── tracer.rs │ │ └── tracing_options.rs │ ├── serialization │ │ ├── array_builder.rs │ │ ├── binary_builder.rs │ │ ├── bool_builder.rs │ │ ├── date_builder.rs │ │ ├── decimal_builder.rs │ │ ├── dictionary_utf8_builder.rs │ │ ├── duration_builder.rs │ │ ├── fixed_size_binary_builder.rs │ │ ├── fixed_size_list_builder.rs │ │ ├── float_builder.rs │ │ ├── int_builder.rs │ │ ├── list_builder.rs │ │ ├── map_builder.rs │ │ ├── mod.rs │ │ ├── null_builder.rs │ │ ├── outer_sequence_builder.rs │ │ ├── simple_serializer.rs │ │ ├── struct_builder.rs │ │ ├── test.rs │ │ ├── time_builder.rs │ │ ├── timestamp_builder.rs │ │ ├── union_builder.rs │ │ ├── unknown_variant_builder.rs │ │ └── utf8_builder.rs │ ├── serializer.rs │ ├── testing.rs │ └── utils │ │ ├── array_ext.rs │ │ ├── array_view_ext.rs │ │ ├── decimal.rs │ │ ├── dsl.rs │ │ ├── mod.rs │ │ ├── test_value.rs │ │ └── value.rs │ ├── lib.rs │ ├── marrow_impl.rs │ ├── test │ ├── api_chrono.rs │ ├── error_messages │ │ ├── deserializers.rs │ │ ├── misc.rs │ │ ├── mod.rs │ │ ├── push_validity.rs │ │ ├── trace_from_samples.rs │ │ └── trace_from_type.rs │ ├── jiff.rs │ ├── mod.rs │ ├── schema_like.rs │ └── schema_tracing.rs │ └── test_with_arrow │ ├── impls │ ├── arrow_binary.rs │ ├── arrow_binary_view.rs │ ├── arrow_date.rs │ ├── arrow_decimal.rs │ ├── arrow_dictionary.rs │ ├── arrow_fixed_size_binary.rs │ ├── arrow_fixed_size_list.rs │ ├── arrow_list.rs │ ├── arrow_map.rs │ ├── arrow_struct.rs │ ├── arrow_time.rs │ ├── arrow_timestamp.rs │ ├── arrow_union.rs │ ├── arrow_utf8.rs │ ├── arrow_utf8_view.rs │ ├── bool8.rs │ ├── examples.rs │ ├── issue_264_enum_dummy_values.rs │ ├── issue_74_unknown_fields.rs │ ├── issue_79_declared_but_missing_fields.rs │ ├── issue_90_type_tracing.rs │ ├── mod.rs │ ├── primitives.rs │ ├── serde_i32.rs │ ├── serde_i64.rs │ ├── third_party_big_decimal.rs │ ├── third_party_chrono.rs │ ├── third_party_jiff.rs │ ├── third_party_rust_decimal.rs │ ├── third_party_serde_json.rs │ ├── third_party_uuid.rs │ 
├── tuple.rs │ ├── utils.rs │ └── wrappers.rs │ ├── issue_137_schema_like_from_arrow_schema.rs │ ├── issue_248_slices_deserialization.rs │ ├── issue_35_preserve_metadata.rs │ ├── issue_90_top_level_nulls_in_structs.rs │ ├── items_wrapper.rs │ ├── mod.rs │ ├── schema_overwrites.rs │ └── serializer_deserializer.rs ├── timings.png ├── uv.lock └── x.py /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Release", 3 | "on": { 4 | "release": { 5 | "types": [ 6 | "published" 7 | ] 8 | } 9 | }, 10 | "env": { 11 | "CARGO_TERM_COLOR": "always" 12 | }, 13 | "jobs": { 14 | "build": { 15 | "runs-on": "ubuntu-latest", 16 | "env": { 17 | "CARGO_REGISTRY_TOKEN": "${{ secrets.CARGO_REGISTRY_TOKEN }}" 18 | }, 19 | "steps": [ 20 | { 21 | "uses": "actions/checkout@v4" 22 | }, 23 | { 24 | "name": "rustc", 25 | "run": "rustc --version" 26 | }, 27 | { 28 | "name": "cargo", 29 | "run": "cargo --version" 30 | }, 31 | { 32 | "name": "Check", 33 | "run": "cargo check" 34 | }, 35 | { 36 | "name": "Check arrow2-0-17", 37 | "run": "cargo check --features arrow2-0-17" 38 | }, 39 | { 40 | "name": "Check arrow2-0-16", 41 | "run": "cargo check --features arrow2-0-16" 42 | }, 43 | { 44 | "name": "Check arrow-55", 45 | "run": "cargo check --features arrow-55" 46 | }, 47 | { 48 | "name": "Check arrow-54", 49 | "run": "cargo check --features arrow-54" 50 | }, 51 | { 52 | "name": "Check arrow-53", 53 | "run": "cargo check --features arrow-53" 54 | }, 55 | { 56 | "name": "Check arrow-52", 57 | "run": "cargo check --features arrow-52" 58 | }, 59 | { 60 | "name": "Check arrow-51", 61 | "run": "cargo check --features arrow-51" 62 | }, 63 | { 64 | "name": "Check arrow-50", 65 | "run": "cargo check --features arrow-50" 66 | }, 67 | { 68 | "name": "Check arrow-50", 69 | "run": "cargo check --features arrow-50" 70 | }, 71 | { 72 | "name": "Check arrow-49", 73 | "run": "cargo check --features arrow-49" 74 | }, 75 | { 76 | "name": 
"Check arrow-48", 77 | "run": "cargo check --features arrow-48" 78 | }, 79 | { 80 | "name": "Check arrow-47", 81 | "run": "cargo check --features arrow-47" 82 | }, 83 | { 84 | "name": "Check arrow-46", 85 | "run": "cargo check --features arrow-46" 86 | }, 87 | { 88 | "name": "Check arrow-45", 89 | "run": "cargo check --features arrow-45" 90 | }, 91 | { 92 | "name": "Check arrow-44", 93 | "run": "cargo check --features arrow-44" 94 | }, 95 | { 96 | "name": "Check arrow-43", 97 | "run": "cargo check --features arrow-43" 98 | }, 99 | { 100 | "name": "Check arrow-42", 101 | "run": "cargo check --features arrow-42" 102 | }, 103 | { 104 | "name": "Check arrow-41", 105 | "run": "cargo check --features arrow-41" 106 | }, 107 | { 108 | "name": "Check arrow-40", 109 | "run": "cargo check --features arrow-40" 110 | }, 111 | { 112 | "name": "Check arrow-39", 113 | "run": "cargo check --features arrow-39" 114 | }, 115 | { 116 | "name": "Check arrow-38", 117 | "run": "cargo check --features arrow-38" 118 | }, 119 | { 120 | "name": "Check arrow-37", 121 | "run": "cargo check --features arrow-37" 122 | }, 123 | { 124 | "name": "Check format", 125 | "run": "cargo fmt --check" 126 | }, 127 | { 128 | "name": "Build", 129 | "run": "cargo build --features arrow2-0-17,arrow-55" 130 | }, 131 | { 132 | "name": "Test", 133 | "run": "cargo test --features arrow2-0-17,arrow-55" 134 | }, 135 | { 136 | "name": "Publish to crates.io", 137 | "working-directory": "serde_arrow", 138 | "run": "cargo publish" 139 | } 140 | ] 141 | } 142 | } 143 | } -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Test", 3 | "on": { 4 | "workflow_dispatch": {}, 5 | "pull_request": { 6 | "branches": [ 7 | "main", 8 | "develop-*" 9 | ], 10 | "types": [ 11 | "opened", 12 | "edited", 13 | "reopened", 14 | "ready_for_review", 15 | "synchronize" 16 | ] 17 | } 18 | 
}, 19 | "env": { 20 | "CARGO_TERM_COLOR": "always" 21 | }, 22 | "jobs": { 23 | "build": { 24 | "runs-on": "ubuntu-latest", 25 | "steps": [ 26 | { 27 | "uses": "actions/checkout@v4" 28 | }, 29 | { 30 | "name": "rustc", 31 | "run": "rustc --version" 32 | }, 33 | { 34 | "name": "cargo", 35 | "run": "cargo --version" 36 | }, 37 | { 38 | "name": "Check", 39 | "run": "cargo check" 40 | }, 41 | { 42 | "name": "Check arrow2-0-17", 43 | "run": "cargo check --features arrow2-0-17" 44 | }, 45 | { 46 | "name": "Check arrow2-0-16", 47 | "run": "cargo check --features arrow2-0-16" 48 | }, 49 | { 50 | "name": "Check arrow-55", 51 | "run": "cargo check --features arrow-55" 52 | }, 53 | { 54 | "name": "Check arrow-54", 55 | "run": "cargo check --features arrow-54" 56 | }, 57 | { 58 | "name": "Check arrow-53", 59 | "run": "cargo check --features arrow-53" 60 | }, 61 | { 62 | "name": "Check arrow-52", 63 | "run": "cargo check --features arrow-52" 64 | }, 65 | { 66 | "name": "Check arrow-51", 67 | "run": "cargo check --features arrow-51" 68 | }, 69 | { 70 | "name": "Check arrow-50", 71 | "run": "cargo check --features arrow-50" 72 | }, 73 | { 74 | "name": "Check arrow-50", 75 | "run": "cargo check --features arrow-50" 76 | }, 77 | { 78 | "name": "Check arrow-49", 79 | "run": "cargo check --features arrow-49" 80 | }, 81 | { 82 | "name": "Check arrow-48", 83 | "run": "cargo check --features arrow-48" 84 | }, 85 | { 86 | "name": "Check arrow-47", 87 | "run": "cargo check --features arrow-47" 88 | }, 89 | { 90 | "name": "Check arrow-46", 91 | "run": "cargo check --features arrow-46" 92 | }, 93 | { 94 | "name": "Check arrow-45", 95 | "run": "cargo check --features arrow-45" 96 | }, 97 | { 98 | "name": "Check arrow-44", 99 | "run": "cargo check --features arrow-44" 100 | }, 101 | { 102 | "name": "Check arrow-43", 103 | "run": "cargo check --features arrow-43" 104 | }, 105 | { 106 | "name": "Check arrow-42", 107 | "run": "cargo check --features arrow-42" 108 | }, 109 | { 110 | "name": "Check 
arrow-41", 111 | "run": "cargo check --features arrow-41" 112 | }, 113 | { 114 | "name": "Check arrow-40", 115 | "run": "cargo check --features arrow-40" 116 | }, 117 | { 118 | "name": "Check arrow-39", 119 | "run": "cargo check --features arrow-39" 120 | }, 121 | { 122 | "name": "Check arrow-38", 123 | "run": "cargo check --features arrow-38" 124 | }, 125 | { 126 | "name": "Check arrow-37", 127 | "run": "cargo check --features arrow-37" 128 | }, 129 | { 130 | "name": "Check format", 131 | "run": "cargo fmt --check" 132 | }, 133 | { 134 | "name": "Build", 135 | "run": "cargo build --features arrow2-0-17,arrow-55" 136 | }, 137 | { 138 | "name": "Test", 139 | "run": "cargo test --features arrow2-0-17,arrow-55" 140 | } 141 | ] 142 | } 143 | } 144 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | /example.ipc 3 | 4 | __pycache__/ 5 | .ipynb_checkpoints/ 6 | 7 | # Editor directories and files 8 | .idea/ 9 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["serde_arrow", "example", "integration_tests"] 3 | default-members = ["serde_arrow"] 4 | 5 | resolver = "2" 6 | 7 | [profile.bench] 8 | debug = true 9 | -------------------------------------------------------------------------------- /Development.md: -------------------------------------------------------------------------------- 1 | # Development process 2 | 3 | All common tasks are bundled in the `x.py` script: 4 | 5 | ```bash 6 | # format the code and run tests 7 | uv run python x.py precommit 8 | ``` 9 | 10 | Run `python x.py --help` for details. The script only uses standard Python 11 | modules and can be run without installing further packages. 12 | 13 | ## Creating a release 14 | 15 | 1. 
Create a new branch with name `release/{VERSION}` 16 | 2. Update the `version` field in 17 | [`serde_arrow/Cargo.toml`](serde_arrow/Cargo.toml) 18 | 3. Merge the branch into main 19 | 4. Create a new release via the GH UI tagged with `v{VERSION}` to trigger the 20 | release workflow 21 | 22 | ## Running the benchmarks 23 | 24 | 1. `uv run python x.py bench` 25 | 2. (optional) `uv run python x.py summarize-bench --update` to update the readme 26 | 27 | ## Adding a new arrow version 28 | 29 | 1. `uv run python x.py add-arrow-version {VERSION}` 30 | 2. `uv run python x.py precommit` 31 | 32 | ## Error format 33 | 34 | Style: 35 | 36 | - Use uppercase letters to start the error message 37 | - Do not include trailing punctuation (e.g., "Not supported", not "Not supported.") 38 | 39 | Common annotations: 40 | 41 | - `field`: the path of the field affected by the error 42 | - `data_type`: the Arrow data type of the field affected by the error 43 | -------------------------------------------------------------------------------- /License.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 - 2024 Christopher Prohm and contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "example" 3 | version = "0.1.0" 4 | authors = ["Christopher Prohm "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | # arrow-version:replace: arrow = {{ version = "{version}", features = ["ipc"] }} 11 | arrow = { version = "55", features = ["ipc"] } 12 | 13 | chrono = { version = "0.4", features = ["serde"] } 14 | serde = { version = "1.0", features = ["derive"] } 15 | 16 | # arrow-version:replace: serde_arrow = {{ path = "../serde_arrow", features = ["arrow-{version}"] }} 17 | serde_arrow = { path = "../serde_arrow", features = ["arrow-55"] } -------------------------------------------------------------------------------- /example/src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, fs::File}; 2 | 3 | use chrono::NaiveDateTime; 4 | use serde::Serialize; 5 | 6 | use arrow::datatypes::FieldRef; 7 | 8 | macro_rules! 
hashmap { 9 | () => { 10 | ::std::collections::HashMap::new() 11 | }; 12 | ($($key:expr => $value:expr),*) => { 13 | { 14 | let mut m = ::std::collections::HashMap::new(); 15 | $(m.insert($key.into(), $value.into());)* 16 | m 17 | } 18 | }; 19 | } 20 | 21 | #[derive(Serialize)] 22 | struct Example { 23 | r#type: SampleType, 24 | int8: i8, 25 | int32: i32, 26 | float32: f32, 27 | date64: NaiveDateTime, 28 | boolean: bool, 29 | map: HashMap, 30 | nested: Nested, 31 | } 32 | 33 | #[derive(Serialize)] 34 | enum SampleType { 35 | A, 36 | B, 37 | C, 38 | } 39 | 40 | #[derive(Serialize)] 41 | struct Nested { 42 | a: Option, 43 | b: Nested2, 44 | } 45 | 46 | #[derive(Serialize)] 47 | struct Nested2 { 48 | foo: String, 49 | } 50 | 51 | #[allow(deprecated)] 52 | fn main() -> Result<(), PanicOnError> { 53 | let examples = vec![ 54 | Example { 55 | r#type: SampleType::A, 56 | float32: 1.0, 57 | int8: 1, 58 | int32: 4, 59 | date64: NaiveDateTime::from_timestamp(0, 0), 60 | boolean: true, 61 | map: hashmap! { "a" => 2 }, 62 | nested: Nested { 63 | a: Some(42.0), 64 | b: Nested2 { 65 | foo: String::from("hello"), 66 | }, 67 | }, 68 | }, 69 | Example { 70 | r#type: SampleType::B, 71 | float32: 2.0, 72 | int8: 2, 73 | int32: 5, 74 | date64: NaiveDateTime::from_timestamp(5 * 24 * 60 * 60, 0), 75 | boolean: false, 76 | map: hashmap! { "a" => 3 }, 77 | nested: Nested { 78 | a: None, 79 | b: Nested2 { 80 | foo: String::from("world"), 81 | }, 82 | }, 83 | }, 84 | Example { 85 | r#type: SampleType::C, 86 | float32: 12.0, 87 | int8: -5, 88 | int32: 50, 89 | date64: NaiveDateTime::from_timestamp(5 * 24 * 60 * 60, 0), 90 | boolean: true, 91 | map: hashmap! 
{ "a" => 3, "b" => 4 }, 92 | nested: Nested { 93 | a: Some(2.0), 94 | b: Nested2 { 95 | foo: String::from("world"), 96 | }, 97 | }, 98 | }, 99 | ]; 100 | 101 | use serde_arrow::schema::{SchemaLike, TracingOptions}; 102 | 103 | let tracing_options = TracingOptions::default() 104 | .guess_dates(true) 105 | .enums_without_data_as_strings(true); 106 | 107 | let fields = Vec::::from_samples(&examples, tracing_options)?; 108 | let batch = serde_arrow::to_record_batch(&fields, &examples)?; 109 | 110 | let file = File::create("example.ipc")?; 111 | 112 | let mut writer = arrow::ipc::writer::FileWriter::try_new(file, &batch.schema())?; 113 | writer.write(&batch)?; 114 | writer.finish()?; 115 | 116 | Ok(()) 117 | } 118 | 119 | #[derive(Debug)] 120 | struct PanicOnError; 121 | 122 | impl From for PanicOnError { 123 | fn from(e: E) -> Self { 124 | panic!("Encountered error: {}", e); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /integration_tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "integration_tests" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | # arrow-version:replace: arrow = {{ version = "{version}", features = ["ipc"] }} 8 | arrow = { version = "55", features = ["ipc"] } 9 | 10 | chrono = { version = "0.4", features = ["serde"] } 11 | serde = { version = "1.0", features = ["derive"] } 12 | serde_json = "1" 13 | 14 | # arrow-version:replace: serde_arrow = {{ path = "../serde_arrow", features = ["arrow-{version}"] }} 15 | serde_arrow = { path = "../serde_arrow", features = ["arrow-55"] } -------------------------------------------------------------------------------- /integration_tests/tests/root.rs: -------------------------------------------------------------------------------- 1 | mod tests; 2 | -------------------------------------------------------------------------------- 
/integration_tests/tests/tests/mod.rs: -------------------------------------------------------------------------------- 1 | mod tensors; 2 | mod utils; 3 | -------------------------------------------------------------------------------- /integration_tests/tests/tests/tensors.rs: -------------------------------------------------------------------------------- 1 | use arrow::datatypes::FieldRef; 2 | use serde::Serialize; 3 | use serde_arrow::{ 4 | schema::{ 5 | ext::{FixedShapeTensorField, VariableShapeTensorField}, 6 | SchemaLike, 7 | }, 8 | utils::Item, 9 | }; 10 | use serde_json::json; 11 | 12 | use super::utils::{execute_python, write_file, Result}; 13 | 14 | /// Test that a fixed shape tensor field can be correctly read in PyArrow 15 | #[test] 16 | fn fixed_shape_tensor() -> Result<()> { 17 | let items = vec![ 18 | Item(vec![1_i64, 2, 3, 4, 5, 6]), 19 | Item(vec![7, 8, 9, 0, 1, 2]), 20 | Item(vec![3, 4, 5, 6, 7, 8]), 21 | Item(vec![9, 0, 1, 2, 3, 4]), 22 | ]; 23 | let fields = Vec::::from_value(&[FixedShapeTensorField::new( 24 | "item", 25 | json!({"name": "element", "data_type": "I64"}), 26 | vec![3, 2, 1], 27 | )?])?; 28 | 29 | let batch = serde_arrow::to_record_batch(&fields, &items)?; 30 | 31 | write_file("fixed_shape_tensor.ipc", &batch)?; 32 | 33 | let output = execute_python( 34 | r#" 35 | import pyarrow as pa 36 | tbl = pa.ipc.open_file("fixed_shape_tensor.ipc").read_all() 37 | print(tbl["item"].combine_chunks().to_numpy_ndarray().shape) 38 | "#, 39 | )?; 40 | assert_eq!(output.trim(), "(4, 3, 2, 1)"); 41 | Ok(()) 42 | } 43 | 44 | #[test] 45 | fn variable_shape_tensor() -> Result<()> { 46 | #[derive(Serialize)] 47 | struct Tensor { 48 | data: Vec, 49 | shape: Vec, 50 | } 51 | 52 | let items = vec![ 53 | Item(Tensor { 54 | data: vec![1, 2, 3, 4, 5, 6], 55 | shape: vec![3, 2, 1], 56 | }), 57 | Item(Tensor { 58 | data: vec![1, 2], 59 | shape: vec![2, 1, 1], 60 | }), 61 | Item(Tensor { 62 | data: vec![1, 2, 3, 4], 63 | shape: vec![2, 2, 1], 64 | }), 65 | ]; 
66 | let fields = Vec::::from_value(&[VariableShapeTensorField::new( 67 | "item", 68 | json!({"name": "element", "data_type": "I64"}), 69 | 3, 70 | )?])?; 71 | 72 | let batch = serde_arrow::to_record_batch(&fields, &items)?; 73 | 74 | write_file("variable_shape_tensor.ipc", &batch)?; 75 | 76 | let output = execute_python( 77 | r#" 78 | import pyarrow as pa 79 | tbl = pa.ipc.open_file("variable_shape_tensor.ipc").read_all() 80 | "#, 81 | )?; 82 | let _ = output; 83 | 84 | Ok(()) 85 | } 86 | -------------------------------------------------------------------------------- /integration_tests/tests/tests/utils.rs: -------------------------------------------------------------------------------- 1 | use std::{fs::File, path::PathBuf, process::Command}; 2 | 3 | use arrow::array::RecordBatch; 4 | 5 | pub type Result = std::result::Result; 6 | 7 | pub fn write_file(name: &str, batch: &RecordBatch) -> Result<()> { 8 | let tmp_dir = PathBuf::from(env!("CARGO_TARGET_TMPDIR")); 9 | let file_path = tmp_dir.join(name); 10 | 11 | let file = File::create(&file_path)?; 12 | let mut writer = arrow::ipc::writer::FileWriter::try_new(file, &batch.schema())?; 13 | writer.write(&batch)?; 14 | writer.finish()?; 15 | Ok(()) 16 | } 17 | 18 | pub fn execute_python(source: &str) -> Result { 19 | let tmp_dir = PathBuf::from(env!("CARGO_TARGET_TMPDIR")); 20 | 21 | // TODO: implement proper dedent logic 22 | let mut dedented_source = String::new(); 23 | for line in source.lines() { 24 | dedented_source.push_str(line.trim_start()); 25 | dedented_source.push('\n'); 26 | } 27 | let output = Command::new("python") 28 | .arg("-c") 29 | .arg(dedented_source) 30 | .current_dir(tmp_dir) 31 | .output()?; 32 | 33 | if !output.status.success() { 34 | panic!("command failed: {output:?}"); 35 | } 36 | 37 | Ok(String::from_utf8(output.stdout)?) 
38 | } 39 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | description = "The dependencies required to run the integration tests" 3 | name = "serde-arrow" 4 | version = "0.0.0" 5 | requires-python = ">=3.12" 6 | 7 | dependencies = [ 8 | "pyarrow~=19.0", 9 | "numpy~=2.2", 10 | "ruff~=0.11", 11 | "polars~=1.25", 12 | ] 13 | 14 | [tool.uv] 15 | package = false 16 | -------------------------------------------------------------------------------- /serde_arrow/benches/groups/complex_common.rs: -------------------------------------------------------------------------------- 1 | use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; 2 | use rand::{ 3 | distributions::{Standard, Uniform}, 4 | prelude::Distribution, 5 | Rng, 6 | }; 7 | use serde::{Deserialize, Serialize}; 8 | 9 | // required for arrow2_convert 10 | use serde_arrow::_impl::arrow2; 11 | 12 | #[derive(Debug, Serialize, Deserialize, ArrowField, ArrowSerialize, ArrowDeserialize)] 13 | pub struct Item { 14 | string: String, 15 | points: Vec, 16 | child: SubItem, 17 | } 18 | 19 | #[derive(Debug, Serialize, Deserialize, ArrowField, ArrowSerialize, ArrowDeserialize)] 20 | struct Point { 21 | x: f32, 22 | y: f32, 23 | } 24 | 25 | #[derive(Debug, Serialize, Deserialize, ArrowField, ArrowSerialize, ArrowDeserialize)] 26 | struct SubItem { 27 | a: bool, 28 | b: f64, 29 | c: Option, 30 | } 31 | 32 | impl Item { 33 | pub fn random(rng: &mut R) -> Self { 34 | let n_string = Uniform::new(1, 50).sample(rng); 35 | let n_points = Uniform::new(1, 50).sample(rng); 36 | 37 | Self { 38 | string: (0..n_string) 39 | .map(|_| -> char { Standard.sample(rng) }) 40 | .collect(), 41 | points: (0..n_points) 42 | .map(|_| Point { 43 | x: Standard.sample(rng), 44 | y: Standard.sample(rng), 45 | }) 46 | .collect(), 47 | child: SubItem { 48 | a: Standard.sample(rng), 49 | b: 
Standard.sample(rng), 50 | c: Standard.sample(rng), 51 | }, 52 | } 53 | } 54 | } 55 | 56 | crate::groups::impls::define_benchmark!(complex_common, ty = Item, n = [100_000, 1_000_000],); 57 | -------------------------------------------------------------------------------- /serde_arrow/benches/groups/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod complex_common; 2 | pub mod impls; 3 | pub mod json_to_arrow; 4 | pub mod primitives; 5 | -------------------------------------------------------------------------------- /serde_arrow/benches/groups/primitives.rs: -------------------------------------------------------------------------------- 1 | use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize}; 2 | use rand::{distributions::Standard, prelude::Distribution, Rng}; 3 | use serde::{Deserialize, Serialize}; 4 | 5 | // required for arrow2_convert 6 | use serde_arrow::_impl::arrow2; 7 | 8 | #[derive(Debug, Serialize, Deserialize, ArrowField, ArrowSerialize, ArrowDeserialize)] 9 | pub struct Item { 10 | pub k: bool, 11 | pub a: u8, 12 | pub b: u16, 13 | pub c: u32, 14 | pub d: u64, 15 | pub e: i8, 16 | pub f: i16, 17 | pub g: i32, 18 | pub h: i64, 19 | pub i: f32, 20 | pub j: f64, 21 | pub l: String, 22 | } 23 | 24 | impl Item { 25 | pub fn random(rng: &mut R) -> Self { 26 | Self { 27 | a: Standard.sample(rng), 28 | b: Standard.sample(rng), 29 | c: Standard.sample(rng), 30 | d: Standard.sample(rng), 31 | e: Standard.sample(rng), 32 | f: Standard.sample(rng), 33 | g: Standard.sample(rng), 34 | h: Standard.sample(rng), 35 | i: Standard.sample(rng), 36 | j: Standard.sample(rng), 37 | k: Standard.sample(rng), 38 | l: crate::groups::impls::random_string(rng, 0..50), 39 | } 40 | } 41 | } 42 | 43 | crate::groups::impls::define_benchmark!(primitives, ty = Item, n = [100_000, 1_000_000],); 44 | -------------------------------------------------------------------------------- /serde_arrow/benches/serde_arrow_bench.rs: 
-------------------------------------------------------------------------------- 1 | mod groups; 2 | 3 | criterion::criterion_main!( 4 | groups::complex_common::benchmark, 5 | groups::primitives::benchmark, 6 | groups::json_to_arrow::benchmark, 7 | ); 8 | -------------------------------------------------------------------------------- /serde_arrow/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let max_arrow2_version: Option = [ 3 | #[cfg(feature = "arrow2-0-17")] 4 | 17, 5 | #[cfg(feature = "arrow2-0-16")] 6 | 16, 7 | ] 8 | .into_iter() 9 | .max(); 10 | 11 | if let Some(version) = max_arrow2_version { 12 | println!("cargo:rustc-cfg=has_arrow2"); 13 | println!("cargo:rustc-cfg=has_arrow2_0_{version}"); 14 | } 15 | 16 | let max_arrow_version: Option = [ 17 | // arrow-version:insert: #[cfg(feature = "arrow-{version}")]{\n}{version}, 18 | #[cfg(feature = "arrow-55")] 19 | 55, 20 | #[cfg(feature = "arrow-54")] 21 | 54, 22 | #[cfg(feature = "arrow-53")] 23 | 53, 24 | #[cfg(feature = "arrow-52")] 25 | 52, 26 | #[cfg(feature = "arrow-51")] 27 | 51, 28 | #[cfg(feature = "arrow-50")] 29 | 50, 30 | #[cfg(feature = "arrow-49")] 31 | 49, 32 | #[cfg(feature = "arrow-48")] 33 | 48, 34 | #[cfg(feature = "arrow-47")] 35 | 47, 36 | #[cfg(feature = "arrow-46")] 37 | 46, 38 | #[cfg(feature = "arrow-45")] 39 | 45, 40 | #[cfg(feature = "arrow-44")] 41 | 44, 42 | #[cfg(feature = "arrow-43")] 43 | 43, 44 | #[cfg(feature = "arrow-42")] 45 | 42, 46 | #[cfg(feature = "arrow-41")] 47 | 41, 48 | #[cfg(feature = "arrow-40")] 49 | 40, 50 | #[cfg(feature = "arrow-39")] 51 | 39, 52 | #[cfg(feature = "arrow-38")] 53 | 38, 54 | #[cfg(feature = "arrow-37")] 55 | 37, 56 | ] 57 | .into_iter() 58 | .max(); 59 | 60 | if let Some(version) = max_arrow_version { 61 | println!("cargo:rustc-cfg=has_arrow"); 62 | println!("cargo:rustc-cfg=has_arrow_{version}"); 63 | 64 | if version >= 47 { 65 | 
println!("cargo:rustc-cfg=has_arrow_fixed_binary_support"); 66 | } 67 | if version >= 53 { 68 | println!("cargo:rustc-cfg=has_arrow_bytes_view_support"); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /serde_arrow/src/_impl/docs/defs.rs: -------------------------------------------------------------------------------- 1 | //! Common definitions for doc tests 2 | use serde::{Serialize, Deserialize}; 3 | 4 | #[derive(Clone, Deserialize, Serialize)] 5 | pub struct Record { 6 | pub a: Option, 7 | pub b: u64, 8 | } 9 | 10 | pub const fn example_records() -> &'static [Record] { 11 | &[Record { a: Some(1.0), b: 2}] 12 | } 13 | 14 | #[cfg(has_arrow)] 15 | pub fn example_record_batch() -> crate::_impl::arrow::array::RecordBatch { 16 | use crate::schema::{SchemaLike, TracingOptions}; 17 | 18 | let items = example_records(); 19 | 20 | let fields = Vec::::from_type::(TracingOptions::default()).unwrap(); 21 | crate::to_record_batch(&fields, &items).unwrap() 22 | } 23 | 24 | #[cfg(has_arrow)] 25 | pub fn example_arrow_arrays() -> (Vec, Vec) { 26 | use crate::schema::{SchemaLike, TracingOptions}; 27 | 28 | let items = example_records(); 29 | 30 | let fields = Vec::::from_type::(TracingOptions::default()).unwrap(); 31 | let arrays = crate::to_arrow(&fields, items).unwrap(); 32 | 33 | (fields, arrays) 34 | } 35 | 36 | #[cfg(has_arrow2)] 37 | pub fn example_arrow2_arrays() -> (Vec, Vec>) { 38 | use crate::schema::{SchemaLike, TracingOptions}; 39 | 40 | let items = example_records(); 41 | 42 | let fields = Vec::::from_type::(TracingOptions::default()).unwrap(); 43 | let arrays = crate::to_arrow2(&fields, items).unwrap(); 44 | 45 | (fields, arrays) 46 | } 47 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/array_builder.rs: -------------------------------------------------------------------------------- 1 | use serde::Serialize; 2 | 3 | use marrow::array::Array; 4 | 5 
| use crate::internal::{ 6 | error::Result, schema::SerdeArrowSchema, serialization::OuterSequenceBuilder, 7 | }; 8 | 9 | /// Construct arrays by pushing individual records 10 | /// 11 | /// It can be constructed via 12 | /// 13 | /// - [`ArrayBuilder::new`] 14 | #[cfg_attr(has_arrow, doc = r"- [`ArrayBuilder::from_arrow`]")] 15 | #[cfg_attr(has_arrow2, doc = r"- [`ArrayBuilder::from_arrow2`]")] 16 | /// 17 | #[cfg_attr( 18 | any(has_arrow, has_arrow2), 19 | doc = r"It supports array construction via" 20 | )] 21 | #[cfg_attr(any(has_arrow, has_arrow2), doc = r"")] 22 | #[cfg_attr(has_arrow, doc = r"- [`ArrayBuilder::to_record_batch`]")] 23 | #[cfg_attr(has_arrow, doc = r"- [`ArrayBuilder::to_arrow`]")] 24 | #[cfg_attr(has_arrow2, doc = r"- [`ArrayBuilder::to_arrow2`]")] 25 | /// 26 | /// Usage: 27 | /// 28 | /// ```rust 29 | /// # #[cfg(has_arrow)] 30 | /// # fn main() -> serde_arrow::_impl::PanicOnError<()> { 31 | /// # use serde_arrow::_impl::docs::defs::{Record, example_records}; 32 | /// # use serde_arrow::schema::{TracingOptions, SchemaLike}; 33 | /// # use serde::Serialize; 34 | /// # let items = example_records(); 35 | /// # let item = items[0].clone(); 36 | /// # let fields = Vec::::from_type::(TracingOptions::default())?; 37 | /// use serde_arrow::ArrayBuilder; 38 | /// let mut builder = ArrayBuilder::from_arrow(&fields)?; 39 | /// 40 | /// // push multiple items 41 | /// builder.extend(&items)?; 42 | /// 43 | /// // push a single item 44 | /// builder.push(&item)?; 45 | /// 46 | /// // build the arrays 47 | /// let arrays = builder.to_arrow()?; 48 | /// # 49 | /// # Ok(()) } 50 | /// # #[cfg(not(has_arrow))] 51 | /// # fn main() {} 52 | /// ``` 53 | pub struct ArrayBuilder { 54 | pub(crate) builder: OuterSequenceBuilder, 55 | #[allow(unused)] 56 | pub(crate) schema: SerdeArrowSchema, 57 | } 58 | 59 | impl ArrayBuilder { 60 | /// Construct an array builder from a [`SerdeArrowSchema`] 61 | pub fn new(schema: SerdeArrowSchema) -> Result { 62 | Ok(Self { 63 
| builder: OuterSequenceBuilder::new(&schema)?, 64 | schema, 65 | }) 66 | } 67 | } 68 | 69 | impl std::fmt::Debug for ArrayBuilder { 70 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 71 | write!(f, "ArrayBuilder {{ .. }}") 72 | } 73 | } 74 | 75 | impl ArrayBuilder { 76 | /// Add a single record to the arrays 77 | /// 78 | pub fn push(&mut self, item: T) -> Result<()> { 79 | self.builder.push(item) 80 | } 81 | 82 | /// Add multiple records to the arrays 83 | /// 84 | pub fn extend(&mut self, items: T) -> Result<()> { 85 | self.builder.extend(items) 86 | } 87 | 88 | pub(crate) fn build_arrays(&mut self) -> Result> { 89 | let mut arrays = Vec::new(); 90 | for field in self.builder.take_records()? { 91 | arrays.push(field.into_array()?); 92 | } 93 | Ok(arrays) 94 | } 95 | } 96 | 97 | impl std::convert::AsRef for ArrayBuilder { 98 | fn as_ref(&self) -> &ArrayBuilder { 99 | self 100 | } 101 | } 102 | 103 | impl std::convert::AsMut for ArrayBuilder { 104 | fn as_mut(&mut self) -> &mut ArrayBuilder { 105 | self 106 | } 107 | } 108 | 109 | const _: () = { 110 | trait AssertSendSync: Send + Sync {} 111 | impl AssertSendSync for ArrayBuilder {} 112 | }; 113 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/binary_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{BytesView, BytesViewView}; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::{ 5 | error::{set_default, try_, Context, ContextSupport, Error, Result}, 6 | utils::array_view_ext::ViewAccess, 7 | }; 8 | 9 | use super::{random_access_deserializer::RandomAccessDeserializer, utils::U8SliceDeserializer}; 10 | 11 | trait BinaryDeserializerDataType { 12 | const DATA_TYPE_NAME: &'static str; 13 | } 14 | 15 | impl BinaryDeserializerDataType for BytesView<'_, i32> { 16 | const DATA_TYPE_NAME: &'static str = "Binary"; 17 | } 18 | 19 | impl 
BinaryDeserializerDataType for BytesView<'_, i64> { 20 | const DATA_TYPE_NAME: &'static str = "LargeBinary"; 21 | } 22 | 23 | impl BinaryDeserializerDataType for BytesViewView<'_> { 24 | const DATA_TYPE_NAME: &'static str = "BinaryView"; 25 | } 26 | 27 | pub struct BinaryDeserializer { 28 | pub path: String, 29 | pub view: V, 30 | } 31 | 32 | impl BinaryDeserializer { 33 | pub fn new(path: String, view: V) -> Self { 34 | Self { path, view } 35 | } 36 | } 37 | 38 | impl Context for BinaryDeserializer { 39 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 40 | set_default(annotations, "field", &self.path); 41 | set_default(annotations, "data_type", V::DATA_TYPE_NAME); 42 | } 43 | } 44 | 45 | impl<'de, VV> RandomAccessDeserializer<'de> for BinaryDeserializer 46 | where 47 | VV: ViewAccess<'de, [u8]> + BinaryDeserializerDataType + 'de, 48 | { 49 | fn is_some(&self, idx: usize) -> Result { 50 | self.view.is_some(idx) 51 | } 52 | 53 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 54 | self.deserialize_bytes(visitor, idx) 55 | } 56 | 57 | fn deserialize_any>(&self, visitor: V, idx: usize) -> Result { 58 | try_(|| { 59 | if self.view.is_some(idx)? { 60 | self.deserialize_bytes(visitor, idx) 61 | } else { 62 | visitor.visit_none() 63 | } 64 | }) 65 | .ctx(self) 66 | } 67 | 68 | fn deserialize_option>(&self, visitor: V, idx: usize) -> Result { 69 | try_(|| { 70 | if self.view.is_some(idx)? 
{ 71 | visitor.visit_some(self.at(idx)) 72 | } else { 73 | visitor.visit_none() 74 | } 75 | }) 76 | .ctx(self) 77 | } 78 | 79 | fn deserialize_seq>(&self, visitor: V, idx: usize) -> Result { 80 | try_(|| { 81 | let bytes = self.view.get_required(idx)?; 82 | visitor.visit_seq(U8SliceDeserializer::new(bytes)) 83 | }) 84 | .ctx(self) 85 | } 86 | 87 | fn deserialize_bytes>(&self, visitor: V, idx: usize) -> Result { 88 | try_(|| visitor.visit_borrowed_bytes::(self.view.get_required(idx)?)).ctx(self) 89 | } 90 | 91 | fn deserialize_byte_buf>(&self, visitor: V, idx: usize) -> Result { 92 | try_(|| visitor.visit_borrowed_bytes::(self.view.get_required(idx)?)).ctx(self) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/bool_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::BooleanView; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::error::{fail, set_default, try_, Context, ContextSupport, Error, Result}; 5 | 6 | use super::{random_access_deserializer::RandomAccessDeserializer, utils::bitset_is_set}; 7 | 8 | pub struct BoolDeserializer<'a> { 9 | pub path: String, 10 | pub view: BooleanView<'a>, 11 | } 12 | 13 | impl<'a> BoolDeserializer<'a> { 14 | pub fn new(path: String, view: BooleanView<'a>) -> Self { 15 | Self { path, view } 16 | } 17 | 18 | fn get(&self, idx: usize) -> Result> { 19 | if idx >= self.view.len { 20 | fail!("Out of bounds access"); 21 | } 22 | if let Some(validity) = &self.view.validity { 23 | if !bitset_is_set(validity, idx)? { 24 | return Ok(None); 25 | } 26 | } 27 | 28 | Ok(Some(bitset_is_set(&self.view.values, idx)?)) 29 | } 30 | 31 | fn get_required(&self, idx: usize) -> Result { 32 | if let Some(value) = self.get(idx)? 
{ 33 | Ok(value) 34 | } else { 35 | fail!("Required value was not defined"); 36 | } 37 | } 38 | } 39 | 40 | impl Context for BoolDeserializer<'_> { 41 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 42 | set_default(annotations, "field", &self.path); 43 | set_default(annotations, "data_type", "Boolean"); 44 | } 45 | } 46 | 47 | impl<'de> RandomAccessDeserializer<'de> for BoolDeserializer<'de> { 48 | fn is_some(&self, idx: usize) -> Result { 49 | Ok(self.get(idx)?.is_some()) 50 | } 51 | 52 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 53 | self.deserialize_bool(visitor, idx) 54 | } 55 | 56 | fn deserialize_bool>(&self, visitor: V, idx: usize) -> Result { 57 | try_(|| visitor.visit_bool::(self.get_required(idx)?)).ctx(self) 58 | } 59 | 60 | fn deserialize_u8>(&self, visitor: V, idx: usize) -> Result { 61 | try_(|| visitor.visit_u8::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 62 | } 63 | 64 | fn deserialize_u16>(&self, visitor: V, idx: usize) -> Result { 65 | try_(|| visitor.visit_u16::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 66 | } 67 | 68 | fn deserialize_u32>(&self, visitor: V, idx: usize) -> Result { 69 | try_(|| visitor.visit_u32::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 70 | } 71 | 72 | fn deserialize_u64>(&self, visitor: V, idx: usize) -> Result { 73 | try_(|| visitor.visit_u64::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 74 | } 75 | 76 | fn deserialize_i8>(&self, visitor: V, idx: usize) -> Result { 77 | try_(|| visitor.visit_i8::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 78 | } 79 | 80 | fn deserialize_i16>(&self, visitor: V, idx: usize) -> Result { 81 | try_(|| visitor.visit_i16::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 82 | } 83 | 84 | fn deserialize_i32>(&self, visitor: V, idx: usize) -> Result { 85 | try_(|| visitor.visit_i32::(if self.get_required(idx)? 
{ 1 } else { 0 })).ctx(self) 86 | } 87 | 88 | fn deserialize_i64>(&self, visitor: V, idx: usize) -> Result { 89 | try_(|| visitor.visit_i64::(if self.get_required(idx)? { 1 } else { 0 })).ctx(self) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/decimal_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{DecimalView, PrimitiveView}; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::{ 5 | error::{set_default, Context, ContextSupport, Result}, 6 | utils::{array_view_ext::ViewAccess, decimal}, 7 | }; 8 | 9 | use super::random_access_deserializer::RandomAccessDeserializer; 10 | 11 | pub struct DecimalDeserializer<'a> { 12 | path: String, 13 | view: PrimitiveView<'a, i128>, 14 | scale: i8, 15 | } 16 | 17 | impl<'a> DecimalDeserializer<'a> { 18 | pub fn new(path: String, view: DecimalView<'a, i128>) -> Self { 19 | Self { 20 | path, 21 | view: PrimitiveView { 22 | validity: view.validity, 23 | values: view.values, 24 | }, 25 | scale: view.scale, 26 | } 27 | } 28 | 29 | fn with_value Result, R>(&self, idx: usize, func: F) -> Result { 30 | let val = self.view.get_required(idx)?; 31 | let mut buffer = [0; decimal::BUFFER_SIZE_I128]; 32 | let formatted = decimal::format_decimal(&mut buffer, *val, self.scale); 33 | 34 | func(formatted) 35 | } 36 | } 37 | 38 | impl Context for DecimalDeserializer<'_> { 39 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 40 | set_default(annotations, "field", &self.path); 41 | set_default(annotations, "data_type", "Decimal128(..)"); 42 | } 43 | } 44 | 45 | impl<'de> RandomAccessDeserializer<'de> for DecimalDeserializer<'de> { 46 | fn is_some(&self, idx: usize) -> Result { 47 | self.view.is_some(idx) 48 | } 49 | 50 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 51 | self.deserialize_str(visitor, idx) 52 | } 53 | 54 | fn 
deserialize_str>(&self, visitor: V, idx: usize) -> Result { 55 | self.with_value(idx, |value| visitor.visit_str(value)) 56 | .ctx(self) 57 | } 58 | 59 | fn deserialize_string>(&self, visitor: V, idx: usize) -> Result { 60 | self.with_value(idx, |value| visitor.visit_string(value.to_string())) 61 | .ctx(self) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/dictionary_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{BytesView, PrimitiveView}; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::{ 5 | error::{fail, set_default, try_, Context, ContextSupport, Result}, 6 | utils::{array_view_ext::ViewAccess, Offset}, 7 | }; 8 | 9 | use super::{ 10 | enums_as_string_impl::EnumAccess, integer_deserializer::Integer, 11 | random_access_deserializer::RandomAccessDeserializer, 12 | }; 13 | 14 | pub struct DictionaryDeserializer<'a, K: Integer, V: Offset> { 15 | path: String, 16 | keys: PrimitiveView<'a, K>, 17 | values: BytesView<'a, V>, 18 | } 19 | 20 | impl<'a, K: Integer, V: Offset> DictionaryDeserializer<'a, K, V> { 21 | pub fn new(path: String, keys: PrimitiveView<'a, K>, values: BytesView<'a, V>) -> Result { 22 | if values.validity.is_some() { 23 | // TODO: check whether all values are defined? 
24 | fail!("Null for non-nullable type: dictionaries do not support nullable values"); 25 | } 26 | Ok(Self { 27 | path, 28 | keys: keys.clone(), 29 | values: values.clone(), 30 | }) 31 | } 32 | 33 | pub fn get_str(&self, idx: usize) -> Result<&str> { 34 | let key: usize = self.keys.get_required(idx)?.into_i64()?.try_into()?; 35 | let value: &str = self.values.get_required(key)?; 36 | Ok(value) 37 | } 38 | } 39 | 40 | impl Context for DictionaryDeserializer<'_, K, V> { 41 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 42 | set_default(annotations, "field", &self.path); 43 | set_default(annotations, "data_type", "Dictionary(..)"); 44 | } 45 | } 46 | 47 | impl<'de, K: Integer, V: Offset> RandomAccessDeserializer<'de> 48 | for DictionaryDeserializer<'de, K, V> 49 | { 50 | fn is_some(&self, idx: usize) -> Result { 51 | self.keys.is_some(idx) 52 | } 53 | 54 | fn deserialize_any_some>(&self, visitor: VV, idx: usize) -> Result { 55 | self.deserialize_str(visitor, idx) 56 | } 57 | 58 | fn deserialize_str>(&self, visitor: VV, idx: usize) -> Result { 59 | try_(|| visitor.visit_str(self.get_str(idx)?)).ctx(self) 60 | } 61 | 62 | fn deserialize_string>(&self, visitor: VV, idx: usize) -> Result { 63 | try_(|| visitor.visit_string(self.get_str(idx)?.to_owned())).ctx(self) 64 | } 65 | 66 | fn deserialize_enum>( 67 | &self, 68 | _: &'static str, 69 | _: &'static [&'static str], 70 | visitor: VV, 71 | idx: usize, 72 | ) -> Result { 73 | try_(|| visitor.visit_enum(EnumAccess(self.get_str(idx)?))).ctx(self) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/duration_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::{ 2 | datatypes::TimeUnit, 3 | view::{PrimitiveView, TimeView}, 4 | }; 5 | use serde::de::Visitor; 6 | 7 | use crate::internal::{ 8 | chrono, 9 | error::{set_default, try_, Context, ContextSupport, 
Result}, 10 | utils::array_view_ext::ViewAccess, 11 | }; 12 | 13 | use super::random_access_deserializer::RandomAccessDeserializer; 14 | 15 | pub struct DurationDeserializer<'a> { 16 | path: String, 17 | unit: TimeUnit, 18 | values: PrimitiveView<'a, i64>, 19 | } 20 | 21 | impl<'a> DurationDeserializer<'a> { 22 | pub fn new(path: String, view: TimeView<'a, i64>) -> Self { 23 | Self { 24 | path, 25 | unit: view.unit, 26 | values: PrimitiveView { 27 | validity: view.validity, 28 | values: view.values, 29 | }, 30 | } 31 | } 32 | 33 | pub fn get_string_value(&self, idx: usize) -> Result { 34 | let value = self.values.get_required(idx)?; 35 | Ok(chrono::format_arrow_duration_as_span(*value, self.unit)) 36 | } 37 | } 38 | 39 | impl Context for DurationDeserializer<'_> { 40 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 41 | set_default(annotations, "field", &self.path); 42 | set_default(annotations, "data_type", "Duration(..)"); 43 | } 44 | } 45 | 46 | impl<'de> RandomAccessDeserializer<'de> for DurationDeserializer<'de> { 47 | fn is_some(&self, idx: usize) -> Result { 48 | self.values.is_some(idx) 49 | } 50 | 51 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 52 | self.deserialize_i64(visitor, idx) 53 | } 54 | 55 | fn deserialize_i64>(&self, visitor: V, idx: usize) -> Result { 56 | try_(|| visitor.visit_i64(*self.values.get_required(idx)?)).ctx(self) 57 | } 58 | 59 | fn deserialize_str>(&self, visitor: V, idx: usize) -> Result { 60 | try_(|| visitor.visit_str(self.get_string_value(idx)?.as_str())).ctx(self) 61 | } 62 | 63 | fn deserialize_string>(&self, visitor: V, idx: usize) -> Result { 64 | try_(|| visitor.visit_string(self.get_string_value(idx)?)).ctx(self) 65 | } 66 | 67 | fn deserialize_bytes>(&self, visitor: V, idx: usize) -> Result { 68 | try_(|| visitor.visit_bytes(self.get_string_value(idx)?.as_bytes())).ctx(self) 69 | } 70 | 71 | fn deserialize_byte_buf>(&self, visitor: V, idx: usize) -> Result { 72 | try_(|| 
visitor.visit_byte_buf(self.get_string_value(idx)?.into_bytes())).ctx(self) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/enums_as_string_impl.rs: -------------------------------------------------------------------------------- 1 | use serde::de::Visitor; 2 | 3 | use crate::internal::error::{fail, Error, Result}; 4 | 5 | pub struct EnumAccess<'de>(pub &'de str); 6 | 7 | impl<'de> serde::de::EnumAccess<'de> for EnumAccess<'_> { 8 | type Error = Error; 9 | type Variant = UnitVariant; 10 | 11 | fn variant_seed>( 12 | self, 13 | seed: V, 14 | ) -> Result<(V::Value, Self::Variant), Self::Error> { 15 | struct SeedDeserializer<'a>(&'a str); 16 | 17 | macro_rules! unimplemented { 18 | ($lifetime:lifetime, $name:ident $($tt:tt)*) => { 19 | fn $name>(self $($tt)*, _: V) -> Result { 20 | fail!("Unsupported: EnumDeserializer does not implement {}", stringify!($name)) 21 | } 22 | }; 23 | } 24 | 25 | impl<'de> serde::de::Deserializer<'de> for SeedDeserializer<'_> { 26 | type Error = Error; 27 | 28 | fn deserialize_identifier>(self, visitor: V) -> Result { 29 | self.deserialize_str(visitor) 30 | } 31 | 32 | fn deserialize_any>(self, visitor: V) -> Result { 33 | self.deserialize_str(visitor) 34 | } 35 | 36 | fn deserialize_str>(self, visitor: V) -> Result { 37 | visitor.visit_str(self.0) 38 | } 39 | 40 | fn deserialize_string>(self, visitor: V) -> Result { 41 | visitor.visit_string(self.0.to_owned()) 42 | } 43 | 44 | unimplemented!('de, deserialize_u64); 45 | unimplemented!('de, deserialize_bool); 46 | unimplemented!('de, deserialize_i8); 47 | unimplemented!('de, deserialize_i16); 48 | unimplemented!('de, deserialize_i32); 49 | unimplemented!('de, deserialize_i64); 50 | unimplemented!('de, deserialize_u8); 51 | unimplemented!('de, deserialize_u16); 52 | unimplemented!('de, deserialize_u32); 53 | unimplemented!('de, deserialize_f32); 54 | unimplemented!('de, deserialize_f64); 55 | 
unimplemented!('de, deserialize_char); 56 | unimplemented!('de, deserialize_bytes); 57 | unimplemented!('de, deserialize_byte_buf); 58 | unimplemented!('de, deserialize_option); 59 | unimplemented!('de, deserialize_unit); 60 | unimplemented!('de, deserialize_unit_struct, _: &'static str); 61 | unimplemented!('de, deserialize_newtype_struct, _: &'static str); 62 | unimplemented!('de, deserialize_seq); 63 | unimplemented!('de, deserialize_tuple, _: usize); 64 | unimplemented!('de, deserialize_tuple_struct, _: &'static str, _: usize); 65 | unimplemented!('de, deserialize_map); 66 | unimplemented!('de, deserialize_struct, _: &'static str, _: &'static [&'static str]); 67 | unimplemented!('de, deserialize_enum, _: &'static str, _: &'static [&'static str]); 68 | unimplemented!('de, deserialize_ignored_any); 69 | } 70 | 71 | Ok((seed.deserialize(SeedDeserializer(self.0))?, UnitVariant)) 72 | } 73 | } 74 | 75 | pub struct UnitVariant; 76 | 77 | impl<'de> serde::de::VariantAccess<'de> for UnitVariant { 78 | type Error = Error; 79 | 80 | fn newtype_variant_seed>(self, _: T) -> Result { 81 | fail!("Unsupported: cannot deserialize enums with data from strings") 82 | } 83 | 84 | fn struct_variant>(self, _: &'static [&'static str], _: V) -> Result { 85 | fail!("Unsupported: cannot deserialize enums with data from strings") 86 | } 87 | 88 | fn tuple_variant>(self, _: usize, _: V) -> Result { 89 | fail!("Unsupported: cannot deserialize enums with data from strings") 90 | } 91 | 92 | fn unit_variant(self) -> Result<(), Self::Error> { 93 | Ok(()) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/fixed_size_binary_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::FixedSizeBinaryView; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::error::{fail, set_default, try_, Context, ContextSupport, Result}; 5 | 6 | use super::{ 
7 | random_access_deserializer::RandomAccessDeserializer, 8 | utils::{bitset_is_set, U8SliceDeserializer}, 9 | }; 10 | 11 | pub struct FixedSizeBinaryDeserializer<'a> { 12 | pub path: String, 13 | pub view: FixedSizeBinaryView<'a>, 14 | pub len: usize, 15 | pub n: usize, 16 | } 17 | 18 | impl<'a> FixedSizeBinaryDeserializer<'a> { 19 | pub fn new(path: String, view: FixedSizeBinaryView<'a>) -> Result { 20 | let n = usize::try_from(view.n)?; 21 | if n == 0 || view.data.len() % n != 0 { // n == 0 must fail here: `% n` and `/ n` would panic 22 | fail!( 23 | concat!( 24 | "Invalid FixedSizeBinary array: Data of len {len} is not ", 25 | "evenly divisible into chunks of size {n}", 26 | ), 27 | len = view.data.len(), 28 | n = n, 29 | ); 30 | } 31 | 32 | Ok(Self { 33 | path, 34 | len: view.data.len() / n, 35 | view, 36 | n, 37 | }) 38 | } 39 | 40 | pub fn get(&self, idx: usize) -> Result> { 41 | if idx >= self.len { 42 | fail!("Out of bounds access") 43 | } 44 | if let Some(validity) = &self.view.validity { 45 | if !bitset_is_set(validity, idx)? { 46 | return Ok(None); 47 | } 48 | } 49 | let start = idx * self.n; 50 | let end = (idx + 1) * self.n; 51 | Ok(Some(&self.view.data[start..end])) 52 | } 53 | 54 | pub fn get_required(&self, idx: usize) -> Result<&'a [u8]> { 55 | let Some(s) = self.get(idx)?
else { 56 | fail!("Required value is not defined"); 57 | }; 58 | Ok(s) 59 | } 60 | } 61 | 62 | impl Context for FixedSizeBinaryDeserializer<'_> { 63 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 64 | set_default(annotations, "field", &self.path); 65 | set_default(annotations, "data_type", "FixedSizeBinary(..)"); 66 | } 67 | } 68 | 69 | impl<'de> RandomAccessDeserializer<'de> for FixedSizeBinaryDeserializer<'de> { 70 | fn is_some(&self, idx: usize) -> Result { 71 | Ok(self.get(idx)?.is_some()) 72 | } 73 | 74 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 75 | self.deserialize_bytes(visitor, idx) 76 | } 77 | 78 | fn deserialize_bytes>(&self, visitor: V, idx: usize) -> Result { 79 | try_(|| visitor.visit_borrowed_bytes(self.get_required(idx)?)).ctx(self) 80 | } 81 | 82 | fn deserialize_byte_buf>(&self, visitor: V, idx: usize) -> Result { 83 | try_(|| visitor.visit_borrowed_bytes(self.get_required(idx)?)).ctx(self) 84 | } 85 | 86 | fn deserialize_seq>(&self, visitor: V, idx: usize) -> Result { 87 | try_(|| visitor.visit_seq(U8SliceDeserializer::new(self.get_required(idx)?))).ctx(self) 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/fixed_size_list_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{BitsWithOffset, FixedSizeListView}; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::{ 5 | error::{fail, set_default, Context, Result}, 6 | schema::get_strategy_from_metadata, 7 | utils::ChildName, 8 | }; 9 | 10 | use super::{ 11 | array_deserializer::ArrayDeserializer, list_deserializer::ListItemDeserializer, 12 | random_access_deserializer::RandomAccessDeserializer, utils::bitset_is_set, 13 | }; 14 | 15 | pub struct FixedSizeListDeserializer<'a> { 16 | pub path: String, 17 | pub item: Box>, 18 | pub validity: Option>, 19 | pub len: usize, 20 | pub n: usize, 21 
| } 22 | 23 | impl<'a> FixedSizeListDeserializer<'a> { 24 | pub fn new(path: String, view: FixedSizeListView<'a>) -> Result { 25 | let child_path = format!("{path}.{child}", child = ChildName(&view.meta.name)); 26 | let item = ArrayDeserializer::new( 27 | child_path, 28 | get_strategy_from_metadata(&view.meta.metadata)?.as_ref(), 29 | *view.elements, 30 | )?; 31 | 32 | Ok(Self { 33 | path, 34 | item: Box::new(item), 35 | validity: view.validity, 36 | len: view.len, 37 | n: view.n.try_into()?, 38 | }) 39 | } 40 | } 41 | 42 | impl Context for FixedSizeListDeserializer<'_> { 43 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 44 | set_default(annotations, "field", &self.path); 45 | set_default(annotations, "data_type", "FixedSizeList(..)"); 46 | } 47 | } 48 | 49 | impl<'de> RandomAccessDeserializer<'de> for FixedSizeListDeserializer<'de> { 50 | fn is_some(&self, idx: usize) -> Result { 51 | if idx >= self.len { 52 | fail!("Out of bounds access"); 53 | } 54 | if let Some(validity) = self.validity.as_ref() { 55 | return bitset_is_set(validity, idx); 56 | } 57 | Ok(true) 58 | } 59 | 60 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 61 | self.deserialize_seq(visitor, idx) 62 | } 63 | 64 | fn deserialize_seq>(&self, visitor: V, idx: usize) -> Result { 65 | if idx >= self.len { 66 | fail!("Out of bounds access"); 67 | } 68 | visitor.visit_seq(ListItemDeserializer { 69 | item: self.item.as_ref(), 70 | start: idx * self.n, 71 | end: (idx + 1) * self.n, 72 | }) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/float_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::PrimitiveView; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::{ 5 | error::{set_default, try_, Context, ContextSupport, Result}, 6 | utils::{array_view_ext::ViewAccess, NamedType}, 7 | }; 8 | 9 | use 
super::random_access_deserializer::RandomAccessDeserializer; 10 | 11 | pub trait Float: Copy { 12 | fn deserialize_any_at<'de, S: RandomAccessDeserializer<'de>, V: Visitor<'de>>( 13 | deser: &S, 14 | visitor: V, 15 | idx: usize, 16 | ) -> Result; 17 | 18 | fn into_f32(self) -> Result; 19 | fn into_f64(self) -> Result; 20 | } 21 | 22 | pub struct FloatDeserializer<'a, F: Float> { 23 | path: String, 24 | view: PrimitiveView<'a, F>, 25 | } 26 | 27 | impl<'a, F: Float> FloatDeserializer<'a, F> { 28 | pub fn new(path: String, view: PrimitiveView<'a, F>) -> Self { 29 | Self { path, view } 30 | } 31 | } 32 | 33 | impl Context for FloatDeserializer<'_, F> { 34 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 35 | set_default(annotations, "field", &self.path); 36 | set_default( 37 | annotations, 38 | "data_type", 39 | match F::NAME { 40 | "f16" => "Float16", 41 | "f32" => "Float32", 42 | "f64" => "Float64", 43 | _ => "", 44 | }, 45 | ); 46 | } 47 | } 48 | 49 | impl<'de, F: NamedType + Float> RandomAccessDeserializer<'de> for FloatDeserializer<'de, F> { 50 | fn is_some(&self, idx: usize) -> Result { 51 | self.view.is_some(idx) 52 | } 53 | 54 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 55 | F::deserialize_any_at(self, visitor, idx) 56 | } 57 | 58 | fn deserialize_f32>(&self, visitor: V, idx: usize) -> Result { 59 | try_(|| visitor.visit_f32(self.view.get_required(idx)?.into_f32()?)).ctx(self) 60 | } 61 | 62 | fn deserialize_f64>(&self, visitor: V, idx: usize) -> Result { 63 | try_(|| visitor.visit_f64(self.view.get_required(idx)?.into_f64()?)).ctx(self) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/float_impls.rs: -------------------------------------------------------------------------------- 1 | use half::f16; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::error::Result; 5 | 6 | use super::{float_deserializer::Float, 
random_access_deserializer::RandomAccessDeserializer}; 7 | 8 | impl Float for f16 { 9 | fn deserialize_any_at<'de, S: RandomAccessDeserializer<'de>, V: Visitor<'de>>( 10 | deser: &S, 11 | visitor: V, 12 | idx: usize, 13 | ) -> Result { 14 | deser.deserialize_f32(visitor, idx) 15 | } 16 | 17 | fn into_f32(self) -> Result { 18 | Ok(self.to_f32()) 19 | } 20 | 21 | fn into_f64(self) -> Result { 22 | Ok(self.to_f64()) 23 | } 24 | } 25 | 26 | impl Float for f32 { 27 | fn deserialize_any_at<'de, S: RandomAccessDeserializer<'de>, V: Visitor<'de>>( 28 | deser: &S, 29 | visitor: V, 30 | idx: usize, 31 | ) -> Result { 32 | deser.deserialize_f32(visitor, idx) 33 | } 34 | 35 | fn into_f32(self) -> Result { 36 | Ok(self) 37 | } 38 | 39 | fn into_f64(self) -> Result { 40 | Ok(self as f64) 41 | } 42 | } 43 | 44 | impl Float for f64 { 45 | fn deserialize_any_at<'de, S: RandomAccessDeserializer<'de>, V: Visitor<'de>>( 46 | deser: &S, 47 | visitor: V, 48 | idx: usize, 49 | ) -> Result { 50 | deser.deserialize_f64(visitor, idx) 51 | } 52 | 53 | fn into_f32(self) -> Result { 54 | Ok(self as f32) 55 | } 56 | 57 | fn into_f64(self) -> Result { 58 | Ok(self) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/integer_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::PrimitiveView; 2 | use serde::de::Visitor; 3 | 4 | use crate::internal::{ 5 | error::{set_default, try_, Context, ContextSupport, Result}, 6 | utils::{array_view_ext::ViewAccess, NamedType}, 7 | }; 8 | 9 | use super::random_access_deserializer::RandomAccessDeserializer; 10 | 11 | pub trait Integer: Sized + Copy { 12 | fn deserialize_any_at<'de, S: RandomAccessDeserializer<'de>, V: Visitor<'de>>( 13 | deser: &S, 14 | visitor: V, 15 | idx: usize, 16 | ) -> Result; 17 | 18 | fn into_bool(self) -> Result; 19 | 20 | fn into_i8(self) -> Result; 21 | fn into_i16(self) -> 
Result; 22 | fn into_i32(self) -> Result; 23 | fn into_i64(self) -> Result; 24 | 25 | fn into_u8(self) -> Result; 26 | fn into_u16(self) -> Result; 27 | fn into_u32(self) -> Result; 28 | fn into_u64(self) -> Result; 29 | } 30 | 31 | pub struct IntegerDeserializer<'a, T: Integer> { 32 | path: String, 33 | view: PrimitiveView<'a, T>, 34 | } 35 | 36 | impl<'a, T: Integer> IntegerDeserializer<'a, T> { 37 | pub fn new(path: String, view: PrimitiveView<'a, T>) -> Self { 38 | Self { path, view } 39 | } 40 | } 41 | 42 | impl Context for IntegerDeserializer<'_, T> { 43 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 44 | set_default(annotations, "field", &self.path); 45 | set_default( 46 | annotations, 47 | "data_type", 48 | match T::NAME { 49 | "i8" => "Int8", 50 | "i16" => "Int16", 51 | "i32" => "Int32", 52 | "i64" => "Int64", 53 | "u8" => "UInt8", 54 | "u16" => "UInt16", 55 | "u32" => "UInt32", 56 | "u64" => "UInt64", 57 | _ => "", 58 | }, 59 | ); 60 | } 61 | } 62 | 63 | impl<'de, T: NamedType + Integer> RandomAccessDeserializer<'de> for IntegerDeserializer<'de, T> { 64 | fn is_some(&self, idx: usize) -> Result { 65 | self.view.is_some(idx) 66 | } 67 | 68 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 69 | T::deserialize_any_at(self, visitor, idx) 70 | } 71 | 72 | fn deserialize_bool>(&self, visitor: V, idx: usize) -> Result { 73 | try_(|| visitor.visit_bool(self.view.get_required(idx)?.into_bool()?)).ctx(self) 74 | } 75 | 76 | fn deserialize_char>(&self, visitor: V, idx: usize) -> Result { 77 | try_(|| visitor.visit_char(self.view.get_required(idx)?.into_u32()?.try_into()?)).ctx(self) 78 | } 79 | 80 | fn deserialize_u8>(&self, visitor: V, idx: usize) -> Result { 81 | try_(|| visitor.visit_u8(self.view.get_required(idx)?.into_u8()?)).ctx(self) 82 | } 83 | 84 | fn deserialize_u16>(&self, visitor: V, idx: usize) -> Result { 85 | try_(|| visitor.visit_u16(self.view.get_required(idx)?.into_u16()?)).ctx(self) 86 | } 87 | 88 | fn 
deserialize_u32>(&self, visitor: V, idx: usize) -> Result { 89 | try_(|| visitor.visit_u32(self.view.get_required(idx)?.into_u32()?)).ctx(self) 90 | } 91 | 92 | fn deserialize_u64>(&self, visitor: V, idx: usize) -> Result { 93 | try_(|| visitor.visit_u64(self.view.get_required(idx)?.into_u64()?)).ctx(self) 94 | } 95 | 96 | fn deserialize_i8>(&self, visitor: V, idx: usize) -> Result { 97 | try_(|| visitor.visit_i8(self.view.get_required(idx)?.into_i8()?)).ctx(self) 98 | } 99 | 100 | fn deserialize_i16>(&self, visitor: V, idx: usize) -> Result { 101 | try_(|| visitor.visit_i16(self.view.get_required(idx)?.into_i16()?)).ctx(self) 102 | } 103 | 104 | fn deserialize_i32>(&self, visitor: V, idx: usize) -> Result { 105 | try_(|| visitor.visit_i32(self.view.get_required(idx)?.into_i32()?)).ctx(self) 106 | } 107 | 108 | fn deserialize_i64>(&self, visitor: V, idx: usize) -> Result { 109 | try_(|| visitor.visit_i64(self.view.get_required(idx)?.into_i64()?)).ctx(self) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/integer_impls.rs: -------------------------------------------------------------------------------- 1 | use serde::de::Visitor; 2 | 3 | use crate::internal::error::Result; 4 | 5 | use super::integer_deserializer::Integer; 6 | 7 | macro_rules! implement_integer_into { 8 | () => { 9 | fn into_i8(self) -> Result { 10 | Ok(self.try_into()?) 11 | } 12 | 13 | fn into_i16(self) -> Result { 14 | Ok(self.try_into()?) 15 | } 16 | 17 | fn into_i32(self) -> Result { 18 | Ok(self.try_into()?) 19 | } 20 | 21 | fn into_i64(self) -> Result { 22 | Ok(self.try_into()?) 23 | } 24 | 25 | fn into_u8(self) -> Result { 26 | Ok(self.try_into()?) 27 | } 28 | 29 | fn into_u16(self) -> Result { 30 | Ok(self.try_into()?) 31 | } 32 | 33 | fn into_u32(self) -> Result { 34 | Ok(self.try_into()?) 35 | } 36 | 37 | fn into_u64(self) -> Result { 38 | Ok(self.try_into()?) 
39 | } 40 | 41 | fn into_bool(self) -> Result { 42 | Ok(self != 0) 43 | } 44 | }; 45 | } 46 | 47 | impl Integer for i8 { 48 | fn deserialize_any_at< 49 | 'de, 50 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 51 | V: Visitor<'de>, 52 | >( 53 | deser: &S, 54 | visitor: V, 55 | idx: usize, 56 | ) -> Result { 57 | deser.deserialize_i8(visitor, idx) 58 | } 59 | 60 | implement_integer_into!(); 61 | } 62 | 63 | impl Integer for i16 { 64 | fn deserialize_any_at< 65 | 'de, 66 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 67 | V: Visitor<'de>, 68 | >( 69 | deser: &S, 70 | visitor: V, 71 | idx: usize, 72 | ) -> Result { 73 | deser.deserialize_i16(visitor, idx) 74 | } 75 | 76 | implement_integer_into!(); 77 | } 78 | 79 | impl Integer for i32 { 80 | fn deserialize_any_at< 81 | 'de, 82 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 83 | V: Visitor<'de>, 84 | >( 85 | deser: &S, 86 | visitor: V, 87 | idx: usize, 88 | ) -> Result { 89 | deser.deserialize_i32(visitor, idx) 90 | } 91 | 92 | implement_integer_into!(); 93 | } 94 | 95 | impl Integer for i64 { 96 | fn deserialize_any_at< 97 | 'de, 98 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 99 | V: Visitor<'de>, 100 | >( 101 | deser: &S, 102 | visitor: V, 103 | idx: usize, 104 | ) -> Result { 105 | deser.deserialize_i64(visitor, idx) 106 | } 107 | 108 | implement_integer_into!(); 109 | } 110 | 111 | impl Integer for u8 { 112 | fn deserialize_any_at< 113 | 'de, 114 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 115 | V: Visitor<'de>, 116 | >( 117 | deser: &S, 118 | visitor: V, 119 | idx: usize, 120 | ) -> Result { 121 | deser.deserialize_u8(visitor, idx) 122 | } 123 | 124 | implement_integer_into!(); 125 | } 126 | 127 | impl Integer for u16 { 128 | fn deserialize_any_at< 129 | 'de, 130 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 131 | V: Visitor<'de>, 132 | >( 133 | deser: &S, 134 | 
visitor: V, 135 | idx: usize, 136 | ) -> Result { 137 | deser.deserialize_u16(visitor, idx) 138 | } 139 | 140 | implement_integer_into!(); 141 | } 142 | 143 | impl Integer for u32 { 144 | fn deserialize_any_at< 145 | 'de, 146 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 147 | V: Visitor<'de>, 148 | >( 149 | deser: &S, 150 | visitor: V, 151 | idx: usize, 152 | ) -> Result { 153 | deser.deserialize_u32(visitor, idx) 154 | } 155 | 156 | implement_integer_into!(); 157 | } 158 | 159 | impl Integer for u64 { 160 | fn deserialize_any_at< 161 | 'de, 162 | S: super::random_access_deserializer::RandomAccessDeserializer<'de>, 163 | V: Visitor<'de>, 164 | >( 165 | deser: &S, 166 | visitor: V, 167 | idx: usize, 168 | ) -> Result { 169 | deser.deserialize_u64(visitor, idx) 170 | } 171 | 172 | implement_integer_into!(); 173 | } 174 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/list_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{BitsWithOffset, ListView}; 2 | use serde::de::{DeserializeSeed, SeqAccess, Visitor}; 3 | 4 | use crate::internal::{ 5 | error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, 6 | schema::get_strategy_from_metadata, 7 | utils::{ChildName, NamedType, Offset}, 8 | }; 9 | 10 | use super::{ 11 | array_deserializer::ArrayDeserializer, random_access_deserializer::RandomAccessDeserializer, 12 | utils::bitset_is_set, 13 | }; 14 | 15 | pub struct ListDeserializer<'a, O: Offset> { 16 | pub path: String, 17 | pub item: Box>, 18 | pub offsets: &'a [O], 19 | pub validity: Option>, 20 | } 21 | 22 | impl<'de, O: Offset> ListDeserializer<'de, O> { 23 | pub fn new(path: String, view: ListView<'de, O>) -> Result { 24 | let child_path = format!("{path}.{child}", child = ChildName(&view.meta.name)); 25 | let item = ArrayDeserializer::new( 26 | child_path, 27 | 
get_strategy_from_metadata(&view.meta.metadata)?.as_ref(), 28 | *view.elements, 29 | )?; 30 | 31 | Ok(Self { 32 | path, 33 | item: Box::new(item), 34 | offsets: view.offsets, 35 | validity: view.validity, 36 | }) 37 | } 38 | 39 | /* Build the element accessor for the list at `idx`, covering child positions offsets[idx]..offsets[idx + 1]; fails when idx + 1 is not a valid index into `offsets`. */ fn get<'this>(&'this self, idx: usize) -> Result> { 40 | if idx + 1 >= self.offsets.len() { 41 | fail!("Out of bounds access"); 42 | } 43 | Ok(ListItemDeserializer { 44 | item: self.item.as_ref(), 45 | start: self.offsets[idx].try_into_usize()?, 46 | end: self.offsets[idx + 1].try_into_usize()?, 47 | }) 48 | } 49 | } 50 | 51 | impl Context for ListDeserializer<'_, O> { 52 | /* Attach the field path and the Arrow data type name as error context; the path is stored under the "field" key, the same key the other deserializers use. */ fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 53 | set_default(annotations, "field", &self.path); 54 | set_default( 55 | annotations, 56 | "data_type", 57 | match O::NAME { 58 | "i32" => "List(..)", 59 | "i64" => "LargeList(..)", 60 | _ => "", 61 | }, 62 | ); 63 | } 64 | } 65 | 66 | impl<'de, O: Offset + NamedType> RandomAccessDeserializer<'de> for ListDeserializer<'de, O> { 67 | fn is_some(&self, idx: usize) -> Result { 68 | if idx + 1 >= self.offsets.len() { 69 | fail!("Out of bounds access") 70 | } 71 | if let Some(validity) = &self.validity { 72 | Ok(bitset_is_set(validity, idx)?) 
73 | } else { 74 | Ok(true) 75 | } 76 | } 77 | 78 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 79 | self.deserialize_seq(visitor, idx) 80 | } 81 | 82 | fn deserialize_seq>(&self, visitor: V, idx: usize) -> Result { 83 | try_(|| visitor.visit_seq(self.get(idx)?)).ctx(self) 84 | } 85 | 86 | fn deserialize_bytes>(&self, visitor: V, idx: usize) -> Result { 87 | try_(|| visitor.visit_seq(self.get(idx)?)).ctx(self) 88 | } 89 | 90 | fn deserialize_byte_buf>(&self, visitor: V, idx: usize) -> Result { 91 | try_(|| visitor.visit_seq(self.get(idx)?)).ctx(self) 92 | } 93 | } 94 | 95 | pub struct ListItemDeserializer<'a, 'de> { 96 | pub item: &'a ArrayDeserializer<'de>, 97 | pub start: usize, 98 | pub end: usize, 99 | } 100 | 101 | impl<'de> SeqAccess<'de> for ListItemDeserializer<'_, 'de> { 102 | type Error = Error; 103 | 104 | fn next_element_seed>(&mut self, seed: T) -> Result> { 105 | if self.start >= self.end { 106 | return Ok(None); 107 | } 108 | let item = seed.deserialize(self.item.at(self.start))?; 109 | self.start += 1; 110 | Ok(Some(item)) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/map_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{BitsWithOffset, MapView}; 2 | use serde::de::{DeserializeSeed, MapAccess, Visitor}; 3 | 4 | use crate::internal::{ 5 | error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, 6 | schema::get_strategy_from_metadata, 7 | utils::{ChildName, Offset}, 8 | }; 9 | 10 | use super::{ 11 | array_deserializer::ArrayDeserializer, random_access_deserializer::RandomAccessDeserializer, 12 | utils::bitset_is_set, 13 | }; 14 | 15 | pub struct MapDeserializer<'a> { 16 | path: String, 17 | key: Box>, 18 | value: Box>, 19 | offsets: &'a [i32], 20 | validity: Option>, 21 | } 22 | 23 | impl<'a> MapDeserializer<'a> { 24 | pub fn new(path: String, 
view: MapView<'a>) -> Result { 25 | let keys_path = format!( 26 | "{path}.{entries}.{keys}", 27 | entries = ChildName(&view.meta.entries_name), 28 | keys = ChildName(&view.meta.keys.name), 29 | ); 30 | let keys = ArrayDeserializer::new( 31 | keys_path, 32 | get_strategy_from_metadata(&view.meta.keys.metadata)?.as_ref(), 33 | *view.keys, 34 | )?; 35 | 36 | let values_path = format!( 37 | "{path}.{entries}.{values}", 38 | entries = ChildName(&view.meta.entries_name), 39 | values = ChildName(&view.meta.values.name), 40 | ); 41 | let values = ArrayDeserializer::new( 42 | values_path, 43 | get_strategy_from_metadata(&view.meta.values.metadata)?.as_ref(), 44 | *view.values, 45 | )?; 46 | 47 | Ok(Self { 48 | path, 49 | key: Box::new(keys), 50 | value: Box::new(values), 51 | offsets: view.offsets, 52 | validity: view.validity, 53 | }) 54 | } 55 | } 56 | 57 | impl Context for MapDeserializer<'_> { 58 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 59 | set_default(annotations, "field", &self.path); 60 | set_default(annotations, "data_type", "Map(..)"); 61 | } 62 | } 63 | 64 | impl<'de> RandomAccessDeserializer<'de> for MapDeserializer<'de> { 65 | fn is_some(&self, idx: usize) -> Result { 66 | if idx + 1 >= self.offsets.len() { 67 | fail!("Out of bounds access") 68 | } 69 | if let Some(validity) = &self.validity { 70 | Ok(bitset_is_set(validity, idx)?) 
71 | } else { 72 | Ok(true) 73 | } 74 | } 75 | 76 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 77 | self.deserialize_map(visitor, idx) 78 | } 79 | 80 | fn deserialize_map>(&self, visitor: V, idx: usize) -> Result { 81 | try_(|| { 82 | if idx + 1 >= self.offsets.len() { 83 | fail!("Out of bounds access") 84 | } 85 | 86 | visitor.visit_map(MapItemDeserializer { 87 | deserializer: self, 88 | start: self.offsets[idx].try_into_usize()?, 89 | end: self.offsets[idx + 1].try_into_usize()?, 90 | }) 91 | }) 92 | .ctx(self) 93 | } 94 | } 95 | 96 | struct MapItemDeserializer<'this, 'de> { 97 | deserializer: &'this MapDeserializer<'de>, 98 | start: usize, 99 | end: usize, 100 | } 101 | 102 | impl<'de> MapAccess<'de> for MapItemDeserializer<'_, 'de> { 103 | type Error = Error; 104 | 105 | fn next_key_seed>(&mut self, seed: K) -> Result> { 106 | if self.start >= self.end { 107 | return Ok(None); 108 | } 109 | let key = seed.deserialize(self.deserializer.key.at(self.start))?; 110 | Ok(Some(key)) 111 | } 112 | 113 | fn next_value_seed>(&mut self, seed: V) -> Result { 114 | if self.start >= self.end { 115 | fail!("Invalid state in MapItemDeserializer"); 116 | } 117 | let value = seed.deserialize(self.deserializer.value.at(self.start))?; 118 | self.start += 1; 119 | Ok(value) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod array_deserializer; 2 | pub mod binary_deserializer; 3 | pub mod bool_deserializer; 4 | pub mod date_deserializer; 5 | pub mod decimal_deserializer; 6 | pub mod dictionary_deserializer; 7 | pub mod duration_deserializer; 8 | pub mod enum_deserializer; 9 | pub mod enums_as_string_impl; 10 | pub mod fixed_size_binary_deserializer; 11 | pub mod fixed_size_list_deserializer; 12 | pub mod float_deserializer; 13 | pub mod float_impls; 14 | pub mod 
integer_deserializer; 15 | pub mod integer_impls; 16 | pub mod list_deserializer; 17 | pub mod map_deserializer; 18 | pub mod null_deserializer; 19 | pub mod random_access_deserializer; 20 | pub mod string_deserializer; 21 | pub mod struct_deserializer; 22 | pub mod time_deserializer; 23 | pub mod timestamp_deserializer; 24 | pub mod utils; 25 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/null_deserializer.rs: -------------------------------------------------------------------------------- 1 | use serde::de::Visitor; 2 | 3 | use crate::internal::error::{set_default, Context, ContextSupport, Error, Result}; 4 | 5 | use super::random_access_deserializer::RandomAccessDeserializer; 6 | 7 | pub struct NullDeserializer { 8 | path: String, 9 | } 10 | 11 | impl NullDeserializer { 12 | pub fn new(path: String) -> Self { 13 | Self { path } 14 | } 15 | } 16 | 17 | impl Context for NullDeserializer { 18 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 19 | set_default(annotations, "field", &self.path); 20 | set_default(annotations, "data_type", "Null"); 21 | } 22 | } 23 | 24 | impl<'de> RandomAccessDeserializer<'de> for NullDeserializer { 25 | fn is_some(&self, _idx: usize) -> Result { 26 | Ok(false) 27 | } 28 | 29 | fn deserialize_any_some>(&self, visitor: V, _idx: usize) -> Result { 30 | visitor.visit_unit::().ctx(self) 31 | } 32 | 33 | fn deserialize_any>(&self, visitor: V, _idx: usize) -> Result { 34 | visitor.visit_unit::().ctx(self) 35 | } 36 | 37 | fn deserialize_option>(&self, visitor: V, _idx: usize) -> Result { 38 | visitor.visit_none::().ctx(self) 39 | } 40 | 41 | fn deserialize_unit>(&self, visitor: V, _idx: usize) -> Result { 42 | visitor.visit_unit::().ctx(self) 43 | } 44 | 45 | fn deserialize_unit_struct>( 46 | &self, 47 | _: &'static str, 48 | visitor: V, 49 | _idx: usize, 50 | ) -> Result { 51 | visitor.visit_unit::().ctx(self) 52 | } 53 | } 54 | 
-------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/string_deserializer.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::{BytesView, BytesViewView}; 2 | 3 | use crate::internal::{ 4 | error::{set_default, try_, Context, ContextSupport, Result}, 5 | utils::array_view_ext::ViewAccess, 6 | }; 7 | 8 | use super::{ 9 | enums_as_string_impl::EnumAccess, random_access_deserializer::RandomAccessDeserializer, 10 | }; 11 | 12 | pub trait StringDeserializerDataType { 13 | const DATA_TYPE_NAME: &'static str; 14 | } 15 | 16 | impl StringDeserializerDataType for BytesView<'_, i32> { 17 | const DATA_TYPE_NAME: &'static str = "Utf8"; 18 | } 19 | 20 | impl StringDeserializerDataType for BytesView<'_, i64> { 21 | const DATA_TYPE_NAME: &'static str = "LargeUtf8"; 22 | } 23 | 24 | impl StringDeserializerDataType for BytesViewView<'_> { 25 | const DATA_TYPE_NAME: &'static str = "Utf8View"; 26 | } 27 | 28 | pub struct StringDeserializer { 29 | pub path: String, 30 | pub view: V, 31 | } 32 | 33 | impl StringDeserializer { 34 | pub fn new(path: String, view: V) -> Self { 35 | Self { path, view } 36 | } 37 | } 38 | 39 | impl Context for StringDeserializer { 40 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 41 | set_default(annotations, "field", &self.path); 42 | set_default(annotations, "data_type", V::DATA_TYPE_NAME); 43 | } 44 | } 45 | 46 | impl<'a, VV> RandomAccessDeserializer<'a> for StringDeserializer 47 | where 48 | VV: ViewAccess<'a, str> + StringDeserializerDataType + 'a, 49 | { 50 | fn is_some(&self, idx: usize) -> Result { 51 | self.view.is_some(idx) 52 | } 53 | 54 | fn deserialize_any_some>( 55 | &self, 56 | visitor: V, 57 | idx: usize, 58 | ) -> Result { 59 | self.deserialize_str(visitor, idx) 60 | } 61 | 62 | fn deserialize_str>( 63 | &self, 64 | visitor: V, 65 | idx: usize, 66 | ) -> Result { 67 | try_(|| 
visitor.visit_borrowed_str(self.view.get_required(idx)?)).ctx(self) 68 | } 69 | 70 | fn deserialize_string>( 71 | &self, 72 | visitor: V, 73 | idx: usize, 74 | ) -> Result { 75 | try_(|| visitor.visit_string(self.view.get_required(idx)?.to_owned())).ctx(self) 76 | } 77 | 78 | fn deserialize_bytes>( 79 | &self, 80 | visitor: V, 81 | idx: usize, 82 | ) -> Result { 83 | try_(|| visitor.visit_bytes(self.view.get_required(idx)?.as_bytes())).ctx(self) 84 | } 85 | 86 | fn deserialize_byte_buf>( 87 | &self, 88 | visitor: V, 89 | idx: usize, 90 | ) -> Result { 91 | try_(|| visitor.visit_byte_buf(self.view.get_required(idx)?.to_owned().into_bytes())) 92 | .ctx(self) 93 | } 94 | 95 | fn deserialize_enum>( 96 | &self, 97 | _name: &'static str, 98 | _variants: &'static [&'static str], 99 | visitor: V, 100 | idx: usize, 101 | ) -> Result { 102 | try_(|| { 103 | let variant = self.view.get_required(idx)?; 104 | visitor.visit_enum(EnumAccess(variant)) 105 | }) 106 | .ctx(self) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/time_deserializer.rs: -------------------------------------------------------------------------------- 1 | use chrono::NaiveTime; 2 | use marrow::{ 3 | datatypes::TimeUnit, 4 | view::{PrimitiveView, TimeView}, 5 | }; 6 | use serde::de::Visitor; 7 | 8 | use crate::internal::{ 9 | error::{set_default, try_, try_opt, Context, ContextSupport, Error, Result}, 10 | utils::{array_view_ext::ViewAccess, NamedType}, 11 | }; 12 | 13 | use super::{integer_deserializer::Integer, random_access_deserializer::RandomAccessDeserializer}; 14 | 15 | pub struct TimeDeserializer<'a, T: Integer> { 16 | path: String, 17 | values: PrimitiveView<'a, T>, 18 | unit: TimeUnit, 19 | } 20 | 21 | impl<'a, T: Integer> TimeDeserializer<'a, T> { 22 | pub fn new(path: String, view: TimeView<'a, T>) -> Self { 23 | Self { 24 | path, 25 | values: PrimitiveView { 26 | validity: view.validity, 27 | values: 
view.values, 28 | }, 29 | unit: view.unit, 30 | } 31 | } 32 | 33 | pub fn get_string_repr(&self, ts: i64) -> Result { 34 | try_opt(|| { 35 | let (secs, nano) = match self.unit { 36 | TimeUnit::Second => (ts, 0), 37 | TimeUnit::Millisecond => (ts / 1_000, (ts % 1_000) * 1_000_000), 38 | TimeUnit::Microsecond => (ts / 1_000_000, (ts % 1_000_000) * 1_000), 39 | TimeUnit::Nanosecond => (ts / 1_000_000_000, ts % 1_000_000_000), 40 | }; 41 | let time = NaiveTime::from_num_seconds_from_midnight_opt( 42 | u32::try_from(secs).ok()?, 43 | u32::try_from(nano).ok()?, 44 | )?; 45 | Some(time.to_string()) 46 | }) 47 | .ok_or_else(|| { 48 | Error::custom(format!( 49 | "Cannot convert {ts} into Time64({unit})", 50 | unit = self.unit 51 | )) 52 | }) 53 | } 54 | } 55 | 56 | impl Context for TimeDeserializer<'_, T> { 57 | fn annotate(&self, annotations: &mut std::collections::BTreeMap) { 58 | set_default(annotations, "field", &self.path); 59 | set_default( 60 | annotations, 61 | "data_type", 62 | match T::NAME { 63 | "i32" => "Time32", 64 | "i64" => "Time64", 65 | _ => "", 66 | }, 67 | ); 68 | } 69 | } 70 | 71 | impl<'de, T: Integer + NamedType> RandomAccessDeserializer<'de> for TimeDeserializer<'de, T> { 72 | fn is_some(&self, idx: usize) -> Result { 73 | self.values.is_some(idx) 74 | } 75 | 76 | fn deserialize_any_some>(&self, visitor: V, idx: usize) -> Result { 77 | T::deserialize_any_at(self, visitor, idx) 78 | } 79 | 80 | fn deserialize_i32>(&self, visitor: V, idx: usize) -> Result { 81 | try_(|| visitor.visit_i32(self.values.get_required(idx)?.into_i32()?)).ctx(self) 82 | } 83 | 84 | fn deserialize_i64>(&self, visitor: V, idx: usize) -> Result { 85 | try_(|| visitor.visit_i64(self.values.get_required(idx)?.into_i64()?)).ctx(self) 86 | } 87 | 88 | fn deserialize_str>(&self, visitor: V, idx: usize) -> Result { 89 | try_(|| self.deserialize_string(visitor, idx)).ctx(self) 90 | } 91 | 92 | fn deserialize_string>(&self, visitor: V, idx: usize) -> Result { 93 | try_(|| { 94 | let ts 
= self.values.get_required(idx)?.into_i64()?; 95 | visitor.visit_string(self.get_string_repr(ts)?) 96 | }) 97 | .ctx(self) 98 | } 99 | 100 | fn deserialize_bytes>(&self, visitor: V, idx: usize) -> Result { 101 | try_(|| self.deserialize_byte_buf(visitor, idx)).ctx(self) 102 | } 103 | 104 | fn deserialize_byte_buf>(&self, visitor: V, idx: usize) -> Result { 105 | try_(|| { 106 | let ts = self.values.get_required(idx)?.into_i64()?; 107 | visitor.visit_byte_buf(self.get_string_repr(ts)?.into_bytes()) 108 | }) 109 | .ctx(self) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/deserialization/utils.rs: -------------------------------------------------------------------------------- 1 | use marrow::view::BitsWithOffset; 2 | use serde::{ 3 | de::{SeqAccess, Visitor}, 4 | Deserializer, 5 | }; 6 | 7 | use crate::internal::{ 8 | error::{fail, Error, Result}, 9 | utils::array_ext::get_bit_buffer, 10 | }; 11 | 12 | pub fn bitset_is_set(set: &BitsWithOffset<'_>, idx: usize) -> Result { 13 | get_bit_buffer(set.data, set.offset, idx) 14 | } 15 | 16 | pub struct U8Deserializer(pub u8); 17 | 18 | macro_rules! 
unimplemented { 19 | ($lifetime:lifetime, $name:ident $($tt:tt)*) => { 20 | fn $name>(self $($tt)*, _: V) -> Result { 21 | fail!("Unsupported: U8Deserializer does not implement {}", stringify!($name)) 22 | } 23 | }; 24 | } 25 | 26 | impl<'de> Deserializer<'de> for U8Deserializer { 27 | type Error = Error; 28 | 29 | fn deserialize_any>(self, visitor: V) -> Result { 30 | self.deserialize_u8(visitor) 31 | } 32 | 33 | fn deserialize_ignored_any>(self, visitor: V) -> Result { 34 | self.deserialize_any(visitor) 35 | } 36 | 37 | fn deserialize_u8>(self, visitor: V) -> Result { 38 | visitor.visit_u8(self.0) 39 | } 40 | 41 | fn deserialize_u16>(self, visitor: V) -> Result { 42 | visitor.visit_u16(self.0.into()) 43 | } 44 | 45 | fn deserialize_u32>(self, visitor: V) -> Result { 46 | visitor.visit_u32(self.0.into()) 47 | } 48 | 49 | fn deserialize_u64>(self, visitor: V) -> Result { 50 | visitor.visit_u64(self.0.into()) 51 | } 52 | 53 | fn deserialize_i8>(self, visitor: V) -> Result { 54 | visitor.visit_i8(self.0.try_into()?) 
55 | } 56 | 57 | fn deserialize_i16>(self, visitor: V) -> Result { 58 | visitor.visit_i16(self.0.into()) 59 | } 60 | 61 | fn deserialize_i32>(self, visitor: V) -> Result { 62 | visitor.visit_i32(self.0.into()) 63 | } 64 | 65 | fn deserialize_i64>(self, visitor: V) -> Result { 66 | visitor.visit_i64(self.0.into()) 67 | } 68 | 69 | unimplemented!('de, deserialize_identifier); 70 | unimplemented!('de, deserialize_str); 71 | unimplemented!('de, deserialize_string); 72 | unimplemented!('de, deserialize_bool); 73 | unimplemented!('de, deserialize_f32); 74 | unimplemented!('de, deserialize_f64); 75 | unimplemented!('de, deserialize_char); 76 | unimplemented!('de, deserialize_bytes); 77 | unimplemented!('de, deserialize_byte_buf); 78 | unimplemented!('de, deserialize_option); 79 | unimplemented!('de, deserialize_unit); 80 | unimplemented!('de, deserialize_unit_struct, _: &'static str); 81 | unimplemented!('de, deserialize_newtype_struct, _: &'static str); 82 | unimplemented!('de, deserialize_seq); 83 | unimplemented!('de, deserialize_tuple, _: usize); 84 | unimplemented!('de, deserialize_tuple_struct, _: &'static str, _: usize); 85 | unimplemented!('de, deserialize_map); 86 | unimplemented!('de, deserialize_struct, _: &'static str, _: &'static [&'static str]); 87 | unimplemented!('de, deserialize_enum, _: &'static str, _: &'static [&'static str]); 88 | } 89 | 90 | pub struct U8SliceDeserializer<'a>(&'a [u8]); 91 | 92 | impl<'a> U8SliceDeserializer<'a> { 93 | pub fn new(bytes: &'a [u8]) -> Self { 94 | Self(bytes) 95 | } 96 | } 97 | 98 | impl<'de> SeqAccess<'de> for U8SliceDeserializer<'de> { 99 | type Error = Error; 100 | 101 | fn size_hint(&self) -> Option { 102 | Some(self.0.len()) 103 | } 104 | 105 | fn next_element_seed>( 106 | &mut self, 107 | seed: T, 108 | ) -> Result> { 109 | let Some((item, rest)) = self.0.split_first() else { 110 | return Ok(None); 111 | }; 112 | let item = seed.deserialize(U8Deserializer(*item))?; 113 | self.0 = rest; 114 | 115 | Ok(Some(item)) 
116 | } 117 | } 118 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod array_builder; 2 | pub mod chrono; 3 | pub mod deserialization; 4 | pub mod deserializer; 5 | pub mod error; 6 | pub mod schema; 7 | pub mod serialization; 8 | pub mod serializer; 9 | pub mod utils; 10 | 11 | #[cfg(test)] 12 | pub mod testing; 13 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/extensions/bool8_field.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use marrow::datatypes::{DataType, Field}; 4 | 5 | use crate::internal::{ 6 | error::{Error, Result}, 7 | schema::PrettyField, 8 | }; 9 | 10 | /// A helper to construct new `Bool8` fields (`arrow.bool8`) 11 | /// 12 | /// This extension type can be used with `overwrites` in schema tracing: 13 | /// 14 | /// ```rust 15 | /// # use serde_json::json; 16 | /// # use serde_arrow::{Result, schema::{SerdeArrowSchema, SchemaLike, TracingOptions, ext::Bool8Field}}; 17 | /// # use serde::Deserialize; 18 | /// # fn main() -> Result<()> { 19 | /// ##[derive(Deserialize)] 20 | /// struct Record { 21 | /// int_field: i32, 22 | /// nested: Nested, 23 | /// } 24 | /// 25 | /// ##[derive(Deserialize)] 26 | /// struct Nested { 27 | /// bool_field: bool, 28 | /// } 29 | /// 30 | /// let tracing_options = TracingOptions::default() 31 | /// .overwrite("nested.bool_field", Bool8Field::new("bool_field"))?; 32 | /// 33 | /// let schema = SerdeArrowSchema::from_type::(tracing_options)?; 34 | /// # std::mem::drop(schema); 35 | /// # Ok(()) 36 | /// # } 37 | /// ``` 38 | /// 39 | /// It can also be converted to an `arrow` `Field` for manual schema manipulation. 
40 | /// 41 | pub struct Bool8Field { 42 | name: String, 43 | nullable: bool, 44 | } 45 | 46 | impl Bool8Field { 47 | /// Construct a new non-nullable `Bool8Field` 48 | pub fn new(name: &str) -> Self { 49 | Self { 50 | name: name.into(), 51 | nullable: false, 52 | } 53 | } 54 | 55 | /// Set the nullability of the field 56 | pub fn nullable(mut self, value: bool) -> Self { 57 | self.nullable = value; 58 | self 59 | } 60 | } 61 | 62 | impl TryFrom<&Bool8Field> for Field { 63 | type Error = Error; 64 | 65 | fn try_from(value: &Bool8Field) -> Result { 66 | let mut metadata = HashMap::new(); 67 | metadata.insert("ARROW:extension:name".into(), "arrow.bool8".into()); 68 | metadata.insert("ARROW:extension:metadata".into(), String::new()); 69 | 70 | Ok(Field { 71 | name: value.name.to_owned(), 72 | nullable: value.nullable, 73 | data_type: DataType::Int8, 74 | metadata, 75 | }) 76 | } 77 | } 78 | 79 | impl serde::ser::Serialize for Bool8Field { 80 | fn serialize(&self, serializer: S) -> Result { 81 | use serde::ser::Error; 82 | let field = Field::try_from(self).map_err(S::Error::custom)?; 83 | PrettyField(&field).serialize(serializer) 84 | } 85 | } 86 | 87 | #[test] 88 | fn bool8_repr() -> crate::internal::error::PanicOnError<()> { 89 | use serde_json::json; 90 | 91 | let field = Bool8Field::new("hello"); 92 | 93 | let field = Field::try_from(&field)?; 94 | let actual = serde_json::to_value(&PrettyField(&field))?; 95 | 96 | let expected = json!({ 97 | "name": "hello", 98 | "data_type": "I8", 99 | "metadata": { 100 | "ARROW:extension:name": "arrow.bool8", 101 | "ARROW:extension:metadata": "", 102 | }, 103 | }); 104 | 105 | assert_eq!(actual, expected); 106 | Ok(()) 107 | } 108 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/extensions/mod.rs: -------------------------------------------------------------------------------- 1 | mod bool8_field; 2 | mod fixed_shape_tensor_field; 3 | mod utils; 4 | mod 
variable_shape_tensor_field; 5 | 6 | pub use bool8_field::Bool8Field; 7 | pub use fixed_shape_tensor_field::FixedShapeTensorField; 8 | pub use variable_shape_tensor_field::VariableShapeTensorField; 9 | 10 | const _: () = { 11 | trait AssertSendSync: Send + Sync {} 12 | impl AssertSendSync for Bool8Field {} 13 | impl AssertSendSync for FixedShapeTensorField {} 14 | impl AssertSendSync for VariableShapeTensorField {} 15 | }; 16 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/extensions/utils.rs: -------------------------------------------------------------------------------- 1 | use crate::internal::error::{fail, Result}; 2 | 3 | pub fn check_dim_names(ndim: usize, dim_names: &[String]) -> Result<()> { 4 | if dim_names.len() != ndim { 5 | fail!("Number of dim names must be equal to the number of dimensions"); 6 | } 7 | Ok(()) 8 | } 9 | 10 | /* Check that `permutation` contains every index in 0..ndim exactly once. Fails on a length mismatch, an out-of-range index, or a repeated index. */ pub fn check_permutation(ndim: usize, permutation: &[usize]) -> Result<()> { 11 | if permutation.len() != ndim { 12 | fail!("Number of permutation entries must be equal to the number of dimensions"); 13 | } 14 | let mut seen = vec![false; permutation.len()]; 15 | for &i in permutation { 16 | if i >= seen.len() { 17 | fail!( 18 | "Invalid permutation: index {i} is not in range 0..{len}", 19 | len = seen.len() 20 | ); 21 | } 22 | /* test-and-set: mark `i` as seen; the previous flag tells whether it is a repeat */ if std::mem::replace(&mut seen[i], true) { 23 | fail!("Invalid permutation: index {i} found multiple times"); 24 | } 25 | } 26 | for (i, seen) in seen.into_iter().enumerate() { 27 | if !seen { 28 | fail!("Invalid permutation: index {i} is not present"); 29 | } 30 | } 31 | Ok(()) 32 | } 33 | 34 | pub fn write_list( 35 | s: &mut String, 36 | items: impl Iterator, 37 | ) -> Result<()> { 38 | use std::fmt::Write; 39 | 40 | write!(s, "[")?; 41 | for (idx, val) in items.enumerate() { 42 | if idx != 0 { 43 | write!(s, ",{val}")?; 44 | } else { 45 | write!(s, "{val}")?; 46 | } 47 | } 48 | write!(s, "]")?; 49 | Ok(()) 50 | } 51 | 52 | pub struct DebugRepr(pub T); 53 | 54 | impl 
std::fmt::Display for DebugRepr { 55 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 56 | write!(f, "{:?}", self.0) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/from_samples/test_error_messages.rs: -------------------------------------------------------------------------------- 1 | use serde::Serialize; 2 | 3 | use crate::internal::{ 4 | schema::{SchemaLike, SerdeArrowSchema, TracingOptions}, 5 | testing::assert_error_contains, 6 | }; 7 | 8 | #[test] 9 | fn outer_struct() { 10 | let res = SerdeArrowSchema::from_samples(&[1_u32, 2_u32, 3_u32], TracingOptions::default()); 11 | assert_error_contains( 12 | &res, 13 | "Only struct-like types are supported as root types in schema tracing.", 14 | ); 15 | assert_error_contains(&res, "Consider using the `Items` wrapper,"); 16 | } 17 | 18 | /// See: https://github.com/chmp/serde_arrow/issues/97 19 | #[test] 20 | fn outer_sequence_issue_97() { 21 | use serde::Serialize; 22 | 23 | #[derive(Debug, Serialize)] 24 | pub struct A { 25 | pub b: String, 26 | pub k: f64, 27 | } 28 | let b = A { 29 | b: String::from("Test"), 30 | k: 100.0, 31 | }; 32 | 33 | let res = SerdeArrowSchema::from_samples(&b, TracingOptions::default()); 34 | assert_error_contains(&res, "Cannot trace non-sequences with `from_samples`"); 35 | assert_error_contains(&res, "consider wrapping the argument in an array"); 36 | } 37 | 38 | #[test] 39 | fn enums_without_data() { 40 | #[derive(Debug, Serialize)] 41 | pub enum E { 42 | A, 43 | B, 44 | } 45 | 46 | let res = SerdeArrowSchema::from_samples(&[E::A, E::B], TracingOptions::default()); 47 | assert_error_contains(&res, "by setting `enums_without_data_as_strings` to `true`"); 48 | } 49 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/from_type/test_error_messages.rs: 
-------------------------------------------------------------------------------- 1 | //! Test the error messages from_type is generating 2 | 3 | use std::collections::HashMap; 4 | 5 | use serde::Deserialize; 6 | use serde_json::json; 7 | 8 | use crate::internal::{ 9 | schema::{SchemaLike, SerdeArrowSchema, TracingOptions}, 10 | testing::assert_error_contains, 11 | }; 12 | 13 | #[test] 14 | fn from_type_budget() { 15 | let res = SerdeArrowSchema::from_type::(TracingOptions::default().from_type_budget(0)); 16 | assert_error_contains( 17 | &res, 18 | "Could not determine schema from the type after 0 iterations.", 19 | ); 20 | assert_error_contains( 21 | &res, 22 | "Consider increasing the budget option or using `from_samples`.", 23 | ); 24 | } 25 | 26 | #[test] 27 | fn non_self_describing_types() { 28 | let res = SerdeArrowSchema::from_type::(TracingOptions::default()); 29 | assert_error_contains( 30 | &res, 31 | "Non self describing types cannot be traced with `from_type`.", 32 | ); 33 | assert_error_contains(&res, "Consider using `from_samples`."); 34 | } 35 | 36 | #[test] 37 | fn map_as_struct() { 38 | let res = SerdeArrowSchema::from_type::>( 39 | TracingOptions::default().map_as_struct(true), 40 | ); 41 | assert_error_contains(&res, "Cannot trace maps as structs with `from_type`"); 42 | assert_error_contains(&res, "Consider using `from_samples`"); 43 | } 44 | 45 | #[test] 46 | fn outer_struct() { 47 | let res = SerdeArrowSchema::from_type::(TracingOptions::default()); 48 | assert_error_contains( 49 | &res, 50 | "Only struct-like types are supported as root types in schema tracing.", 51 | ); 52 | assert_error_contains(&res, "Consider using the `Item` wrapper,"); 53 | } 54 | 55 | #[test] 56 | fn enums_without_data() { 57 | #[derive(Debug, Deserialize)] 58 | pub enum E { 59 | A, 60 | B, 61 | } 62 | 63 | let res = SerdeArrowSchema::from_type::(TracingOptions::default()); 64 | assert_error_contains(&res, "by setting `enums_without_data_as_strings` to `true`"); 65 | } 
66 | 67 | #[test] 68 | fn missing_overwrites() { 69 | #[derive(Debug, Deserialize)] 70 | pub struct S { 71 | #[allow(dead_code)] 72 | a: i64, 73 | } 74 | 75 | let res = SerdeArrowSchema::from_type::( 76 | TracingOptions::default() 77 | .overwrite("b", json!({"name": "b", "data_type": "I64"})) 78 | .unwrap(), 79 | ); 80 | assert_error_contains(&res, "Overwritten fields could not be found:"); 81 | } 82 | 83 | #[test] 84 | fn mismatched_overwrite_name() { 85 | #[derive(Debug, Deserialize)] 86 | pub struct S { 87 | #[allow(dead_code)] 88 | a: i64, 89 | } 90 | 91 | let res = SerdeArrowSchema::from_type::( 92 | TracingOptions::default() 93 | .overwrite("a", json!({"name": "b", "data_type": "I64"})) 94 | .unwrap(), 95 | ); 96 | assert_error_contains(&res, "Invalid name for overwritten field"); 97 | } 98 | 99 | #[test] 100 | fn overwrite_invalid_name() { 101 | #[derive(Debug, Deserialize)] 102 | pub struct S { 103 | #[allow(dead_code)] 104 | a: i64, 105 | } 106 | 107 | let res = SerdeArrowSchema::from_type::( 108 | TracingOptions::default() 109 | .overwrite("a", json!({"name": "b", "data_type": "I64"})) 110 | .unwrap(), 111 | ); 112 | assert_error_contains( 113 | &res, 114 | "Invalid name for overwritten field \"a\": found \"b\", expected \"a\"", 115 | ); 116 | } 117 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/serde/mod.rs: -------------------------------------------------------------------------------- 1 | //! Group all serialization / deserialization related functionality 2 | //! 
3 | pub mod deserialize; 4 | pub mod serialize; 5 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/schema/strategy.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::{BTreeMap, HashMap}, 3 | str::FromStr, 4 | }; 5 | 6 | use serde::{Deserialize, Serialize}; 7 | 8 | use crate::internal::error::{fail, Error, Result}; 9 | 10 | /// The metadata key under which to store the strategy 11 | /// 12 | /// See the [module][crate::schema] for details. 13 | /// 14 | pub const STRATEGY_KEY: &str = "SERDE_ARROW:strategy"; 15 | 16 | /// Strategies for handling types without a direct match between arrow and serde 17 | /// 18 | /// For the correct strategy both the field type and the field metadata must be 19 | /// correctly configured. In particular, when determining the schema from the 20 | /// Rust objects themselves, some field types are incorrectly recognized (e.g., 21 | /// datetimes). 22 | /// 23 | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] 24 | #[serde(into = "String", try_from = "String")] 25 | #[non_exhaustive] 26 | pub enum Strategy { 27 | /// Marker that the type of the field could not be determined during tracing 28 | /// 29 | InconsistentTypes, 30 | /// Serialize Rust tuples as Arrow structs with numeric field names starting 31 | /// at `"0"` 32 | /// 33 | /// This strategy is most likely the best choice, as Rust tuples can 34 | /// contain different types, whereas Arrow sequences must be of uniform type 35 | /// 36 | TupleAsStruct, 37 | /// Serialize Rust maps as Arrow structs 38 | /// 39 | /// The field names are sorted by name to ensure unordered maps (e.g., 40 | /// HashMap) have a defined order. 41 | /// 42 | /// Fields that are not present in all instances of the map are marked as 43 | /// nullable in schema tracing. In serialization these fields are written as 44 | /// null values if not present. 
45 | /// 46 | /// This strategy is most likely the best choice: 47 | /// 48 | /// - using the `#[serde(flatten)]` attribute converts a struct into a map 49 | /// - the support for arrow maps in the data ecosystem is limited (e.g., 50 | /// polars does not support them) 51 | /// 52 | MapAsStruct, 53 | /// Mark a variant as unknown 54 | /// 55 | /// This strategy applies only to fields with DataType Null. If 56 | /// serialization or deserialization of such a field is attempted, it will 57 | /// result in an error. 58 | UnknownVariant, 59 | } 60 | 61 | impl std::fmt::Display for Strategy { 62 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 63 | match self { 64 | Self::InconsistentTypes => write!(f, "InconsistentTypes"), 65 | Self::TupleAsStruct => write!(f, "TupleAsStruct"), 66 | Self::MapAsStruct => write!(f, "MapAsStruct"), 67 | Self::UnknownVariant => write!(f, "UnknownVariant"), 68 | } 69 | } 70 | } 71 | 72 | impl From<Strategy> for String { 73 | fn from(strategy: Strategy) -> String { 74 | strategy.to_string() 75 | } 76 | } 77 | 78 | impl TryFrom<String> for Strategy { 79 | type Error = Error; 80 | 81 | fn try_from(s: String) -> Result<Self> { 82 | s.parse() 83 | } 84 | } 85 | 86 | impl FromStr for Strategy { 87 | type Err = Error; 88 | 89 | fn from_str(s: &str) -> Result<Self> { 90 | match s { 91 | "InconsistentTypes" => Ok(Self::InconsistentTypes), 92 | "TupleAsStruct" => Ok(Self::TupleAsStruct), 93 | "MapAsStruct" => Ok(Self::MapAsStruct), 94 | "UnknownVariant" => Ok(Self::UnknownVariant), 95 | _ => fail!("Unknown strategy {s}"), 96 | } 97 | } 98 | } 99 | 100 | impl From<Strategy> for BTreeMap<String, String> { 101 | fn from(value: Strategy) -> Self { 102 | let mut res = BTreeMap::new(); 103 | res.insert(STRATEGY_KEY.to_string(), value.to_string()); 104 | res 105 | } 106 | } 107 | 108 | impl From<Strategy> for HashMap<String, String> { 109 | fn from(value: Strategy) -> Self { 110 | let mut res = HashMap::new(); 111 | res.insert(STRATEGY_KEY.to_string(), value.to_string()); 112 | res 113 | } 114 | } 115 | 116 | 
pub fn get_strategy_from_metadata(metadata: &HashMap) -> Result> { 117 | let Some(strategy) = metadata.get(STRATEGY_KEY) else { 118 | return Ok(None); 119 | }; 120 | Ok(Some(strategy.parse()?)) 121 | } 122 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/bool_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use marrow::array::{Array, BooleanArray}; 4 | 5 | use crate::internal::{ 6 | error::{set_default, try_, Context, ContextSupport, Result}, 7 | utils::array_ext::{set_bit_buffer, set_validity, set_validity_default}, 8 | }; 9 | 10 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 11 | 12 | #[derive(Debug, Clone)] 13 | pub struct BoolBuilder { 14 | path: String, 15 | array: BooleanArray, 16 | } 17 | 18 | impl BoolBuilder { 19 | pub fn new(path: String, is_nullable: bool) -> Self { 20 | Self { 21 | path, 22 | array: BooleanArray { 23 | len: 0, 24 | validity: is_nullable.then(Vec::new), 25 | values: Vec::new(), 26 | }, 27 | } 28 | } 29 | 30 | pub fn take(&mut self) -> ArrayBuilder { 31 | ArrayBuilder::Bool(Self { 32 | path: self.path.clone(), 33 | array: BooleanArray { 34 | len: std::mem::take(&mut self.array.len), 35 | validity: self.array.validity.as_mut().map(std::mem::take), 36 | values: std::mem::take(&mut self.array.values), 37 | }, 38 | }) 39 | } 40 | 41 | pub fn is_nullable(&self) -> bool { 42 | self.array.validity.is_some() 43 | } 44 | 45 | pub fn into_array(self) -> Result { 46 | Ok(Array::Boolean(self.array)) 47 | } 48 | } 49 | 50 | impl Context for BoolBuilder { 51 | fn annotate(&self, annotations: &mut BTreeMap) { 52 | set_default(annotations, "field", &self.path); 53 | set_default(annotations, "data_type", "Boolean"); 54 | } 55 | } 56 | 57 | impl SimpleSerializer for BoolBuilder { 58 | fn serialize_default(&mut self) -> Result<()> { 59 | try_(|| { 60 | 
set_validity_default(self.array.validity.as_mut(), self.array.len); 61 | set_bit_buffer(&mut self.array.values, self.array.len, false); 62 | self.array.len += 1; 63 | Ok(()) 64 | }) 65 | .ctx(self) 66 | } 67 | 68 | fn serialize_none(&mut self) -> Result<()> { 69 | try_(|| { 70 | set_validity(self.array.validity.as_mut(), self.array.len, false)?; 71 | set_bit_buffer(&mut self.array.values, self.array.len, false); 72 | self.array.len += 1; 73 | Ok(()) 74 | }) 75 | .ctx(self) 76 | } 77 | 78 | fn serialize_bool(&mut self, v: bool) -> Result<()> { 79 | try_(|| { 80 | set_validity(self.array.validity.as_mut(), self.array.len, true)?; 81 | set_bit_buffer(&mut self.array.values, self.array.len, v); 82 | self.array.len += 1; 83 | Ok(()) 84 | }) 85 | .ctx(self) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/date_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use chrono::{NaiveDate, NaiveDateTime}; 4 | use marrow::array::{Array, PrimitiveArray}; 5 | 6 | use crate::internal::{ 7 | error::{fail, set_default, try_, Context, ContextSupport, Result}, 8 | utils::array_ext::{ArrayExt, ScalarArrayExt}, 9 | }; 10 | 11 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 12 | 13 | pub trait DatePrimitive: 14 | TryFrom 15 | + TryFrom 16 | + Copy 17 | + std::fmt::Display 18 | + std::default::Default 19 | + std::ops::Mul 20 | + Sized 21 | + 'static 22 | { 23 | const NAME: &'static str; 24 | const DATA_TYPE_NAME: &'static str; 25 | const DAY_TO_VALUE_FACTOR: Self; 26 | const ARRAY_BUILDER_VARIANT: fn(DateBuilder) -> ArrayBuilder; 27 | const ARRAY_VARIANT: fn(PrimitiveArray) -> Array; 28 | } 29 | 30 | impl DatePrimitive for i32 { 31 | const NAME: &'static str = "i32"; 32 | const DATA_TYPE_NAME: &'static str = "Date32"; 33 | const DAY_TO_VALUE_FACTOR: Self = 1; 34 | const 
ARRAY_BUILDER_VARIANT: fn(DateBuilder) -> ArrayBuilder = ArrayBuilder::Date32; 35 | const ARRAY_VARIANT: fn(PrimitiveArray) -> Array = Array::Date32; 36 | } 37 | 38 | impl DatePrimitive for i64 { 39 | const NAME: &'static str = "i64"; 40 | const DATA_TYPE_NAME: &'static str = "Date64"; 41 | const DAY_TO_VALUE_FACTOR: Self = 86_400_000; 42 | const ARRAY_BUILDER_VARIANT: fn(DateBuilder) -> ArrayBuilder = ArrayBuilder::Date64; 43 | const ARRAY_VARIANT: fn(PrimitiveArray) -> Array = Array::Date64; 44 | } 45 | 46 | #[derive(Debug, Clone)] 47 | pub struct DateBuilder { 48 | path: String, 49 | array: PrimitiveArray, 50 | } 51 | 52 | impl DateBuilder { 53 | pub fn new(path: String, is_nullable: bool) -> Self { 54 | Self { 55 | path, 56 | array: PrimitiveArray::new(is_nullable), 57 | } 58 | } 59 | 60 | pub fn take(&mut self) -> ArrayBuilder { 61 | I::ARRAY_BUILDER_VARIANT(Self { 62 | path: self.path.clone(), 63 | array: self.array.take(), 64 | }) 65 | } 66 | 67 | pub fn is_nullable(&self) -> bool { 68 | self.array.is_nullable() 69 | } 70 | 71 | pub fn into_array(self) -> Result { 72 | Ok(I::ARRAY_VARIANT(self.array)) 73 | } 74 | 75 | fn parse_str_to_days_since_epoch(&self, s: &str) -> Result { 76 | const UNIX_EPOCH: NaiveDate = NaiveDateTime::UNIX_EPOCH.date(); 77 | 78 | let date = s.parse::()?; 79 | let duration_since_epoch = date.signed_duration_since(UNIX_EPOCH).num_days(); 80 | let Ok(days_since_epoch) = I::try_from(duration_since_epoch) else { 81 | fail!("cannot convert {duration_since_epoch} to {I}", I = I::NAME); 82 | }; 83 | 84 | Ok(days_since_epoch * I::DAY_TO_VALUE_FACTOR) 85 | } 86 | } 87 | 88 | impl Context for DateBuilder { 89 | fn annotate(&self, annotations: &mut BTreeMap) { 90 | set_default(annotations, "field", &self.path); 91 | set_default(annotations, "data_type", I::DATA_TYPE_NAME); 92 | } 93 | } 94 | 95 | impl SimpleSerializer for DateBuilder { 96 | fn serialize_default(&mut self) -> Result<()> { 97 | try_(|| self.array.push_scalar_default()).ctx(self) 
98 | } 99 | 100 | fn serialize_none(&mut self) -> Result<()> { 101 | try_(|| self.array.push_scalar_none()).ctx(self) 102 | } 103 | 104 | fn serialize_str(&mut self, v: &str) -> Result<()> { 105 | try_(|| { 106 | let days_since_epoch = self.parse_str_to_days_since_epoch(v)?; 107 | self.array.push_scalar_value(days_since_epoch) 108 | }) 109 | .ctx(self) 110 | } 111 | 112 | fn serialize_i32(&mut self, v: i32) -> Result<()> { 113 | try_(|| { 114 | let Ok(v) = I::try_from(v) else { 115 | fail!("cannot convert {v} to {I}", I = I::NAME); 116 | }; 117 | self.array.push_scalar_value(v) 118 | }) 119 | .ctx(self) 120 | } 121 | 122 | fn serialize_i64(&mut self, v: i64) -> Result<()> { 123 | try_(|| { 124 | let Ok(v) = I::try_from(v) else { 125 | fail!("cannot convert {v} to {I}", I = I::NAME); 126 | }; 127 | self.array.push_scalar_value(v) 128 | }) 129 | .ctx(self) 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/decimal_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use marrow::array::{Array, DecimalArray, PrimitiveArray}; 4 | 5 | use crate::internal::{ 6 | error::{set_default, try_, Context, ContextSupport, Result}, 7 | utils::{ 8 | array_ext::{ArrayExt, ScalarArrayExt}, 9 | decimal::{self, DecimalParser}, 10 | }, 11 | }; 12 | 13 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 14 | 15 | #[derive(Debug, Clone)] 16 | pub struct DecimalBuilder { 17 | path: String, 18 | pub precision: u8, 19 | pub scale: i8, 20 | pub f32_factor: f32, 21 | pub f64_factor: f64, 22 | pub parser: DecimalParser, 23 | pub array: PrimitiveArray, 24 | } 25 | 26 | impl DecimalBuilder { 27 | pub fn new(path: String, precision: u8, scale: i8, is_nullable: bool) -> Self { 28 | Self { 29 | path, 30 | precision, 31 | scale, 32 | f32_factor: (10.0_f32).powi(scale as i32), 33 | f64_factor: 
(10.0_f64).powi(scale as i32), 34 | parser: DecimalParser::new(precision, scale, true), 35 | array: PrimitiveArray::new(is_nullable), 36 | } 37 | } 38 | 39 | pub fn take(&mut self) -> ArrayBuilder { 40 | ArrayBuilder::Decimal128(Self { 41 | path: self.path.clone(), 42 | precision: self.precision, 43 | scale: self.scale, 44 | f32_factor: self.f32_factor, 45 | f64_factor: self.f64_factor, 46 | parser: self.parser, 47 | array: self.array.take(), 48 | }) 49 | } 50 | 51 | pub fn is_nullable(&self) -> bool { 52 | self.array.is_nullable() 53 | } 54 | 55 | pub fn into_array(self) -> Result { 56 | Ok(Array::Decimal128(DecimalArray { 57 | precision: self.precision, 58 | scale: self.scale, 59 | validity: self.array.validity, 60 | values: self.array.values, 61 | })) 62 | } 63 | } 64 | 65 | impl Context for DecimalBuilder { 66 | fn annotate(&self, annotations: &mut BTreeMap) { 67 | set_default(annotations, "field", &self.path); 68 | set_default(annotations, "data_type", "Decimal128(..)"); 69 | } 70 | } 71 | 72 | impl SimpleSerializer for DecimalBuilder { 73 | fn serialize_default(&mut self) -> Result<()> { 74 | try_(|| self.array.push_scalar_default()).ctx(self) 75 | } 76 | 77 | fn serialize_none(&mut self) -> Result<()> { 78 | try_(|| self.array.push_scalar_none()).ctx(self) 79 | } 80 | 81 | fn serialize_f32(&mut self, v: f32) -> Result<()> { 82 | try_(|| self.array.push_scalar_value((v * self.f32_factor) as i128)).ctx(self) 83 | } 84 | 85 | fn serialize_f64(&mut self, v: f64) -> Result<()> { 86 | try_(|| self.array.push_scalar_value((v * self.f64_factor) as i128)).ctx(self) 87 | } 88 | 89 | fn serialize_str(&mut self, v: &str) -> Result<()> { 90 | try_(|| { 91 | let mut parse_buffer = [0; decimal::BUFFER_SIZE_I128]; 92 | let val = self 93 | .parser 94 | .parse_decimal128(&mut parse_buffer, v.as_bytes())?; 95 | 96 | self.array.push_scalar_value(val) 97 | }) 98 | .ctx(self) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- 
/serde_arrow/src/internal/serialization/dictionary_utf8_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeMap, HashMap}; 2 | 3 | use marrow::array::{Array, DictionaryArray}; 4 | use serde::Serialize; 5 | 6 | use crate::internal::{ 7 | error::{fail, set_default, try_, Context, ContextSupport, Result}, 8 | utils::{array_view_ext::ViewExt, Mut}, 9 | }; 10 | 11 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 12 | 13 | #[derive(Debug, Clone)] 14 | pub struct DictionaryUtf8Builder { 15 | path: String, 16 | pub indices: Box, 17 | pub values: Box, 18 | pub index: HashMap, 19 | } 20 | 21 | impl DictionaryUtf8Builder { 22 | pub fn new(path: String, indices: ArrayBuilder, values: ArrayBuilder) -> Self { 23 | Self { 24 | path, 25 | indices: Box::new(indices), 26 | values: Box::new(values), 27 | index: HashMap::new(), 28 | } 29 | } 30 | 31 | pub fn take(&mut self) -> ArrayBuilder { 32 | ArrayBuilder::DictionaryUtf8(Self { 33 | path: self.path.clone(), 34 | indices: Box::new(self.indices.take()), 35 | values: Box::new(self.values.take()), 36 | index: std::mem::take(&mut self.index), 37 | }) 38 | } 39 | 40 | pub fn is_nullable(&self) -> bool { 41 | self.indices.is_nullable() 42 | } 43 | 44 | pub fn into_array(mut self) -> Result { 45 | let keys = Box::new((*self.indices).into_array()?); 46 | 47 | let has_non_null_keys = !keys.as_view().is_nullable()? && keys.as_view().len()? 
!= 0; 48 | let has_no_values = self.index.is_empty(); 49 | 50 | if has_non_null_keys && has_no_values { 51 | // the non-null keys must be dummy values, map them to empty strings to ensure they can 52 | // be decoded 53 | self.values.serialize_str("")?; 54 | } 55 | 56 | Ok(Array::Dictionary(DictionaryArray { 57 | keys, 58 | values: Box::new((*self.values).into_array()?), 59 | })) 60 | } 61 | } 62 | 63 | impl Context for DictionaryUtf8Builder { 64 | fn annotate(&self, annotations: &mut BTreeMap) { 65 | set_default(annotations, "field", &self.path); 66 | set_default(annotations, "data_type", "Dictionary(..)"); 67 | } 68 | } 69 | 70 | impl SimpleSerializer for DictionaryUtf8Builder { 71 | fn serialize_default(&mut self) -> Result<()> { 72 | try_(|| self.indices.serialize_default()).ctx(self) 73 | } 74 | 75 | fn serialize_none(&mut self) -> Result<()> { 76 | try_(|| self.indices.serialize_none().ctx(self)).ctx(self) 77 | } 78 | 79 | fn serialize_str(&mut self, v: &str) -> Result<()> { 80 | try_(|| { 81 | let idx = match self.index.get(v) { 82 | Some(idx) => *idx, 83 | None => { 84 | let idx = self.index.len(); 85 | self.values.serialize_str(v)?; 86 | self.index.insert(v.to_string(), idx); 87 | idx 88 | } 89 | }; 90 | idx.serialize(Mut(self.indices.as_mut())) 91 | }) 92 | .ctx(self) 93 | } 94 | 95 | fn serialize_unit_variant( 96 | &mut self, 97 | _: &'static str, 98 | _: u32, 99 | variant: &'static str, 100 | ) -> Result<()> { 101 | try_(|| self.serialize_str(variant)).ctx(self) 102 | } 103 | 104 | fn serialize_tuple_variant_start<'this>( 105 | &'this mut self, 106 | _: &'static str, 107 | _: u32, 108 | _: &'static str, 109 | _: usize, 110 | ) -> Result<&'this mut super::ArrayBuilder> { 111 | fail!(in self, "Cannot serialize enum with data as string"); 112 | } 113 | 114 | fn serialize_struct_variant_start<'this>( 115 | &'this mut self, 116 | _: &'static str, 117 | _: u32, 118 | _: &'static str, 119 | _: usize, 120 | ) -> Result<&'this mut super::ArrayBuilder> { 121 | 
fail!(in self, "Cannot serialize enum with data as string"); 122 | } 123 | 124 | fn serialize_newtype_variant( 125 | &mut self, 126 | _: &'static str, 127 | _: u32, 128 | _: &'static str, 129 | _: &V, 130 | ) -> Result<()> { 131 | fail!(in self, "Cannot serialize enum with data as string"); 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/duration_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use marrow::{ 4 | array::{Array, PrimitiveArray, TimeArray}, 5 | datatypes::TimeUnit, 6 | }; 7 | 8 | use crate::internal::{ 9 | chrono, 10 | error::{set_default, try_, Context, ContextSupport, Result}, 11 | utils::array_ext::{ArrayExt, ScalarArrayExt}, 12 | }; 13 | 14 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 15 | 16 | #[derive(Debug, Clone)] 17 | pub struct DurationBuilder { 18 | path: String, 19 | pub unit: TimeUnit, 20 | pub array: PrimitiveArray, 21 | } 22 | 23 | impl DurationBuilder { 24 | pub fn new(path: String, unit: TimeUnit, is_nullable: bool) -> Self { 25 | Self { 26 | path, 27 | unit, 28 | array: PrimitiveArray::new(is_nullable), 29 | } 30 | } 31 | 32 | pub fn take(&mut self) -> ArrayBuilder { 33 | ArrayBuilder::Duration(Self { 34 | path: self.path.clone(), 35 | unit: self.unit, 36 | array: self.array.take(), 37 | }) 38 | } 39 | 40 | pub fn is_nullable(&self) -> bool { 41 | self.array.is_nullable() 42 | } 43 | 44 | pub fn into_array(self) -> Result { 45 | Ok(Array::Duration(TimeArray { 46 | unit: self.unit, 47 | validity: self.array.validity, 48 | values: self.array.values, 49 | })) 50 | } 51 | } 52 | 53 | impl Context for DurationBuilder { 54 | fn annotate(&self, annotations: &mut BTreeMap) { 55 | set_default(annotations, "field", &self.path); 56 | set_default(annotations, "data_type", "Duration(..)"); 57 | } 58 | } 59 | 60 | impl 
SimpleSerializer for DurationBuilder { 61 | fn serialize_default(&mut self) -> Result<()> { 62 | try_(|| self.array.push_scalar_default()).ctx(self) 63 | } 64 | 65 | fn serialize_none(&mut self) -> Result<()> { 66 | try_(|| self.array.push_scalar_none()).ctx(self) 67 | } 68 | 69 | fn serialize_i8(&mut self, v: i8) -> Result<()> { 70 | try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) 71 | } 72 | 73 | fn serialize_i16(&mut self, v: i16) -> Result<()> { 74 | try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) 75 | } 76 | 77 | fn serialize_i32(&mut self, v: i32) -> Result<()> { 78 | try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) 79 | } 80 | 81 | fn serialize_i64(&mut self, v: i64) -> Result<()> { 82 | try_(|| self.array.push_scalar_value(v)).ctx(self) 83 | } 84 | 85 | fn serialize_u8(&mut self, v: u8) -> Result<()> { 86 | try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) 87 | } 88 | 89 | fn serialize_u16(&mut self, v: u16) -> Result<()> { 90 | try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) 91 | } 92 | 93 | fn serialize_u32(&mut self, v: u32) -> Result<()> { 94 | try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) 95 | } 96 | 97 | fn serialize_u64(&mut self, v: u64) -> Result<()> { 98 | try_(|| self.array.push_scalar_value(i64::try_from(v)?)).ctx(self) 99 | } 100 | 101 | fn serialize_str(&mut self, v: &str) -> Result<()> { 102 | try_(|| { 103 | let value = chrono::parse_span(v)?.to_arrow_duration(self.unit)?; 104 | self.array.push_scalar_value(value) 105 | }) 106 | .ctx(self) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/map_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use marrow::{ 4 | array::{Array, MapArray}, 5 | datatypes::MapMeta, 6 | }; 7 | use serde::Serialize; 8 | 9 | use crate::internal::{ 10 | 
error::{set_default, try_, Context, ContextSupport, Result}, 11 | utils::{ 12 | array_ext::{ArrayExt, OffsetsArray, SeqArrayExt}, 13 | Mut, 14 | }, 15 | }; 16 | 17 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 18 | 19 | #[derive(Debug, Clone)] 20 | pub struct MapBuilder { 21 | pub path: String, 22 | pub meta: MapMeta, 23 | pub keys: Box, 24 | pub values: Box, 25 | pub offsets: OffsetsArray, 26 | } 27 | 28 | impl MapBuilder { 29 | pub fn new( 30 | path: String, 31 | meta: MapMeta, 32 | keys: ArrayBuilder, 33 | values: ArrayBuilder, 34 | is_nullable: bool, 35 | ) -> Result { 36 | Ok(Self { 37 | path, 38 | meta, 39 | offsets: OffsetsArray::new(is_nullable), 40 | keys: Box::new(keys), 41 | values: Box::new(values), 42 | }) 43 | } 44 | 45 | pub fn take(&mut self) -> ArrayBuilder { 46 | ArrayBuilder::Map(Self { 47 | path: self.path.clone(), 48 | meta: self.meta.clone(), 49 | offsets: self.offsets.take(), 50 | keys: Box::new(self.keys.take()), 51 | values: Box::new(self.values.take()), 52 | }) 53 | } 54 | 55 | pub fn is_nullable(&self) -> bool { 56 | self.offsets.validity.is_some() 57 | } 58 | 59 | pub fn into_array(self) -> Result { 60 | Ok(Array::Map(MapArray { 61 | meta: self.meta, 62 | keys: Box::new((*self.keys).into_array()?), 63 | values: Box::new((*self.values).into_array()?), 64 | validity: self.offsets.validity, 65 | offsets: self.offsets.offsets, 66 | })) 67 | } 68 | } 69 | 70 | impl Context for MapBuilder { 71 | fn annotate(&self, annotations: &mut BTreeMap) { 72 | set_default(annotations, "field", &self.path); 73 | set_default(annotations, "data_type", "Map(..)"); 74 | } 75 | } 76 | 77 | impl SimpleSerializer for MapBuilder { 78 | fn serialize_default(&mut self) -> Result<()> { 79 | try_(|| self.offsets.push_seq_default()).ctx(self) 80 | } 81 | 82 | fn serialize_none(&mut self) -> Result<()> { 83 | try_(|| self.offsets.push_seq_none()).ctx(self) 84 | } 85 | 86 | fn serialize_map_start(&mut self, _: Option) -> Result<()> { 87 
| try_(|| self.offsets.start_seq()).ctx(self) 88 | } 89 | 90 | fn serialize_map_key(&mut self, key: &V) -> Result<()> { 91 | try_(|| { 92 | self.offsets.push_seq_elements(1)?; 93 | key.serialize(Mut(self.keys.as_mut())) 94 | }) 95 | .ctx(self) 96 | } 97 | 98 | fn serialize_map_value(&mut self, value: &V) -> Result<()> { 99 | try_(|| value.serialize(Mut(self.values.as_mut()))).ctx(self) 100 | } 101 | 102 | fn serialize_map_end(&mut self) -> Result<()> { 103 | try_(|| self.offsets.end_seq()).ctx(self) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/mod.rs: -------------------------------------------------------------------------------- 1 | //! A serialization implementation without the event model 2 | 3 | pub mod array_builder; 4 | pub mod binary_builder; 5 | pub mod bool_builder; 6 | pub mod date_builder; 7 | pub mod decimal_builder; 8 | pub mod dictionary_utf8_builder; 9 | pub mod duration_builder; 10 | pub mod fixed_size_binary_builder; 11 | pub mod fixed_size_list_builder; 12 | pub mod float_builder; 13 | pub mod int_builder; 14 | pub mod list_builder; 15 | pub mod map_builder; 16 | pub mod null_builder; 17 | pub mod outer_sequence_builder; 18 | pub mod simple_serializer; 19 | pub mod struct_builder; 20 | pub mod time_builder; 21 | pub mod timestamp_builder; 22 | pub mod union_builder; 23 | pub mod unknown_variant_builder; 24 | pub mod utf8_builder; 25 | 26 | // #[cfg(test)] 27 | // mod test; 28 | pub use array_builder::ArrayBuilder; 29 | pub use outer_sequence_builder::OuterSequenceBuilder; 30 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/null_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use marrow::array::{Array, NullArray}; 4 | 5 | use crate::internal::error::{set_default, Context, Result}; 6 | 7 | 
use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 8 | 9 | #[derive(Debug, Clone)] 10 | pub struct NullBuilder { 11 | pub path: String, 12 | pub count: usize, 13 | } 14 | 15 | impl NullBuilder { 16 | pub fn new(path: String) -> Self { 17 | Self { path, count: 0 } 18 | } 19 | 20 | pub fn take(&mut self) -> ArrayBuilder { 21 | ArrayBuilder::Null(Self { 22 | path: self.path.clone(), 23 | count: std::mem::take(&mut self.count), 24 | }) 25 | } 26 | 27 | pub fn is_nullable(&self) -> bool { 28 | true 29 | } 30 | 31 | pub fn into_array(self) -> Result { 32 | Ok(Array::Null(NullArray { len: self.count })) 33 | } 34 | } 35 | 36 | impl Context for NullBuilder { 37 | fn annotate(&self, annotations: &mut BTreeMap) { 38 | set_default(annotations, "field", &self.path); 39 | set_default(annotations, "data_type", "Null"); 40 | } 41 | } 42 | 43 | impl SimpleSerializer for NullBuilder { 44 | fn serialize_default(&mut self) -> Result<()> { 45 | self.count += 1; 46 | Ok(()) 47 | } 48 | 49 | fn serialize_none(&mut self) -> Result<()> { 50 | self.count += 1; 51 | Ok(()) 52 | } 53 | 54 | fn serialize_unit_struct(&mut self, _: &'static str) -> Result<()> { 55 | self.count += 1; 56 | Ok(()) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /serde_arrow/src/internal/serialization/time_builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | 3 | use chrono::Timelike; 4 | use marrow::{ 5 | array::{Array, PrimitiveArray, TimeArray}, 6 | datatypes::TimeUnit, 7 | }; 8 | 9 | use crate::internal::{ 10 | error::{set_default, try_, Context, ContextSupport, Error, Result}, 11 | utils::{ 12 | array_ext::{ArrayExt, ScalarArrayExt}, 13 | NamedType, 14 | }, 15 | }; 16 | 17 | use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; 18 | 19 | #[derive(Debug, Clone)] 20 | pub struct TimeBuilder { 21 | path: String, 22 | pub unit: 
TimeUnit, 23 | pub array: PrimitiveArray, 24 | } 25 | 26 | impl TimeBuilder { 27 | pub fn new(path: String, unit: TimeUnit, is_nullable: bool) -> Self { 28 | Self { 29 | path, 30 | unit, 31 | array: PrimitiveArray::new(is_nullable), 32 | } 33 | } 34 | 35 | pub fn take_self(&mut self) -> Self { 36 | Self { 37 | path: self.path.clone(), 38 | unit: self.unit, 39 | array: self.array.take(), 40 | } 41 | } 42 | 43 | pub fn is_nullable(&self) -> bool { 44 | self.array.is_nullable() 45 | } 46 | } 47 | 48 | impl TimeBuilder { 49 | pub fn take(&mut self) -> ArrayBuilder { 50 | ArrayBuilder::Time32(self.take_self()) 51 | } 52 | 53 | pub fn into_array(self) -> Result { 54 | Ok(Array::Time32(TimeArray { 55 | unit: self.unit, 56 | validity: self.array.validity, 57 | values: self.array.values, 58 | })) 59 | } 60 | } 61 | 62 | impl TimeBuilder { 63 | pub fn take(&mut self) -> ArrayBuilder { 64 | ArrayBuilder::Time64(self.take_self()) 65 | } 66 | 67 | pub fn into_array(self) -> Result { 68 | Ok(Array::Time64(TimeArray { 69 | unit: self.unit, 70 | validity: self.array.validity, 71 | values: self.array.values, 72 | })) 73 | } 74 | } 75 | 76 | impl Context for TimeBuilder { 77 | fn annotate(&self, annotations: &mut BTreeMap) { 78 | set_default(annotations, "field", &self.path); 79 | set_default( 80 | annotations, 81 | "data_type", 82 | match I::NAME { 83 | "i32" => "Time32", 84 | "i64" => "Time64", 85 | _ => "", 86 | }, 87 | ); 88 | } 89 | } 90 | 91 | impl SimpleSerializer for TimeBuilder 92 | where 93 | I: NamedType + TryFrom + TryFrom + Default + 'static, 94 | Error: From<>::Error>, 95 | Error: From<>::Error>, 96 | { 97 | fn serialize_default(&mut self) -> Result<()> { 98 | try_(|| self.array.push_scalar_default()).ctx(self) 99 | } 100 | 101 | fn serialize_none(&mut self) -> Result<()> { 102 | try_(|| self.array.push_scalar_none()).ctx(self) 103 | } 104 | 105 | fn serialize_str(&mut self, v: &str) -> Result<()> { 106 | try_(|| { 107 | let (seconds_factor, nanoseconds_factor) = match 
self.unit {
                // (seconds_factor, nanoseconds_factor) for the configured unit
                TimeUnit::Nanosecond => (1_000_000_000, 1),
                TimeUnit::Microsecond => (1_000_000, 1_000),
                TimeUnit::Millisecond => (1_000, 1_000_000),
                TimeUnit::Second => (1, 1_000_000_000),
            };

            use chrono::naive::NaiveTime;
            let time = v.parse::<NaiveTime>()?;
            // whole seconds are scaled up, the sub-second part is scaled down
            let timestamp = i64::from(time.num_seconds_from_midnight()) * seconds_factor
                + i64::from(time.nanosecond()) / nanoseconds_factor;

            self.array.push_scalar_value(timestamp.try_into()?)
        })
        .ctx(self)
    }

    fn serialize_i32(&mut self, v: i32) -> Result<()> {
        try_(|| self.array.push_scalar_value(v.try_into()?)).ctx(self)
    }

    fn serialize_i64(&mut self, v: i64) -> Result<()> {
        try_(|| self.array.push_scalar_value(v.try_into()?)).ctx(self)
    }
}

--------------------------------------------------------------------------------
/serde_arrow/src/internal/serialization/timestamp_builder.rs:
--------------------------------------------------------------------------------
use std::collections::BTreeMap;

use marrow::{
    array::{Array, PrimitiveArray, TimestampArray},
    datatypes::TimeUnit,
};

use crate::internal::{
    error::{fail, set_default, try_, Context, ContextSupport, Result},
    utils::array_ext::{ArrayExt, ScalarArrayExt},
};

use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer};

/// Builder for timestamp arrays.
///
/// Values are accepted either as `i64` (pushed unchanged) or as strings that
/// are parsed with chrono and converted to the configured `unit`.
#[derive(Debug, Clone)]
pub struct TimestampBuilder {
    /// Path of the field, used to annotate errors
    path: String,
    pub unit: TimeUnit,
    pub timezone: Option<String>,
    /// `true` if `timezone` is UTC, `false` if no timezone is set
    pub utc: bool,
    pub array: PrimitiveArray<i64>,
}

impl TimestampBuilder {
    /// Create an empty builder.
    ///
    /// Fails if `timezone` is set to anything other than UTC (compared
    /// case-insensitively).
    pub fn new(
        path: String,
        unit: TimeUnit,
        timezone: Option<String>,
        is_nullable: bool,
    ) -> Result<Self> {
        Ok(Self {
            utc: is_utc_tz(timezone.as_deref())?,
            path,
            unit,
            timezone,
            array: PrimitiveArray::new(is_nullable),
        })
    }

    /// Wrap the result of `self.array.take()` and a copy of the configuration
    /// into an `ArrayBuilder::Timestamp`.
    pub fn take(&mut self) -> ArrayBuilder {
        ArrayBuilder::Timestamp(Self {
            path: self.path.clone(),
            unit: self.unit,
            timezone: self.timezone.clone(),
            utc: self.utc,
            array: self.array.take(),
        })
    }

    pub fn is_nullable(&self) -> bool {
        self.array.is_nullable()
    }

    /// Convert the collected validity and values into a marrow timestamp array.
    pub fn into_array(self) -> Result<Array> {
        Ok(Array::Timestamp(TimestampArray {
            unit: self.unit,
            timezone: self.timezone,
            validity: self.array.validity,
            values: self.array.values,
        }))
    }
}

/// Determine whether the timezone designates UTC.
///
/// Returns `false` for a missing timezone, `true` for `"UTC"` in any casing,
/// and an error for every other timezone.
fn is_utc_tz(tz: Option<&str>) -> Result<bool> {
    match tz {
        None => Ok(false),
        // compare in place instead of allocating an upper-cased copy
        Some(tz) if tz.eq_ignore_ascii_case("UTC") => Ok(true),
        Some(tz) => fail!("Timezone {tz} is not supported"),
    }
}

impl TimestampBuilder {
    /// Parse a date time string and convert it to a timestamp in `self.unit`.
    ///
    /// For UTC builders the string is parsed as `DateTime<Utc>`, otherwise as a
    /// `NaiveDateTime` that is interpreted as UTC.
    fn parse_str_to_timestamp(&self, s: &str) -> Result<i64> {
        use chrono::{DateTime, NaiveDateTime, Utc};

        let date_time = if self.utc {
            s.parse::<DateTime<Utc>>()?
        } else {
            s.parse::<NaiveDateTime>()?.and_utc()
        };

        match self.unit {
            // the nanosecond conversion is the only one that can fail
            TimeUnit::Nanosecond => match date_time.timestamp_nanos_opt() {
                Some(timestamp) => Ok(timestamp),
                _ => fail!(
                    concat!(
                        "Timestamp '{date_time}' cannot be converted to nanoseconds. ",
                        "The dates that can be represented as nanoseconds are between ",
                        "1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804.",
                    ),
                    date_time = date_time,
                ),
            },
            TimeUnit::Microsecond => Ok(date_time.timestamp_micros()),
            TimeUnit::Millisecond => Ok(date_time.timestamp_millis()),
            TimeUnit::Second => Ok(date_time.timestamp()),
        }
    }
}

impl Context for TimestampBuilder {
    /// Attach the field path and the data type to error annotations.
    fn annotate(&self, annotations: &mut BTreeMap<String, String>) {
        set_default(annotations, "field", &self.path);
        set_default(annotations, "data_type", "Timestamp(..)");
    }
}

impl SimpleSerializer for TimestampBuilder {
    fn serialize_default(&mut self) -> Result<()> {
        try_(|| self.array.push_scalar_default()).ctx(self)
    }

    fn serialize_none(&mut self) -> Result<()> {
        try_(|| self.array.push_scalar_none()).ctx(self)
    }

    fn serialize_str(&mut self, v: &str) -> Result<()> {
        try_(|| {
            let timestamp = self.parse_str_to_timestamp(v)?;
            self.array.push_scalar_value(timestamp)
        })
        .ctx(self)
    }

    fn serialize_i64(&mut self, v: i64) -> Result<()> {
        try_(|| self.array.push_scalar_value(v)).ctx(self)
    }
}

--------------------------------------------------------------------------------
/serde_arrow/src/internal/testing.rs:
--------------------------------------------------------------------------------
//! Support for tests
use core::str;

use marrow::array::{Array, BytesArray};

use crate::internal::error::{fail, Error, Result};

/// Panic unless `actual` is an error whose display string contains `expected`.
// NOTE(review): the generic parameters were lost in extraction and are
// reconstructed from the body (`to_string` needs `Display`) - confirm.
pub fn assert_error_contains<T, E: std::fmt::Display>(actual: &Result<T, E>, expected: &str) {
    let Err(actual) = actual else {
        panic!("Expected an error, but no error was raised");
    };

    let actual = actual.to_string();
    if !actual.contains(expected) {
        panic!("Error did not contain {expected:?}. Full error: {actual}");
    }
}

macro_rules!
hash_map {
    // empty map
    () => {
        ::std::collections::HashMap::new()
    };
    // `key => value` pairs, both converted with `.into()`
    ($($key:expr => $value:expr),* $(,)?) => {
        {
            let mut m = ::std::collections::HashMap::new();
            $(m.insert($key.into(), $value.into());)*
            m
        }
    };
}

pub(crate) use hash_map;

use super::utils::array_ext::get_bit_buffer;

/// Element access for arrays in tests
pub(crate) trait ArrayAccess {
    /// Get the string at `idx`, or `None` if the element is marked invalid
    fn get_utf8(&self, idx: usize) -> Result<Option<&str>>;
}

impl ArrayAccess for Array {
    fn get_utf8(&self, idx: usize) -> Result<Option<&str>> {
        match self {
            Self::Binary(array) | Self::Utf8(array) => get_utf8_impl(array, idx),
            Self::LargeBinary(array) | Self::LargeUtf8(array) => get_utf8_impl(array, idx),
            _ => fail!("invalid array type. does not support `get_utf8`"),
        }
    }
}

/// Read element `idx` of a bytes array as UTF-8.
///
/// Returns `Ok(None)` if the validity bit of the element is unset and an error
/// if the offsets or the data do not cover the element, or the bytes are not
/// valid UTF-8.
fn get_utf8_impl<O>(array: &BytesArray<O>, idx: usize) -> Result<Option<&str>>
where
    O: Copy,
    usize: TryFrom<O>,
    Error: From<<usize as TryFrom<O>>::Error>,
{
    if let Some(validity) = array.validity.as_ref() {
        if !get_bit_buffer(validity, 0, idx)? {
            return Ok(None);
        }
    }

    // the element spans `offsets[idx]..offsets[idx + 1]` in the data buffer
    let Some(start) = array.offsets.get(idx) else {
        fail!("Could not get start for element {idx}");
    };
    let Some(end) = array.offsets.get(idx + 1) else {
        fail!("Could not get end for element {idx}");
    };

    let start = usize::try_from(*start)?;
    let end = usize::try_from(*end)?;
    let Some(data) = array.data.get(start..end) else {
        fail!("Invalid array. Could not get byte slice");
    };

    Ok(Some(str::from_utf8(data)?))
}

--------------------------------------------------------------------------------
/serde_arrow/src/internal/utils/test_value.rs:
--------------------------------------------------------------------------------
use serde::{de::DeserializeOwned, Deserialize, Serialize};

use crate::internal::utils::value::{ValueDeserializer, ValueSerializer};

/// Serialize `value` with `ValueSerializer` and deserialize the result again
fn roundtrip<T: Serialize + DeserializeOwned>(value: &T) -> T {
    let value = value.serialize(ValueSerializer).unwrap();
    T::deserialize(ValueDeserializer::new(&value)).unwrap()
}

#[test]
fn example() {
    #[derive(Debug, PartialEq, Serialize, Deserialize)]
    struct S {
        a: i32,
        b: i64,
    }

    let item = S { a: 13, b: 21 };
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_i8() {
    let item: i8 = -42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_i16() {
    let item: i16 = -42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_i32() {
    let item: i32 = -42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_i64() {
    let item: i64 = -42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_u8() {
    let item: u8 = 42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_u16() {
    let item: u16 = 42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_u32() {
    let item: u32 = 42;
    assert_eq!(item, roundtrip(&item));
}

#[test]
fn example_u64() {
    let item: u64 = 42;
    assert_eq!(item, roundtrip(&item));
}

--------------------------------------------------------------------------------
/serde_arrow/src/marrow_impl.rs:
--------------------------------------------------------------------------------
use marrow::{array::Array,
datatypes::Field, view::View};
use serde::{Deserialize, Serialize};

use crate::internal::{
    array_builder::ArrayBuilder, deserializer::Deserializer, error::Result,
    schema::SerdeArrowSchema, serializer::Serializer,
};

/// Build [marrow array][marrow::array::Array] from the given items
///
/// `items` should be given in the form of a list of records (e.g., a vector of
/// structs). To serialize items encoding single values consider the
/// [`Items`][crate::utils::Items] wrapper.
///
/// To build arrays record by record use [`ArrayBuilder`].
///
/// Example:
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// use marrow::{array::{Array, PrimitiveArray}, datatypes::Field};
/// use serde::{Serialize, Deserialize};
/// use serde_arrow::schema::{SchemaLike, TracingOptions};
///
/// ##[derive(Debug, PartialEq, Serialize, Deserialize)]
/// struct Record {
///     a: Option<f32>,
///     b: u64,
/// }
///
/// let items = vec![
///     Record { a: Some(1.0), b: 2},
///     // ...
/// ];
///
/// let fields = Vec::<Field>::from_type::<Record>(TracingOptions::default())?;
/// let arrays = serde_arrow::to_marrow(&fields, &items)?;
///
/// assert_eq!(
///     arrays,
///     vec![
///         Array::Float32(PrimitiveArray {
///             validity: Some(marrow::bit_vec![true]),
///             values: vec![1.0],
///         }),
///         Array::UInt64(PrimitiveArray {
///             validity: None,
///             values: vec![2],
///         }),
///     ],
/// );
/// # Ok(())
/// # }
/// ```
///
pub fn to_marrow<T: Serialize>(fields: &[Field], items: T) -> Result<Vec<Array>> {
    let builder = ArrayBuilder::from_marrow(fields)?;
    items
        .serialize(Serializer::new(builder))?
        .into_inner()
        .to_marrow()
}

/// Deserialize items from [marrow views][marrow::view::View]
///
/// The type should be a list of records (e.g., a vector of structs). To
/// deserialize items encoding single values consider the
/// [`Items`][crate::utils::Items] wrapper.
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// use marrow::{datatypes::Field, view::{BitsWithOffset, View, PrimitiveView}};
/// use serde::{Deserialize, Serialize};
/// use serde_arrow::schema::{SchemaLike, TracingOptions};
///
/// ##[derive(Debug, PartialEq, Deserialize, Serialize)]
/// struct Record {
///     a: Option<f32>,
///     b: u64,
/// }
///
/// let views = vec![
///     View::Float32(PrimitiveView {
///         validity: Some(BitsWithOffset {
///             offset: 0,
///             data: &const { marrow::bit_array![true, false, true] },
///         }),
///         values: &[13.0, 0.0, 17.0],
///     }),
///     View::UInt64(PrimitiveView {
///         validity: None,
///         values: &[21, 42, 84],
///     }),
/// ];
///
/// let fields = Vec::<Field>::from_type::<Record>(TracingOptions::default())?;
/// let items: Vec<Record> = serde_arrow::from_marrow(&fields, &views)?;
///
/// assert_eq!(
///     items,
///     vec![
///         Record { a: Some(13.0), b: 21 },
///         Record { a: None, b: 42 },
///         Record { a: Some(17.0), b: 84 },
///     ],
/// );
/// # Ok(())
/// # }
/// ```
///
pub fn from_marrow<'de, T>(fields: &[Field], views: &'de [View]) -> Result<T>
where
    T: Deserialize<'de>,
{
    T::deserialize(Deserializer::from_marrow(fields, views)?)
}

impl ArrayBuilder {
    /// Build an array builder from [`marrow::Field`s][Field]
    pub fn from_marrow(fields: &[Field]) -> Result<Self> {
        ArrayBuilder::new(SerdeArrowSchema {
            fields: fields.to_vec(),
        })
    }

    /// Construct [`marrow::Array`s][Array] and reset the builder
    pub fn to_marrow(&mut self) -> Result<Vec<Array>> {
        self.build_arrays()
    }
}

impl<'de> Deserializer<'de> {
    /// Build a deserializer from [`marrow::Field`s][Field] and [`marrow::View`s][View]
    pub fn from_marrow(fields: &[Field], views: &[View<'de>]) -> Result<Self> {
        Self::new(fields, views.to_vec())
    }
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/api_chrono.rs:
--------------------------------------------------------------------------------
// use the deprecated chrono API for now
#![allow(deprecated)]

use chrono::{DateTime, FixedOffset, NaiveDate, TimeZone, Utc};

use crate::internal::error::{Error, Result};

/// Strings with a `Z` suffix or an explicit offset both parse into UTC date times
#[test]
fn test_parse_utc() -> Result<()> {
    let dt = "0730-12-01T02:03:50Z".parse::<DateTime<Utc>>()?;
    assert_eq!(dt, Utc.ymd(730, 12, 1).and_hms(2, 3, 50));

    // the +05:00 offset is normalized to UTC
    let dt = "2020-12-24T13:30:00+05:00".parse::<DateTime<Utc>>()?;
    assert_eq!(dt, Utc.ymd(2020, 12, 24).and_hms(8, 30, 0));
    Ok(())
}

#[test]
fn test_chrono_api_naive_datetime() -> Result<()> {
    let dt = NaiveDate::from_ymd(2021, 8, 3).and_hms(12, 0, 0);
    let dt_str = serde_json::to_string(&dt).map_err(|err| Error::custom(err.to_string()))?;
    assert_eq!(dt_str, "\"2021-08-03T12:00:00\"");
    Ok(())
}

#[test]
fn test_chrono_api_datetime() -> Result<()> {
    let dt = Utc.ymd(730, 12, 1).and_hms(2, 3, 50);
    let dt_str = serde_json::to_string(&dt).map_err(|err| Error::custom(err.to_string()))?;

    assert_eq!(dt_str, "\"0730-12-01T02:03:50Z\"");
    Ok(())
}

#[test]
fn test_chrono_api_datetime_debug() -> Result<()> {
    let dt = Utc.ymd(730, 12, 1).and_hms(2, 3, 50);
    let dt_str = format!("{dt:?}");

    assert_eq!(dt_str, "0730-12-01T02:03:50Z");
    Ok(())
}

#[test]
fn test_chrono_fixed_offset() -> Result<()> {
    let dt = FixedOffset::east(5 * 3600)
        .ymd(2020, 12, 24)
        .and_hms(13, 30, 00);
    let dt_str = serde_json::to_string(&dt).map_err(|err| Error::custom(err.to_string()))?;

    assert_eq!(dt_str, "\"2020-12-24T13:30:00+05:00\"");
    Ok(())
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/error_messages/deserializers.rs:
--------------------------------------------------------------------------------
use std::collections::HashMap;

use marrow::{
    datatypes::FieldMeta,
    view::{BitsWithOffset, BooleanView, StructView, View},
};
use serde::Deserialize;
use serde_json::json;

use crate::{
    internal::testing::assert_error_contains,
    schema::{SchemaLike, SerdeArrowSchema},
    Deserializer,
};

/// The struct view declares 5 items, but its boolean child only has 2: the
/// resulting error must name the nested field and its data type.
#[test]
fn example_exhausted() {
    let views = vec![View::Struct(StructView {
        len: 5,
        validity: None,
        fields: vec![(
            FieldMeta {
                name: String::from("nested"),
                nullable: false,
                metadata: HashMap::new(),
            },
            View::Boolean(BooleanView {
                len: 2,
                validity: None,
                values: BitsWithOffset {
                    data: &[0b_0001_0011],
                    offset: 0,
                },
            }),
        )],
    })];

    let schema = SerdeArrowSchema::from_value(&json!([{
        "name": "item",
        "data_type": "Struct",
        "children": [
            {"name": "nested", "data_type": "Bool"},
        ],
    }]))
    .unwrap();

    let deserializer = Deserializer::new(&schema.fields, views).unwrap();

    #[derive(Deserialize)]
    struct S {
        #[allow(dead_code)]
        item: Nested,
    }

    #[derive(Deserialize)]
    struct Nested {
        #[allow(dead_code)]
        nested: bool,
    }

    let res = Vec::<S>::deserialize(deserializer);
    assert_error_contains(&res, "Out of bounds access");
    assert_error_contains(&res, "field: \"$.item.nested\"");
    assert_error_contains(&res, "data_type: \"Boolean\"");
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/error_messages/misc.rs:
--------------------------------------------------------------------------------
use crate::internal::error::Error;

/// The backtrace is only part of the `Debug` output, not of `Display`
#[test]
fn backtrace_on_debug() {
    let err = Error::custom(String::from("foo bar"));

    // NOTE: the exact message depends on the ability of Rust to capture a backtrace
    assert!(!format!("{err}").contains("Backtrace"));
    assert!(format!("{err:?}").contains("Backtrace"));
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/error_messages/mod.rs:
--------------------------------------------------------------------------------
mod deserializers;
mod misc;
mod push_validity;
mod trace_from_samples;
mod trace_from_type;

--------------------------------------------------------------------------------
/serde_arrow/src/test/error_messages/push_validity.rs:
--------------------------------------------------------------------------------
use serde_json::json;

use crate::internal::{
    array_builder::ArrayBuilder,
    error::PanicOnError,
    schema::{SchemaLike, SerdeArrowSchema},
    testing::assert_error_contains,
};

#[test]
fn int_nested() -> PanicOnError<()> {
    let schema = SerdeArrowSchema::from_value(&json!([
        {
            "name": "nested",
            "data_type": "Struct",
            "children": [
                {"name": "field", "data_type": "U32"},
            ],
        },
    ]))?;

    let mut array_builder = ArrayBuilder::new(schema)?;
    let res =
array_builder.push(&json!({"nested": {"field": 32}}));
    assert_eq!(res, Ok(()));

    let res = array_builder.push(&json!({"nested": {"field": null}}));
    assert_error_contains(&res, "field: \"$.nested.field\"");

    Ok(())
}

/// Pushing `null` into a top-level `U32` field reports the field path
#[test]
fn int_top_level() -> PanicOnError<()> {
    let fields = json!([
        {"name": "field", "data_type": "U32"},
    ]);
    let mut builder = ArrayBuilder::new(SerdeArrowSchema::from_value(&fields)?)?;

    // a proper value is accepted
    let accepted = builder.push(&json!({"field": 32}));
    assert_eq!(accepted, Ok(()));

    // a null is rejected with the path of the offending field
    let rejected = builder.push(&json!({"field": null}));
    assert_error_contains(&rejected, "field: \"$.field\"");

    Ok(())
}

/// Pushing `null` into a nested struct field reports the full field path
#[test]
fn struct_nested() -> PanicOnError<()> {
    let fields = json!([
        {
            "name": "nested",
            "data_type": "Struct",
            "children": [
                {"name": "field", "data_type": "Struct", "children": []},
            ],
        },
    ]);
    let mut builder = ArrayBuilder::new(SerdeArrowSchema::from_value(&fields)?)?;

    // an empty struct is accepted
    let accepted = builder.push(&json!({"nested": {"field": {}}}));
    assert_eq!(accepted, Ok(()));

    // a null is rejected with the path of the offending field
    let rejected = builder.push(&json!({"nested": {"field": null}}));
    assert_error_contains(&rejected, "field: \"$.nested.field\"");

    Ok(())
}

/// Pushing `null` into a top-level struct field reports the field path
#[test]
fn struct_top_level() -> PanicOnError<()> {
    let fields = json!([
        {"name": "field", "data_type": "Struct", "children": []},
    ]);
    let mut builder = ArrayBuilder::new(SerdeArrowSchema::from_value(&fields)?)?;

    // an empty struct is accepted
    let accepted = builder.push(&json!({"field": {}}));
    assert_eq!(accepted, Ok(()));

    // a null is rejected with the path of the offending field
    let rejected = builder.push(&json!({"field": null}));
    assert_error_contains(&rejected, "field: \"$.field\"");

    Ok(())
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/error_messages/trace_from_samples.rs:
--------------------------------------------------------------------------------
use crate::{
    internal::testing::assert_error_contains,
    schema::{SchemaLike, SerdeArrowSchema, TracingOptions},
    utils::Item,
};

#[test]
fn non_sequence() {
    let res = SerdeArrowSchema::from_samples(&42, TracingOptions::default());
    assert_error_contains(&res, "Cannot trace non-sequences with `from_samples`");
    assert_error_contains(&res, "path: \"$\"");
}

#[test]
fn incompatible_primitives() {
    let res =
        SerdeArrowSchema::from_samples(&(Item(42_u32), Item("foo bar")), TracingOptions::default());
    assert_error_contains(&res, "path: \"$.item\"");
}

#[test]
fn number_coercion() {
    let res = SerdeArrowSchema::from_samples(&(&32.0_f32, 42_u64), TracingOptions::default());
    assert_error_contains(
        &res,
        "consider setting `coerce_numbers` to `true` to coerce different numeric types.",
    );
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/error_messages/trace_from_type.rs:
--------------------------------------------------------------------------------
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, Utc};
use serde::Deserialize;
use serde_json::Value;

use crate::{
    internal::{
        schema::{SchemaLike, SerdeArrowSchema},
        testing::assert_error_contains,
        utils::Item,
    },
    schema::TracingOptions,
};

#[test]
fn example() {
    // NOTE: Value cannot be traced with from_type, as it is not self-describing
    let res = SerdeArrowSchema::from_type::<Item<Vec<Value>>>(TracingOptions::default());
    assert_error_contains(
        &res,
        "Non self describing types cannot be traced with `from_type`.",
    );
    assert_error_contains(&res, "path: \"$.item.element\"");
    assert_error_contains(&res, "tracer_type: \"Unknown\"");
}

#[test]
fn chrono_types_are_not_self_describing() {
    let res = SerdeArrowSchema::from_type::<Item<DateTime<Utc>>>(TracingOptions::default());
    assert_error_contains(&res, "path: \"$.item\"");
    assert_error_contains(&res, "non self describing type");

    // NOTE(review): the type arguments of the next three calls were lost in
    // extraction; the naive chrono types are filled in from the imports and
    // their order is an assumption.
    let res = SerdeArrowSchema::from_type::<Item<NaiveDateTime>>(TracingOptions::default());
    assert_error_contains(&res, "path: \"$.item\"");
    assert_error_contains(&res, "non self describing type");

    let res = SerdeArrowSchema::from_type::<Item<NaiveTime>>(TracingOptions::default());
    assert_error_contains(&res, "path: \"$.item\"");
    assert_error_contains(&res, "non self describing type");

    let res = SerdeArrowSchema::from_type::<Item<NaiveDate>>(TracingOptions::default());
    assert_error_contains(&res, "path: \"$.item\"");
    assert_error_contains(&res, "non self describing type");
}

#[test]
fn net_ip_addr_is_not_self_describing() {
    // NOTE(review): type argument reconstructed from the test name - confirm
    let res = SerdeArrowSchema::from_type::<Item<std::net::IpAddr>>(TracingOptions::default());
    assert_error_contains(&res, "path: \"$.item\"");
    assert_error_contains(&res, "non self describing type");
}

#[test]
fn unsupported_recursive_types() {
    #[allow(unused)]
    #[derive(Deserialize)]
    struct Tree {
        left: Option<Box<Tree>>,
        right: Option<Box<Tree>>,
    }

    let res = SerdeArrowSchema::from_type::<Tree>(TracingOptions::default());
    assert_error_contains(&res, "Too deeply nested type detected");
    // NOTE: do not check the complete path, it depends on the recursion limit
    assert_error_contains(&res, "path: \"$.left.left.left.left.left.left");
}

--------------------------------------------------------------------------------
/serde_arrow/src/test/jiff.rs:
--------------------------------------------------------------------------------
use jiff::{
    civil::{date, time, Date, DateTime, Time},
    Span, Timestamp, Zoned,
};

use crate::internal::{testing::assert_error_contains, utils::value};

#[test]
fn string_repr_examples() {
// date 11 | let obj = date(2023, 12, 31); 12 | assert_eq!(value::transmute::(&obj).unwrap(), "2023-12-31"); 13 | 14 | let obj = date(-10, 10, 30); 15 | assert_eq!(value::transmute::(&obj).unwrap(), "-000010-10-30"); 16 | assert_eq!(value::transmute::("-000010-10-30").unwrap(), obj); 17 | assert_error_contains( 18 | &value::transmute::("-0010-10-30"), 19 | "six digit integer", 20 | ); 21 | 22 | // date time without time zone 23 | let obj = date(2023, 12, 31).at(18, 30, 0, 0); 24 | assert_eq!( 25 | value::transmute::(&obj).unwrap(), 26 | "2023-12-31T18:30:00" 27 | ); 28 | 29 | // date time with timezone 30 | let obj = date(2023, 12, 31).at(18, 30, 0, 0).intz("UTC").unwrap(); 31 | assert_eq!( 32 | value::transmute::(&obj).unwrap(), 33 | "2023-12-31T18:30:00+00:00[UTC]" 34 | ); 35 | 36 | // time without fractional part 37 | let obj = time(16, 56, 42, 0); 38 | assert_eq!(value::transmute::(&obj).unwrap(), "16:56:42"); 39 | 40 | // time with fractional part 41 | let obj = time(16, 56, 42, 123_000_000); 42 | assert_eq!(value::transmute::(&obj).unwrap(), "16:56:42.123"); 43 | 44 | // day span 45 | let obj = Span::new().days(32); 46 | assert_eq!(value::transmute::(&obj).unwrap(), "P32d"); 47 | 48 | // year month span 49 | let obj = Span::new().years(4).months(7); 50 | assert_eq!(value::transmute::(&obj).unwrap(), "P4y7m"); 51 | } 52 | 53 | /// Test that the different reprs between chrono and jiff are compatible 54 | #[test] 55 | fn transmute_jiff_chrono() { 56 | // date 57 | let chrono = chrono::NaiveDate::from_ymd_opt(2023, 12, 31).unwrap(); 58 | let jiff = date(2023, 12, 31); 59 | 60 | assert_eq!(value::transmute::(&chrono).unwrap(), jiff); 61 | assert_eq!( 62 | value::transmute::(&jiff).unwrap(), 63 | chrono 64 | ); 65 | 66 | // time without fractional part 67 | let chrono = chrono::NaiveTime::from_hms_opt(19, 31, 22).unwrap(); 68 | let jiff = time(19, 31, 22, 0); 69 | 70 | assert_eq!(value::transmute::