├── rust-toolchain.toml ├── .gitignore ├── src ├── core │ ├── sqlite │ │ └── mod.rs │ ├── databend │ │ ├── mod.rs │ │ ├── constants.rs │ │ ├── jentry.rs │ │ ├── iterator.rs │ │ └── builder.rs │ ├── mod.rs │ └── item.rs ├── functions │ ├── mod.rs │ └── object.rs ├── jsonpath │ └── mod.rs ├── constants.rs ├── keypath.rs ├── lib.rs ├── error.rs ├── extension.rs ├── owned.rs ├── raw.rs ├── from.rs └── util.rs ├── .github └── workflows │ ├── rust.yml │ └── publish.yaml ├── tests └── it │ ├── main.rs │ ├── testdata │ └── key_path.txt │ ├── keypath_parser.rs │ ├── jsonpath_parser.rs │ ├── encode.rs │ └── decode.rs ├── Cargo.toml ├── benches ├── parser.rs ├── strip_nulls.rs └── get_path.rs ├── CHANGELOG.md ├── README.md └── LICENSE /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "stable" 3 | components = ["rustfmt", "clippy"] 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | # IDE and editor 13 | .vscode 14 | .idea 15 | 16 | # env files for backends 17 | .env 18 | 19 | # profiling 20 | flamegraph.svg 21 | perf.* 22 | -------------------------------------------------------------------------------- /src/core/sqlite/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Format 20 | run: cargo fmt --all -- --check 21 | - name: Clippy 22 | run: cargo clippy --workspace --all-targets --all-features -- -D warnings 23 | - name: Build 24 | run: cargo build --verbose 25 | - name: Run tests 26 | run: cargo test --verbose 27 | - name: Run tests disable arbitrary_precision 28 | run: cargo test --no-default-features --features databend --verbose 29 | -------------------------------------------------------------------------------- /src/functions/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | mod array; 16 | mod object; 17 | mod operator; 18 | mod path; 19 | mod scalar; 20 | -------------------------------------------------------------------------------- /tests/it/main.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | mod decode; 16 | mod encode; 17 | mod functions; 18 | mod jsonpath_parser; 19 | mod keypath_parser; 20 | mod number; 21 | mod parser; 22 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - '**/Cargo.toml' 9 | 10 | jobs: 11 | crates: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Setup Cargo Release 16 | run: | 17 | curl -fsSLo /tmp/cargo-release.tar.gz https://github.com/crate-ci/cargo-release/releases/download/v0.24.8/cargo-release-v0.24.8-x86_64-unknown-linux-gnu.tar.gz 18 | mkdir -p /tmp/cargo-release 19 | tar -C /tmp/cargo-release -xzf /tmp/cargo-release.tar.gz 20 | sudo mv /tmp/cargo-release/cargo-release /usr/local/bin 21 | - name: Release to crates.io 22 | env: 23 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 24 | run: | 25 | cargo release publish --execute --no-confirm 26 | -------------------------------------------------------------------------------- /src/jsonpath/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | mod parser; 16 | mod path; 17 | mod selector; 18 | 19 | pub use parser::parse_json_path; 20 | pub(crate) use parser::raw_string; 21 | pub(crate) use parser::string; 22 | pub use path::*; 23 | pub use selector::Selector; 24 | -------------------------------------------------------------------------------- /src/core/databend/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | mod builder; 16 | mod constants; 17 | mod de; 18 | mod iterator; 19 | mod jentry; 20 | mod ser; 21 | mod util; 22 | 23 | pub(crate) use builder::*; 24 | pub use de::*; 25 | pub(crate) use iterator::*; 26 | pub use ser::*; 27 | -------------------------------------------------------------------------------- /src/core/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #[cfg(feature = "databend")] 16 | mod databend; 17 | #[cfg(feature = "sqlite")] 18 | mod sqlite; 19 | 20 | mod item; 21 | 22 | #[cfg(feature = "databend")] 23 | pub use databend::*; 24 | pub use item::*; 25 | #[cfg(feature = "sqlite")] 26 | #[allow(unused_imports)] 27 | pub use sqlite::*; 28 | -------------------------------------------------------------------------------- /tests/it/testdata/key_path.txt: -------------------------------------------------------------------------------- 1 | ---------- Input ---------- 2 | { } 3 | ---------- Output --------- 4 | {} 5 | ---------- AST ------------ 6 | KeyPaths { 7 | paths: [], 8 | } 9 | 10 | 11 | ---------- Input ---------- 12 | { 1, a } 13 | ---------- Output --------- 14 | {1,a} 15 | ---------- AST ------------ 16 | KeyPaths { 17 | paths: [ 18 | Index( 19 | 1, 20 | ), 21 | Name( 22 | "a", 23 | ), 24 | ], 25 | } 26 | 27 | 28 | ---------- Input ---------- 29 | {1,a,-2} 30 | ---------- Output --------- 31 | {1,a,-2} 32 | ---------- AST ------------ 33 | KeyPaths { 34 | paths: [ 35 | Index( 36 | 1, 37 | ), 38 | Name( 39 | "a", 40 | ), 41 | Index( 42 | -2, 43 | ), 44 | ], 45 | } 46 | 47 | 48 | ---------- Input ---------- 49 | {a,"b","c"} 50 | ---------- Output --------- 51 | {a,"b","c"} 52 | ---------- AST ------------ 53 | KeyPaths { 54 | paths: [ 55 | Name( 56 | "a", 57 | ), 58 | QuotedName( 59 | "b", 60 | ), 61 | QuotedName( 62 | "c", 63 | ), 64 | ], 65 | } 66 | 67 | 68 | -------------------------------------------------------------------------------- 
/tests/it/keypath_parser.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::io::Write; 16 | 17 | use goldenfile::Mint; 18 | use jsonb::keypath::parse_key_paths; 19 | 20 | #[test] 21 | fn test_json_path() { 22 | let mut mint = Mint::new("tests/it/testdata"); 23 | let mut file = mint.new_goldenfile("key_path.txt").unwrap(); 24 | let cases = &[" { } ", " { 1, a } ", "{1,a,-2}", r#"{a,"b","c"} "#]; 25 | 26 | for case in cases { 27 | let key_paths = parse_key_paths(case.as_bytes()).unwrap(); 28 | 29 | writeln!(file, "---------- Input ----------").unwrap(); 30 | writeln!(file, "{case}").unwrap(); 31 | writeln!(file, "---------- Output ---------").unwrap(); 32 | writeln!(file, "{key_paths}").unwrap(); 33 | writeln!(file, "---------- AST ------------").unwrap(); 34 | writeln!(file, "{key_paths:#?}").unwrap(); 35 | writeln!(file, "\n").unwrap(); 36 | } 37 | } 38 | 39 | #[test] 40 | fn test_json_path_error() { 41 | let cases = &[r#"{"#, r#"ab"#]; 42 | 43 | for case in cases { 44 | let res = parse_key_paths(case.as_bytes()); 45 | assert!(res.is_err()); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Datafuse Labs 2 | # 3 | # 
Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [package] 16 | authors = ["Databend Authors "] 17 | categories = ["encoding"] 18 | description = "JSONB implement in Rust." 19 | edition = "2021" 20 | homepage = "https://github.com/databendlabs/jsonb" 21 | keywords = ["json", "jsonb", "jsonpath"] 22 | license = "Apache-2.0" 23 | name = "jsonb" 24 | repository = "https://github.com/databendlabs/jsonb" 25 | version = "0.5.5" 26 | rust-version = "1.85" 27 | 28 | [dependencies] 29 | byteorder = "1.5.0" 30 | ethnum = "1.5.2" 31 | fast-float2 = "0.2.3" 32 | itoa = "1.0" 33 | jiff = "0.2.10" 34 | nom = "8.0.0" 35 | num-traits = "0.2.19" 36 | ordered-float = { version = "5.1.0", default-features = false } 37 | rand = { version = "0.9.2", features = ["small_rng"] } 38 | ryu = "1.0" 39 | serde = "1.0" 40 | serde_json = { version = "1.0", default-features = false, features = ["std"] } 41 | 42 | [dev-dependencies] 43 | goldenfile = "1.8" 44 | serde_json = "1.0" 45 | json-deserializer = "0.4.4" 46 | simd-json = "0.15.0" 47 | mockalloc = "0.1.2" 48 | criterion = "0.5.1" 49 | proptest = "1.7" 50 | json-five = "0.3.0" 51 | 52 | [features] 53 | default = ["databend", "preserve_order", "arbitrary_precision"] 54 | databend = [] 55 | sqlite = [] 56 | arbitrary_precision = ["serde_json/arbitrary_precision"] 57 | preserve_order = ["serde_json/preserve_order"] 58 | 59 | [[bench]] 60 | name = "parser" 61 | harness = false 62 | 63 | [[bench]] 64 | name = 
"get_path" 65 | harness = false 66 | 67 | [[bench]] 68 | name = "strip_nulls" 69 | harness = false 70 | -------------------------------------------------------------------------------- /src/core/databend/constants.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // JSONB header constants 16 | pub(super) const ARRAY_CONTAINER_TAG: u32 = 0x80000000; 17 | pub(super) const OBJECT_CONTAINER_TAG: u32 = 0x40000000; 18 | pub(super) const SCALAR_CONTAINER_TAG: u32 = 0x20000000; 19 | 20 | pub(super) const CONTAINER_HEADER_TYPE_MASK: u32 = 0xE0000000; 21 | pub(super) const CONTAINER_HEADER_LEN_MASK: u32 = 0x1FFFFFFF; 22 | 23 | // JSONB JEntry constants 24 | pub(super) const NULL_TAG: u32 = 0x00000000; 25 | pub(super) const STRING_TAG: u32 = 0x10000000; 26 | pub(super) const NUMBER_TAG: u32 = 0x20000000; 27 | pub(super) const FALSE_TAG: u32 = 0x30000000; 28 | pub(super) const TRUE_TAG: u32 = 0x40000000; 29 | pub(super) const CONTAINER_TAG: u32 = 0x50000000; 30 | pub(super) const EXTENSION_TAG: u32 = 0x60000000; 31 | 32 | // JSONB number constants 33 | pub(super) const NUMBER_ZERO: u8 = 0x00; 34 | pub(super) const NUMBER_NAN: u8 = 0x10; 35 | pub(super) const NUMBER_INF: u8 = 0x20; 36 | pub(super) const NUMBER_NEG_INF: u8 = 0x30; 37 | pub(super) const NUMBER_INT: u8 = 0x40; 38 | pub(super) const 
NUMBER_UINT: u8 = 0x50; 39 | pub(super) const NUMBER_FLOAT: u8 = 0x60; 40 | pub(super) const NUMBER_DECIMAL: u8 = 0x70; 41 | 42 | // JSONB extension constants 43 | pub(super) const EXTENSION_BINARY: u8 = 0x00; 44 | pub(super) const EXTENSION_DATE: u8 = 0x10; 45 | pub(super) const EXTENSION_TIMESTAMP: u8 = 0x20; 46 | pub(super) const EXTENSION_TIMESTAMP_TZ: u8 = 0x30; 47 | pub(super) const EXTENSION_INTERVAL: u8 = 0x40; 48 | 49 | // @todo support offset mode 50 | #[allow(dead_code)] 51 | pub(super) const JENTRY_IS_OFF_FLAG: u32 = 0x80000000; 52 | pub(super) const JENTRY_TYPE_MASK: u32 = 0x70000000; 53 | pub(super) const JENTRY_OFF_LEN_MASK: u32 = 0x0FFFFFFF; 54 | -------------------------------------------------------------------------------- /src/core/databend/jentry.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use super::constants::*; 16 | 17 | #[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord)] 18 | pub(super) struct JEntry { 19 | pub(super) type_code: u32, 20 | pub(super) length: u32, 21 | } 22 | 23 | impl JEntry { 24 | pub(super) fn decode_jentry(encoded: u32) -> JEntry { 25 | let type_code = encoded & JENTRY_TYPE_MASK; 26 | let length = encoded & JENTRY_OFF_LEN_MASK; 27 | JEntry { type_code, length } 28 | } 29 | 30 | pub(super) fn make_null_jentry() -> JEntry { 31 | JEntry { 32 | type_code: NULL_TAG, 33 | length: 0, 34 | } 35 | } 36 | 37 | pub(super) fn make_true_jentry() -> JEntry { 38 | JEntry { 39 | type_code: TRUE_TAG, 40 | length: 0, 41 | } 42 | } 43 | 44 | pub(super) fn make_false_jentry() -> JEntry { 45 | JEntry { 46 | type_code: FALSE_TAG, 47 | length: 0, 48 | } 49 | } 50 | 51 | pub(super) fn make_string_jentry(length: usize) -> JEntry { 52 | JEntry { 53 | type_code: STRING_TAG, 54 | length: length as u32, 55 | } 56 | } 57 | 58 | pub(super) fn make_number_jentry(length: usize) -> JEntry { 59 | JEntry { 60 | type_code: NUMBER_TAG, 61 | length: length as u32, 62 | } 63 | } 64 | 65 | pub(super) fn make_container_jentry(length: usize) -> JEntry { 66 | JEntry { 67 | type_code: CONTAINER_TAG, 68 | length: length as u32, 69 | } 70 | } 71 | 72 | pub(super) fn make_extension_jentry(length: usize) -> JEntry { 73 | JEntry { 74 | type_code: EXTENSION_TAG, 75 | length: length as u32, 76 | } 77 | } 78 | 79 | pub(super) fn encoded(&self) -> u32 { 80 | self.type_code | self.length 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /benches/parser.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::fs; 16 | use std::io::Read; 17 | 18 | use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; 19 | 20 | fn parse_jsonb(data: &[u8]) { 21 | let _v: jsonb::OwnedJsonb = jsonb::parse_owned_jsonb(data).unwrap(); 22 | } 23 | 24 | fn parse_serde_json(data: &[u8]) { 25 | let _v: serde_json::Value = serde_json::from_slice(data).unwrap(); 26 | } 27 | 28 | fn parse_json_deserializer(data: &[u8]) { 29 | let _v: json_deserializer::Value = json_deserializer::parse(data).unwrap(); 30 | } 31 | 32 | fn parse_simd_json(data: &mut [u8]) { 33 | let _v = simd_json::to_borrowed_value(data).unwrap(); 34 | } 35 | 36 | fn read(file: &str) -> Vec { 37 | let mut f = fs::File::open(file).unwrap(); 38 | let mut data = vec![]; 39 | f.read_to_end(&mut data).unwrap(); 40 | data 41 | } 42 | 43 | fn add_benchmark(c: &mut Criterion) { 44 | let paths = fs::read_dir("./data/").unwrap(); 45 | for path in paths { 46 | let file = format!("{}", path.unwrap().path().display()); 47 | let bytes = read(&file); 48 | 49 | c.bench_function(&format!("jsonb parse {file}"), |b| { 50 | b.iter(|| parse_jsonb(&bytes)) 51 | }); 52 | 53 | c.bench_function(&format!("serde_json parse {file}"), |b| { 54 | b.iter(|| parse_serde_json(&bytes)) 55 | }); 56 | 57 | c.bench_function(&format!("json_deserializer parse {file}"), |b| { 58 | b.iter(|| parse_json_deserializer(&bytes)) 59 | }); 60 | 61 | let bytes = bytes.clone(); 62 | c.bench_function(&format!("simd_json parse {file}"), move |b| { 63 | b.iter_batched( 64 | || bytes.clone(), 65 | |mut 
data| parse_simd_json(&mut data), 66 | BatchSize::SmallInput, 67 | ) 68 | }); 69 | } 70 | } 71 | 72 | criterion_group!(benches, add_benchmark); 73 | criterion_main!(benches); 74 | -------------------------------------------------------------------------------- /benches/strip_nulls.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::{fs, io::Read}; 16 | 17 | use criterion::{criterion_group, criterion_main, Criterion}; 18 | use jsonb::{from_slice, Value}; 19 | 20 | fn read(file: &str) -> Vec { 21 | let mut f = fs::File::open(file).unwrap(); 22 | let mut data = vec![]; 23 | f.read_to_end(&mut data).unwrap(); 24 | data 25 | } 26 | 27 | fn strip_nulls_deser(data: &[u8]) { 28 | let mut buf = Vec::new(); 29 | let mut json = from_slice(data).unwrap(); 30 | strip_value_nulls(&mut json); 31 | json.write_to_vec(&mut buf); 32 | assert!(!buf.is_empty()); 33 | } 34 | 35 | fn strip_value_nulls(val: &mut Value<'_>) { 36 | match val { 37 | Value::Array(arr) => { 38 | for v in arr { 39 | strip_value_nulls(v); 40 | } 41 | } 42 | Value::Object(ref mut obj) => { 43 | for (_, v) in obj.iter_mut() { 44 | strip_value_nulls(v); 45 | } 46 | obj.retain(|_, v| !matches!(v, Value::Null)); 47 | } 48 | _ => {} 49 | } 50 | } 51 | 52 | fn strip_nulls_fast(data: &[u8]) { 53 | let raw_jsonb = jsonb::RawJsonb::new(data); 54 | let result_jsonb = raw_jsonb.strip_nulls().unwrap(); 55 | assert!(!result_jsonb.is_empty()); 56 | } 57 | 58 | fn add_benchmark(c: &mut Criterion) { 59 | let paths = fs::read_dir("./data/").unwrap(); 60 | for path in paths { 61 | let file = format!("{}", path.unwrap().path().display()); 62 | let bytes = read(&file); 63 | let json = from_slice(&bytes).unwrap().to_vec(); 64 | 65 | c.bench_function(&format!("strip_nulls_deser[{file}]"), |b| { 66 | b.iter(|| strip_nulls_deser(&json)); 67 | }); 68 | 69 | c.bench_function(&format!("strip_nulls_fast[{file}]"), |b| { 70 | b.iter(|| strip_nulls_fast(&json)); 71 | }); 72 | } 73 | } 74 | 75 | criterion_group!(benches, add_benchmark); 76 | criterion_main!(benches); 77 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 
// JSON text constants
pub(crate) const UNICODE_LEN: usize = 4;

// JSON text escape characters constants
pub(crate) const BS: char = '\\'; // \\ Backslash
pub(crate) const QU: char = '"'; // \" Double quotation mark
pub(crate) const SD: char = '/'; // \/ Slash or divide
pub(crate) const BB: char = '\u{8}'; // \b Backspace
pub(crate) const FF: char = '\u{C}'; // \f Formfeed Page Break
pub(crate) const NN: char = '\n'; // \n Newline
pub(crate) const RR: char = '\r'; // \r Carriage Return
pub(crate) const TT: char = '\t'; // \t Horizontal Tab

// JSONB value compare level
pub(crate) const NULL_LEVEL: u8 = 8;
pub(crate) const ARRAY_LEVEL: u8 = 7;
pub(crate) const OBJECT_LEVEL: u8 = 6;
pub(crate) const STRING_LEVEL: u8 = 5;
pub(crate) const NUMBER_LEVEL: u8 = 4;
pub(crate) const TRUE_LEVEL: u8 = 3;
pub(crate) const FALSE_LEVEL: u8 = 2;
pub(crate) const EXTENSION_LEVEL: u8 = 1;

// Type name strings
pub(crate) const TYPE_STRING: &str = "STRING";
pub(crate) const TYPE_NULL: &str = "NULL_VALUE";
pub(crate) const TYPE_BOOLEAN: &str = "BOOLEAN";
pub(crate) const TYPE_INTEGER: &str = "INTEGER";
pub(crate) const TYPE_ARRAY: &str = "ARRAY";
pub(crate) const TYPE_OBJECT: &str = "OBJECT";
pub(crate) const TYPE_DECIMAL: &str = "DECIMAL";
pub(crate) const TYPE_DOUBLE: &str = "DOUBLE";
pub(crate) const TYPE_BINARY: &str = "BINARY";
pub(crate) const TYPE_DATE: &str = "DATE";
pub(crate) const TYPE_TIMESTAMP: &str = "TIMESTAMP";
pub(crate) const TYPE_TIMESTAMP_TZ: &str = "TIMESTAMP_TZ";
pub(crate) const TYPE_INTERVAL: &str = "INTERVAL";

// Maximum number of decimal digits per decimal width
pub(crate) const MAX_DECIMAL64_PRECISION: usize = 18;
pub(crate) const MAX_DECIMAL128_PRECISION: usize = 38;
pub(crate) const MAX_DECIMAL256_PRECISION: usize = 76;

// Value ranges, widened to i128 so they can be compared against each other
pub(crate) const UINT64_MIN: i128 = u64::MIN as i128;
pub(crate) const UINT64_MAX: i128 = u64::MAX as i128;
pub(crate) const INT64_MIN: i128 = i64::MIN as i128;
pub(crate) const INT64_MAX: i128 = i64::MAX as i128;
// 18 nines and 38 nines: the largest values with that many decimal digits.
pub(crate) const DECIMAL64_MAX: i128 = 10i128.pow(MAX_DECIMAL64_PRECISION as u32) - 1;
pub(crate) const DECIMAL64_MIN: i128 = -DECIMAL64_MAX;
pub(crate) const DECIMAL128_MAX: i128 = 10i128.pow(MAX_DECIMAL128_PRECISION as u32) - 1;
pub(crate) const DECIMAL128_MIN: i128 = -DECIMAL128_MAX;

// Private marker names for the `Number` struct and its fields
pub(crate) const NUMBER_STRUCT_TOKEN: &str = "$jsonb::private::Number";
pub(crate) const NUMBER_STRUCT_FIELD_SCALE: &str = "$jsonb::private::Number::Scale";
pub(crate) const NUMBER_STRUCT_FIELD_VALUE: &str = "$jsonb::private::Number::Value";
pub(crate) const NUMBER_STRUCT_FIELD_HIGH_VALUE: &str = "$jsonb::private::Number::High_Value";
pub(crate) const NUMBER_STRUCT_FIELD_LOW_VALUE: &str = "$jsonb::private::Number::Low_Value";
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::fs; 16 | use std::io::Read; 17 | 18 | use criterion::{criterion_group, criterion_main, Criterion}; 19 | 20 | fn jsonb_get(data: &[u8], paths: &[&str], expected: &str) { 21 | let paths = paths 22 | .iter() 23 | .map(|p| jsonb::jsonpath::Path::DotField(std::borrow::Cow::Borrowed(p))) 24 | .collect::>(); 25 | let json_path = jsonb::jsonpath::JsonPath { paths }; 26 | 27 | let raw_jsonb = jsonb::RawJsonb::new(data); 28 | let result_jsonb = raw_jsonb.select_value_by_path(&json_path).unwrap(); 29 | assert!(result_jsonb.is_some()); 30 | let result_jsonb = result_jsonb.unwrap(); 31 | let result_raw_jsonb = result_jsonb.as_raw(); 32 | 33 | let s = result_raw_jsonb.as_str().unwrap().unwrap(); 34 | assert_eq!(s, expected); 35 | } 36 | 37 | fn serde_json_get(data: &[u8], paths: &Vec<&str>, expected: &str) { 38 | let mut v: serde_json::Value = serde_json::from_slice(data).unwrap(); 39 | for path in paths { 40 | v = v.get(path).unwrap().clone(); 41 | } 42 | let s = v.as_str().unwrap(); 43 | assert_eq!(s, expected); 44 | } 45 | 46 | fn read(file: &str) -> Vec { 47 | let mut f = fs::File::open(file).unwrap(); 48 | let mut data = vec![]; 49 | f.read_to_end(&mut data).unwrap(); 50 | data 51 | } 52 | 53 | struct TestSuite<'a> { 54 | file: &'a str, 55 | paths: Vec<&'a str>, 56 | expected: &'a str, 57 | } 58 | 59 | fn add_benchmark(c: &mut Criterion) { 60 | let test_suites = vec![ 61 | TestSuite { 62 | file: "canada", 63 | paths: vec!["type"], 64 | expected: "FeatureCollection", 65 | }, 66 | TestSuite { 67 | file: 
"citm_catalog", 68 | paths: vec!["areaNames", "205705994"], 69 | expected: "1er balcon central", 70 | }, 71 | TestSuite { 72 | file: "citm_catalog", 73 | paths: vec!["topicNames", "324846100"], 74 | expected: "Formations musicales", 75 | }, 76 | TestSuite { 77 | file: "twitter", 78 | paths: vec!["search_metadata", "max_id_str"], 79 | expected: "505874924095815681", 80 | }, 81 | ]; 82 | 83 | for test_suite in test_suites { 84 | let bytes = read(&format!("./data/{}.json", test_suite.file)); 85 | 86 | let val = jsonb::parse_value(&bytes).unwrap(); 87 | let jsonb_bytes = val.to_vec(); 88 | 89 | c.bench_function( 90 | &format!( 91 | "jsonb get {}->{}", 92 | test_suite.file, 93 | test_suite.paths.join("->") 94 | ), 95 | |b| b.iter(|| jsonb_get(&jsonb_bytes, &test_suite.paths, test_suite.expected)), 96 | ); 97 | 98 | c.bench_function( 99 | &format!( 100 | "serde_json get {}->{}", 101 | test_suite.file, 102 | test_suite.paths.join("->") 103 | ), 104 | |b| b.iter(|| serde_json_get(&bytes, &test_suite.paths, test_suite.expected)), 105 | ); 106 | } 107 | } 108 | 109 | criterion_group!(benches, add_benchmark); 110 | criterion_main!(benches); 111 | -------------------------------------------------------------------------------- /tests/it/jsonpath_parser.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::io::Write; 16 | 17 | use goldenfile::Mint; 18 | use jsonb::jsonpath::parse_json_path; 19 | 20 | #[test] 21 | fn test_json_path() { 22 | let mut mint = Mint::new("tests/it/testdata"); 23 | let mut file = mint.new_goldenfile("json_path.txt").unwrap(); 24 | let cases = &[ 25 | r#"$"#, 26 | r#"$.*"#, 27 | r#"$.**"#, 28 | r#"$.**{2 to last}"#, 29 | r#"$[*]"#, 30 | r#"5 + 5"#, 31 | r#"10 - 5"#, 32 | r#"10 * 5"#, 33 | r#"10 / 5"#, 34 | r#"10 % 5"#, 35 | r#"$.store.book[*].*"#, 36 | // r#"$.store.book[*].* + 5"#, 37 | r#"$.store.book[0].price"#, 38 | r#"+$.store.book[0].price"#, 39 | r#"-$.store.book[0].price"#, 40 | r#"$.store.book[0].price + 5"#, 41 | r#"$.store.book[last].isbn"#, 42 | r"$.store.book[last].test_key\uD83D\uDC8E测试", 43 | r#"$.store.book[0,1, last - 2].price"#, 44 | r#"$.store.book[0,1 to last-1]"#, 45 | r#"$."store"."book""#, 46 | r#"$."st\"ore"."book\uD83D\uDC8E""#, 47 | r#"$[*].book.price ? (@ == 10)"#, 48 | r#"$.store.book?(@.price > 10).title"#, 49 | r#"$.store.book?(@.price < $.expensive).price"#, 50 | r#"$.store.book?(@.price < 10 && @.category == "fiction")"#, 51 | r#"$.store.book?(@.price > 10 || @.category == "reference")"#, 52 | r#"$.store.book?(@.price > 20 && (@.category == "reference" || @.category == "fiction"))"#, 53 | // compatible with Snowflake style path 54 | r#"[1][2]"#, 55 | r#"["k1"]["k2"]"#, 56 | r#"k1.k2:k3"#, 57 | r#"k1["k2"][1]"#, 58 | // predicates 59 | r#"$ > 1"#, 60 | r#"$.* == 0"#, 61 | r#"$[*] > 1"#, 62 | r#"$.a > $.b"#, 63 | r#"$.price > 10 || $.category == "reference""#, 64 | // exists expression 65 | r#"$.store.book?(exists(@.price?(@ > 20)))"#, 66 | r#"$.store?(exists(@.book?(exists(@.category?(@ == "fiction")))))"#, 67 | r#"$.store.book?(@ starts with "Nigel")"#, 68 | r#"$[*] ? 
(@.job == null) .name"#, 69 | // arithmetic functions 70 | r#"$.phones[0].number + 3"#, 71 | r#"7 - $[0]"#, 72 | r#"- $.phones[0].number"#, 73 | ]; 74 | 75 | for case in cases { 76 | let json_path = parse_json_path(case.as_bytes()).unwrap(); 77 | 78 | writeln!(file, "---------- Input ----------").unwrap(); 79 | writeln!(file, "{case}").unwrap(); 80 | writeln!(file, "---------- Output ---------").unwrap(); 81 | writeln!(file, "{json_path}").unwrap(); 82 | writeln!(file, "---------- AST ------------").unwrap(); 83 | writeln!(file, "{json_path:#?}").unwrap(); 84 | writeln!(file, "\n").unwrap(); 85 | } 86 | } 87 | 88 | #[test] 89 | fn test_json_path_error() { 90 | let cases = &[ 91 | r#"$.["#, 92 | r#"$X"#, 93 | r#"$."#, 94 | r#"$.prop."#, 95 | r#"$.prop+."#, 96 | r#"$.."#, 97 | r#"$.prop.."#, 98 | r#"$.foo bar"#, 99 | r#"$[0, 1, 2 4]"#, 100 | r#"$['1','2',]"#, 101 | r#"$['1', ,'3']"#, 102 | r#"$['aaa'}'bbb']"#, 103 | r#"@ > 10"#, 104 | ]; 105 | 106 | for case in cases { 107 | let res = parse_json_path(case.as_bytes()); 108 | assert!(res.is_err()); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/keypath.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::borrow::Cow; 16 | use std::fmt::Display; 17 | use std::fmt::Formatter; 18 | 19 | use nom::branch::alt; 20 | use nom::character::complete::char; 21 | use nom::character::complete::i32; 22 | use nom::character::complete::multispace0; 23 | use nom::combinator::map; 24 | use nom::multi::separated_list1; 25 | use nom::sequence::delimited; 26 | use nom::sequence::preceded; 27 | use nom::sequence::terminated; 28 | use nom::IResult; 29 | use nom::Parser; 30 | 31 | use crate::jsonpath::raw_string; 32 | use crate::jsonpath::string; 33 | use crate::Error; 34 | 35 | /// Represents a set of key path chains. 36 | /// Compatible with the PostgreSQL syntax for extracting JSON sub-object paths. 37 | #[derive(Debug, Clone, Eq, PartialEq, Hash)] 38 | pub struct KeyPaths<'a> { 39 | pub paths: Vec<KeyPath<'a>>, 40 | } 41 | 42 | /// Represents a valid key path. 43 | #[derive(Debug, Clone, Eq, PartialEq, Hash)] 44 | pub enum KeyPath<'a> { 45 | /// represents the index of an Array, allows negative indexing. 46 | Index(i32), 47 | /// represents the quoted field name of an Object. 48 | QuotedName(Cow<'a, str>), 49 | /// represents the field name of an Object. 50 | Name(Cow<'a, str>), 51 | } 52 | 53 | impl Display for KeyPaths<'_> { 54 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 55 | write!(f, "{{")?; 56 | for (i, path) in self.paths.iter().enumerate() { 57 | if i > 0 { 58 | write!(f, ",")?; 59 | } 60 | write!(f, "{path}")?; 61 | } 62 | write!(f, "}}")?; 63 | Ok(()) 64 | } 65 | } 66 | 67 | impl Display for KeyPath<'_> { 68 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 69 | match self { 70 | KeyPath::Index(idx) => { 71 | write!(f, "{idx}")?; 72 | } 73 | KeyPath::QuotedName(name) => { 74 | write!(f, "\"{name}\"")?; 75 | } 76 | KeyPath::Name(name) => { 77 | write!(f, "{name}")?; 78 | } 79 | } 80 | Ok(()) 81 | } 82 | } 83 | 84 | /// Parsing the input string to key paths. 
85 | pub fn parse_key_paths(input: &[u8]) -> Result<KeyPaths<'_>, Error> { 86 | match key_paths(input) { 87 | Ok((rest, paths)) => { 88 | if !rest.is_empty() { 89 | return Err(Error::InvalidKeyPath); 90 | } 91 | let key_paths = KeyPaths { paths }; 92 | Ok(key_paths) 93 | } 94 | Err(nom::Err::Error(_) | nom::Err::Failure(_)) => Err(Error::InvalidKeyPath), 95 | Err(nom::Err::Incomplete(_)) => unreachable!(), 96 | } 97 | } 98 | 99 | fn key_path(input: &[u8]) -> IResult<&[u8], KeyPath<'_>> { 100 | alt(( 101 | map(i32, KeyPath::Index), 102 | map(string, KeyPath::QuotedName), 103 | map(raw_string, KeyPath::Name), 104 | )) 105 | .parse(input) 106 | } 107 | 108 | fn key_paths(input: &[u8]) -> IResult<&[u8], Vec<KeyPath<'_>>> { 109 | alt(( 110 | delimited( 111 | preceded(multispace0, char('{')), 112 | separated_list1(char(','), delimited(multispace0, key_path, multispace0)), 113 | terminated(char('}'), multispace0), 114 | ), 115 | map( 116 | delimited( 117 | preceded(multispace0, char('{')), 118 | multispace0, 119 | terminated(char('}'), multispace0), 120 | ), 121 | |_| vec![], 122 | ), 123 | )) 124 | .parse(input) 125 | } 126 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! 
`jsonb` is a binary format `JSON` representation inspired by [PostgreSQL](https://www.postgresql.org/docs/current/datatype-json.html) and [CockroachDB](https://www.cockroachlabs.com/docs/stable/jsonb). It provides a fast, lightweight and easy-to-use API for working with `JSON` data. 16 | //! 17 | //! ## Features 18 | //! 19 | //! - Good compatibility: `jsonb` fully supports the `JSON` standard and can be used to store complex data structures. 20 | //! - Fast performance: `jsonb` is designed for high performance, allowing you to work with large `JSON` data sets with ease. 21 | //! - Easy to use: `jsonb` provides a number of built-in functions to support various operations, and also supports the `JSONPath` syntax for selecting and extracting subset elements. 22 | //! - Safe and secure: `jsonb` is written in Rust, which provides memory and thread safety guarantees, making it a safe choice for handling sensitive data. 23 | //! 24 | //! ## Encoding format 25 | //! 26 | //! The `jsonb` encoding format is a tree-like structure. Each node contains a container header, a number of JEntry headers, and nested encoding values. 27 | //! 28 | //! - 32-bit container header. 3 bits identify the type of value, including `scalar`, `object` and `array`, and 29 bits identify the number of JEntries in the `array` or `object`. The root node of the `jsonb` value is always a container header. 29 | //! - `scalar` container header: `0x20000000` 30 | //! - `object` container header: `0x40000000` 31 | //! - `array` container header: `0x80000000` 32 | //! - 32-bit JEntry header. 1 bit identifies whether the JEntry stores a length or an offset, 3 bits identify the type of value, including `null`, `string`, `number`, `false`, `true` and `container`, and the remaining 28 bits identify the length or offset of the encoding value. 33 | //! - `null` JEntry header: `0x00000000` 34 | //! - `string` JEntry header: `0x10000000` 35 | //! - `number` JEntry header: `0x20000000` 36 | //! 
- `false` JEntry header: `0x30000000` 37 | //! - `true` JEntry header: `0x40000000` 38 | //! - `container` JEntry header `0x50000000` 39 | //! - Encoding value. Different types of JEntry header have different encoding values. 40 | //! - `null`, `true`, `false`: no encoding value, identified by the JEntry header. 41 | //! - `string`: a normal UTF-8 string. 42 | //! - `number`: an encoded number to represent uint64s, int64s and float64s. 43 | //! - `container`: a nested `json` value with a recursive structure. 44 | //! 45 | //! #### An encoding example 46 | //! 47 | //! ```text 48 | //! // JSON value 49 | //! [false, 10, {"k":"v"}] 50 | //! 51 | //! // JSONB encoding 52 | //! 0x80000003 array container header (3 JEntries) 53 | //! 0x30000000 false JEntry header (no encoding value) 54 | //! 0x20000002 number JEntry header (encoding value length 2) 55 | //! 0x5000000e container JEntry header (encoding value length 14) 56 | //! 0x500a number encoding value (10) 57 | //! 0x40000001 object container header (1 JEntry) 58 | //! 0x10000001 string key JEntry header (encoding value length 1) 59 | //! 0x10000001 string value JEntry header (encoding value length 1) 60 | //! 0x6b string encoding value ("k") 61 | //! 0x76 string encoding value ("v") 62 | //! 
``` 63 | 64 | #![allow(clippy::uninlined_format_args)] 65 | 66 | mod constants; 67 | pub mod core; 68 | mod error; 69 | mod extension; 70 | mod from; 71 | mod functions; 72 | pub mod jsonpath; 73 | pub mod keypath; 74 | mod number; 75 | mod owned; 76 | mod parser; 77 | mod raw; 78 | mod util; 79 | mod value; 80 | 81 | pub use error::Error; 82 | pub use extension::*; 83 | #[allow(unused_imports)] 84 | pub use from::*; 85 | pub use number::Decimal128; 86 | pub use number::Decimal256; 87 | pub use number::Decimal64; 88 | pub use number::Number; 89 | pub use owned::to_owned_jsonb; 90 | pub use owned::OwnedJsonb; 91 | pub use parser::from_slice; 92 | pub use parser::parse_owned_jsonb; 93 | pub use parser::parse_owned_jsonb_standard_mode; 94 | pub use parser::parse_owned_jsonb_standard_mode_with_buf; 95 | pub use parser::parse_owned_jsonb_with_buf; 96 | pub use parser::parse_value; 97 | pub use parser::parse_value_standard_mode; 98 | pub use raw::from_raw_jsonb; 99 | pub use raw::RawJsonb; 100 | pub use value::*; 101 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use core::fmt::Display; 16 | 17 | use serde::de; 18 | use serde::ser; 19 | 20 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 21 | pub enum ParseErrorCode { 22 | InvalidEOF, 23 | InvalidNumberValue, 24 | InvalidStringValue, 25 | ExpectedSomeIdent, 26 | ExpectedSomeValue, 27 | ExpectedColon, 28 | ExpectedArrayCommaOrEnd, 29 | ExpectedObjectCommaOrEnd, 30 | UnexpectedTrailingCharacters, 31 | KeyMustBeAString, 32 | ControlCharacterWhileParsingString, 33 | InvalidEscaped(u8), 34 | InvalidHex(u8), 35 | InvalidLoneLeadingSurrogateInHexEscape(u16), 36 | InvalidSurrogateInHexEscape(u16), 37 | UnexpectedEndOfHexEscape, 38 | ObjectDuplicateKey(String), 39 | ObjectKeyInvalidNumber, 40 | ObjectKeyInvalidCharacter, 41 | } 42 | 43 | pub type Result<T> = std::result::Result<T, Error>; 44 | 45 | impl Display for ParseErrorCode { 46 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 47 | match self { 48 | ParseErrorCode::InvalidEOF => f.write_str("EOF while parsing a value"), 49 | ParseErrorCode::InvalidNumberValue => f.write_str("invalid number"), 50 | ParseErrorCode::InvalidStringValue => f.write_str("invalid string"), 51 | ParseErrorCode::ExpectedSomeIdent => f.write_str("expected ident"), 52 | ParseErrorCode::ExpectedSomeValue => f.write_str("expected value"), 53 | ParseErrorCode::ExpectedColon => f.write_str("expected `:`"), 54 | ParseErrorCode::ExpectedArrayCommaOrEnd => f.write_str("expected `,` or `]`"), 55 | ParseErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"), 56 | ParseErrorCode::UnexpectedTrailingCharacters => f.write_str("trailing characters"), 57 | ParseErrorCode::KeyMustBeAString => f.write_str("key must be a string"), 58 | ParseErrorCode::ControlCharacterWhileParsingString => { 59 | f.write_str("control character (\\u0000-\\u001F) found while parsing a string") 60 | } 61 | ParseErrorCode::InvalidEscaped(n) => { 62 | write!(f, "invalid escaped '{:X}'", n) 63 | } 64 | ParseErrorCode::InvalidHex(n) => { 65 | write!(f, "invalid 
hex '{:X}'", n) 66 | } 67 | ParseErrorCode::InvalidLoneLeadingSurrogateInHexEscape(n) => { 68 | write!(f, "lone leading surrogate in hex escape '{:X}'", n) 69 | } 70 | ParseErrorCode::InvalidSurrogateInHexEscape(n) => { 71 | write!(f, "invalid surrogate in hex escape '{:X}'", n) 72 | } 73 | ParseErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"), 74 | ParseErrorCode::ObjectDuplicateKey(key) => { 75 | write!(f, "duplicate object attribute \"{}\"", key) 76 | } 77 | ParseErrorCode::ObjectKeyInvalidNumber => { 78 | f.write_str("object attribute name cannot be a number") 79 | } 80 | ParseErrorCode::ObjectKeyInvalidCharacter => { 81 | f.write_str("object attribute name cannot be invalid character") 82 | } 83 | } 84 | } 85 | } 86 | 87 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 88 | #[non_exhaustive] 89 | pub enum Error { 90 | InvalidUtf8, 91 | InvalidEOF, 92 | InvalidToken, 93 | InvalidCast, 94 | 95 | InvalidJson, 96 | InvalidJsonb, 97 | InvalidJsonbHeader, 98 | InvalidJsonbJEntry, 99 | InvalidJsonbNumber, 100 | InvalidJsonbExtension, 101 | 102 | InvalidJsonPath, 103 | InvalidJsonPathPredicate, 104 | InvalidKeyPath, 105 | 106 | InvalidJsonType, 107 | InvalidObject, 108 | ObjectDuplicateKey, 109 | UnexpectedType, 110 | 111 | Message(String), 112 | Syntax(ParseErrorCode, usize), 113 | } 114 | 115 | impl ser::Error for Error { 116 | fn custom<T: Display>(msg: T) -> Self { 117 | Error::Message(msg.to_string()) 118 | } 119 | } 120 | 121 | impl de::Error for Error { 122 | fn custom<T: Display>(msg: T) -> Self { 123 | Error::Message(msg.to_string()) 124 | } 125 | } 126 | 127 | impl Display for Error { 128 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 129 | match self { 130 | Error::Message(m) => write!(f, "{}", m), 131 | Error::Syntax(code, pos) => write!(f, "{}, pos {}", code, pos), 132 | _ => write!(f, "{:?}", self), 133 | } 134 | } 135 | } 136 | 137 | impl std::error::Error for Error { 138 | fn source(&self) -> Option<&(dyn 
std::error::Error + 'static)> { 139 | None 140 | } 141 | } 142 | 143 | impl From<std::io::Error> for Error { 144 | fn from(_error: std::io::Error) -> Self { 145 | Error::InvalidUtf8 146 | } 147 | } 148 | 149 | impl From<std::str::Utf8Error> for Error { 150 | fn from(_error: std::str::Utf8Error) -> Self { 151 | Error::InvalidUtf8 152 | } 153 | } 154 | 155 | impl From<nom::Err<nom::error::Error<&[u8]>>> for Error { 156 | fn from(_error: nom::Err<nom::error::Error<&[u8]>>) -> Self { 157 | Error::InvalidJsonb 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [v0.5.5] - 2025-11-17 2 | 3 | ### Added 4 | 5 | - Feat: Support function `extract_scalar_key_values` (#95) 6 | - Feat: Make JsonbItemType public so users can compare (#94) 7 | 8 | ### Fixed 9 | 10 | - Fix: fix `serde_json` infinite Number convert to Jsonb Value unwrap panic (#96) 11 | 12 | ## [v0.5.4] - 2025-08-20 13 | 14 | ### Added 15 | 16 | - Feat: Support parse extended json5 syntax (#92) 17 | 18 | ## [v0.5.3] - 2025-08-02 19 | 20 | ### Added 21 | 22 | - Perf: Improve parse json performance (#90) 23 | - Chore: add toplevel feature to allow users to better manage dependencies (#89) 24 | 25 | ## [v0.5.2] - 2025-06-27 26 | 27 | ### Added 28 | 29 | - Feat: Enhanced JSONB Parser with Decimal Support and Extended Syntax (#86) 30 | - Feat: Support extension types (#83) 31 | 32 | ## [v0.5.1] - 2025-04-18 33 | 34 | ### Added 35 | 36 | - Chore: Bump nom 8.0.0 (#84) 37 | 38 | ## [v0.5.0] - 2025-04-15 39 | 40 | ### Added 41 | 42 | - Feat: json path support recursive wildcard member accessor `.**` syntax (#81) 43 | - Refactor: get object value by key name improve performance (#79) 44 | - Refactor: Implements serde trait for RawJsonb (#77) 45 | - Refactor JSONB functions: Improved API, Documentation, and Data Structures (#75) 46 | - Feat: add arithmetic expression support (#71) 47 | - Feat(expr): add filter expr `starts with` (#52) 48 | 49 | ## [v0.4.4] - 
2024-11-16 50 | 51 | ### Fixed 52 | 53 | - Fix: panic when facing corrupted jsonb (#67) 54 | 55 | ### Added 56 | 57 | - Bump fast-float2 v0.2.3 (#69) 58 | - Feat: add a function to parse jsonb only (#66) 59 | - Feat: support `object_delete` and `object_pick` function (#65) 60 | - Feat: support `object_insert` function (#64) 61 | - Feat: Support json array functions (#62) 62 | - Feat: add lazy value (#61) 63 | 64 | ## [v0.4.3] - 2024-09-30 65 | 66 | ### Fixed 67 | 68 | - Fix: Fix compare object value with different length panic (#59) 69 | 70 | ## [v0.4.2] - 2024-09-19 71 | 72 | ### Added 73 | 74 | - Feat: make `preserve_order` a default feature (#56) 75 | 76 | ## [v0.4.1] - 2024-07-18 77 | 78 | ### Fixed 79 | 80 | - Fix: Fix jsonpath selector unwrap panic. (#53) 81 | 82 | ## [v0.4.0] - 2024-05-17 83 | 84 | ### Fixed 85 | 86 | - Fix: Fix get by keypath with null value. (#47) 87 | - Fix: Handle invalid jsonb value to avoid panic in functions. (#46) 88 | - Fix: Fix builder & concat container jentry len. (#43) 89 | 90 | ### Added 91 | 92 | - Feat: Support convert jsonb value to `serde_json` value. (#49) 93 | - Feat: Add `exists` filter expression. (#48) 94 | - Feat: Add `delete_by_keypath`. (#45) 95 | - Feat: Add `delete_by_index` & `delete_by_name`. (#44) 96 | - Feat: Add `concat` & improve `strip_nulls`. (#42) 97 | - Feat: Add jsonpath predicate support. (#41) 98 | - Feat: Add `contains` api. (#40) 99 | - Feat: Add `exists_any_keys` & `exists_all_keys`. (#38) 100 | - Feat: Support parse key paths. (#37) 101 | - Feat: Add `get_by_keypath`. (#36) 102 | 103 | ## [v0.3.0] - 2023-10-13 104 | 105 | ### Added 106 | 107 | - Docs: Add more jsonb encoding format descriptions. (#34) 108 | - Feat: Support `object_each` api. (#33) 109 | - Feat: Support `path_exists` api. (#32) 110 | - Feat: Support `type_of` api. (#31) 111 | - Feat: Support `strip_nulls` api. (#30) 112 | - Perf: Add benches for parser and `get_path`. (#29) 113 | - Chore: Add check fmt and clippy. 
(#27) 114 | - Feat: Support `to_pretty_string` api. (#26) 115 | - Feat: Support `traverse_check_string` function. (#25) 116 | - Feat: Improve json path selector using less memory. (#24) 117 | 118 | ## [v0.2.3] - 2023-07-10 119 | 120 | ### Fixed 121 | 122 | - Fix: fix parse json path name with escaped characters. (#21) 123 | - Fix: Fix some special characters display errors. (#18) 124 | - Fix: Support parsing Unicode characters enclosed in brackets. (#17) 125 | - Fix: json `to_string` function adds backslash for escaped characters. (#16) 126 | - Fix: fix parse UTF-8 characters. (#15) 127 | 128 | ### Added 129 | 130 | - chore: implement From trait with owned JsonValue for Value. (#22) 131 | - Feat: Add function `convert_to_comparable`, `rand_value`. (#20) 132 | - Create publish.yaml. (#19) 133 | 134 | ## [v0.2.2] - 2023-05-06 135 | 136 | ### Fixed 137 | 138 | - Fix: Allow parse escaped white space. (#14) 139 | 140 | ## [v0.2.1] - 2023-05-05 141 | 142 | ### Fixed 143 | 144 | - Fix: Allow parse invalid Unicode. (#13) 145 | 146 | ## [v0.2.0] - 2023-04-21 147 | 148 | ### Added 149 | 150 | - Feat: Support `JSON path` selector. (#8) 151 | - Feat: Support parse `JSON path` syntax. (#7) 152 | 153 | ## [v0.1.1] - 2023-03-03 154 | 155 | - Rename project name to jsonb. 156 | - Add Readme description. (#4) 157 | - Use stable Rust. (#3) 158 | 159 | ## v0.1.0 - 2023-03-03 160 | 161 | - Implement a `JSON` parser. 162 | - Implement `JSONB` encodes and decodes. 163 | - Implemented a number of `JSONB` functions. 
164 | 165 | [v0.5.5]: https://github.com/databendlabs/jsonb/compare/v0.5.4...v0.5.5 166 | [v0.5.4]: https://github.com/databendlabs/jsonb/compare/v0.5.3...v0.5.4 167 | [v0.5.3]: https://github.com/databendlabs/jsonb/compare/v0.5.2...v0.5.3 168 | [v0.5.2]: https://github.com/databendlabs/jsonb/compare/v0.5.1...v0.5.2 169 | [v0.5.1]: https://github.com/databendlabs/jsonb/compare/v0.5.0...v0.5.1 170 | [v0.5.0]: https://github.com/databendlabs/jsonb/compare/v0.4.4...v0.5.0 171 | [v0.4.4]: https://github.com/databendlabs/jsonb/compare/v0.4.3...v0.4.4 172 | [v0.4.3]: https://github.com/databendlabs/jsonb/compare/v0.4.2...v0.4.3 173 | [v0.4.2]: https://github.com/databendlabs/jsonb/compare/v0.4.1...v0.4.2 174 | [v0.4.1]: https://github.com/databendlabs/jsonb/compare/v0.4.0...v0.4.1 175 | [v0.4.0]: https://github.com/databendlabs/jsonb/compare/v0.3.0...v0.4.0 176 | [v0.3.0]: https://github.com/databendlabs/jsonb/compare/v0.2.3...v0.3.0 177 | [v0.2.3]: https://github.com/databendlabs/jsonb/compare/v0.2.2...v0.2.3 178 | [v0.2.2]: https://github.com/databendlabs/jsonb/compare/v0.2.1...v0.2.2 179 | [v0.2.1]: https://github.com/databendlabs/jsonb/compare/v0.2.0...v0.2.1 180 | [v0.2.0]: https://github.com/databendlabs/jsonb/compare/v0.1.1...v0.2.0 181 | [v0.1.1]: https://github.com/databendlabs/jsonb/compare/v0.1.0...v0.1.1 182 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jsonb   [![Build Status]][actions] [![Latest Version]][crates.io] [![Crate Downloads]][crates.io] 2 | 3 | [build status]: https://img.shields.io/github/actions/workflow/status/datafuselabs/jsonb/rust.yml?branch=main 4 | [actions]: https://github.com/datafuselabs/jsonb/actions?query=branch%3Amain 5 | [latest version]: https://img.shields.io/crates/v/jsonb.svg 6 | [crates.io]: https://crates.io/crates/jsonb 7 | [crate downloads]: https://img.shields.io/crates/d/jsonb.svg 8 | 9 | 
10 | `jsonb` is a binary format `JSON` representation inspired by [PostgreSQL](https://www.postgresql.org/docs/current/datatype-json.html) and [CockroachDB](https://www.cockroachlabs.com/docs/stable/jsonb). It provides a fast, lightweight and easy-to-use API for working with `JSON` data. 11 | 12 | ## Features 13 | 14 | - Good compatibility: `jsonb` fully supports the `JSON` standard and can be used to store complex data structures. 15 | - Fast performance: `jsonb` is designed for high performance, allowing you to work with large `JSON` data sets with ease. 16 | - Easy to use: `jsonb` provides a number of built-in functions to support various operations, and also supports the `JSONPath` syntax for selecting and extracting subset elements. 17 | - Safe and secure: `jsonb` is written in Rust, which provides memory and thread safety guarantees, making it a safe choice for handling sensitive data. 18 | 19 | ## Encoding format 20 | 21 | The `jsonb` encoding format is a tree-like structure. Each node contains a container header, a number of JEntry headers, and nested encoding values. 22 | 23 | - 32-bit container header. 3 bits identify the type of value, including `scalar`, `object` and `array`, and 29 bits identify the number of JEntries in the `array` or `object`. The root node of the `jsonb` value is always a container header. 24 | - `scalar` container header: `0x20000000` 25 | - `object` container header: `0x40000000` 26 | - `array` container header: `0x80000000` 27 | - 32-bit JEntry header. 1 bit identifies whether the JEntry stores a length or an offset, 3 bits identify the type of value, including `null`, `string`, `number`, `false`, `true` and `container`, and the remaining 28 bits identify the length or offset of the encoding value. 
28 | - `null` JEntry header: `0x00000000` 29 | - `string` JEntry header: `0x10000000` 30 | - `number` JEntry header: `0x20000000` 31 | - `false` JEntry header: `0x30000000` 32 | - `true` JEntry header: `0x40000000` 33 | - `container` JEntry header: `0x50000000` 34 | - Encoding value. Different types of JEntry header have different encoding values. 35 | - `null`, `true`, `false`: no encoding value, identified by the JEntry header. 36 | - `string`: a normal UTF-8 string. 37 | - `number`: an encoded number to represent uint64s, int64s and float64s. 38 | - `container`: a nested `json` value with a recursive structure. 39 | 40 | #### An encoding example 41 | 42 | ```text 43 | // JSON value 44 | [false, 10, {"k":"v"}] 45 | 46 | // JSONB encoding 47 | 0x80000003 array container header (3 JEntries) 48 | 0x30000000 false JEntry header (no encoding value) 49 | 0x20000002 number JEntry header (encoding value length 2) 50 | 0x5000000e container JEntry header (encoding value length 14) 51 | 0x500a number encoding value (10) 52 | 0x40000001 object container header (1 JEntry) 53 | 0x10000001 string key JEntry header (encoding value length 1) 54 | 0x10000001 string value JEntry header (encoding value length 1) 55 | 0x6b string encoding value ("k") 56 | 0x76 string encoding value ("v") 57 | ``` 58 | 59 | ## Jsonb value 60 | 61 | The `jsonb` value is an enumeration that represents all kinds of `JSON` values and serves as an intermediate for converting other data types to the `jsonb` binary format value. 62 | 63 | ```rust 64 | // jsonb value 65 | #[derive(Clone, PartialEq, Eq)] 66 | pub enum Value<'a> { 67 | Null, 68 | Bool(bool), 69 | String(Cow<'a, str>), 70 | Number(Number), 71 | Array(Vec<Value<'a>>), 72 | Object(Object<'a>), 73 | } 74 | ``` 75 | 76 | ## Built-in functions 77 | 78 | `jsonb` implements a number of commonly used built-in functions. 
Since most functions only focus on a subset of the total value, they can use the container headers and JEntry headers to efficiently skip over intermediate parts of the `jsonb` value. This avoids time-consuming deserialisation operations and provides very high performance. For more information, see https://docs.rs/jsonb/latest/jsonb/#functions 79 | 80 | ## SQL/JSONPath 81 | 82 | [SQL/JSONPath](https://www.iso.org/standard/67367.html) is a query language used to select and extract a subset of elements from a `jsonb` value. 83 | 84 | #### Operators 85 | 86 | The following operators have been implemented: 87 | 88 | | Operator | Description | Examples | 89 | |--------------------------|--------------------------------------------------------------|--------------------| 90 | | `$` | The root element | `$` | 91 | | `@` | The current element in the filter expression | `$.event?(@ == 1)` | 92 | | `.*` | Selecting all elements in an Object | `$.*` | 93 | | `.<name>` | Selecting the element that matches the name in an Object | `$.event` | 94 | | `:<name>` | Alias of `.<name>` | `$:event` | 95 | | `["<name>"]` | Alias of `.<name>` | `$["event"]` | 96 | | `[*]` | Selecting all elements in an Array | `$[*]` | 97 | | `[<n>, ..]` | Selecting 0-based `n-th` elements in an Array | `$[1, 2]` | 98 | | `[last - <n>, ..]` | Selecting `n-th` element before the last element in an Array | `$[0, last - 1]` | 99 | | `[<n1> to <n2>, ..]` | Selecting all elements of a range in an Array | `$[1 to last - 2]` | 100 | | `?(<expr>)` | Selecting all elements that matched the filter expression | `$?(@.price < 10)` | 101 | 102 | ## Examples 103 | 104 | ```rust 105 | fn main() { 106 | let json = r#" 107 | { 108 | "name":"Fred", 109 | "phones":[ 110 | { 111 | "type":"home", 112 | "number":3720453 113 | }, 114 | { 115 | "type": "work", 116 | "number":5062051 117 | } 118 | ] 119 | }"#; 120 | 121 | let path = r#"$.phones[*]?(@.number == 3720453)"#; 122 | 123 | // parse JSON string to jsonb value 124 | let value = jsonb::parse_value(json.as_bytes()).unwrap(); 125 | // 
encode jsonb value to jsonb binary value 126 | let jsonb = value.to_vec(); 127 | // parse JSONPath string 128 | let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes()).unwrap(); 129 | // select subset value from jsonb binary value 130 | let mut sub_jsonb = Vec::new(); 131 | let mut sub_offsets = Vec::new(); 132 | jsonb::get_by_path(&jsonb, json_path, &mut sub_jsonb, &mut sub_offsets); 133 | 134 | // value={"number":3720453,"type":"home"} 135 | println!("value={}", jsonb::to_string(&sub_jsonb)); 136 | } 137 | ``` 138 | 139 | ## Contributing 140 | 141 | `jsonb` is an open source project and all kinds of contributions are welcome! You can help with ideas, code or documentation. 142 | 143 | ## License 144 | 145 | Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) 146 | -------------------------------------------------------------------------------- /tests/it/encode.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::borrow::Cow; 16 | 17 | use ethnum::I256; 18 | use jsonb::{ 19 | Date, Decimal128, Decimal256, Decimal64, Interval, Number, Object, Timestamp, TimestampTz, 20 | Value, 21 | }; 22 | 23 | #[test] 24 | fn test_encode_null() { 25 | assert_eq!(&Value::Null.to_vec(), b"\x20\0\0\0\0\0\0\0"); 26 | } 27 | 28 | #[test] 29 | fn test_encode_boolean() { 30 | assert_eq!(&Value::Bool(true).to_vec(), b"\x20\0\0\0\x40\0\0\0"); 31 | assert_eq!(&Value::Bool(false).to_vec(), b"\x20\0\0\0\x30\0\0\0"); 32 | } 33 | 34 | #[test] 35 | fn test_encode_string() { 36 | assert_eq!( 37 | &Value::String(Cow::from("asd")).to_vec(), 38 | b"\x20\0\0\0\x10\0\0\x03\x61\x73\x64" 39 | ); 40 | assert_eq!( 41 | &Value::String(Cow::from("测试")).to_vec(), 42 | b"\x20\0\0\0\x10\0\0\x06\xE6\xB5\x8B\xE8\xAF\x95" 43 | ); 44 | } 45 | 46 | #[test] 47 | fn test_encode_int64() { 48 | assert_eq!( 49 | &Value::Number(Number::Int64(0)).to_vec(), 50 | b"\x20\0\0\0\x20\0\0\x01\x00" 51 | ); 52 | assert_eq!( 53 | &Value::Number(Number::Int64(-100)).to_vec(), 54 | b"\x20\0\0\0\x20\0\0\x02\x40\x9C" 55 | ); 56 | assert_eq!( 57 | &Value::Number(Number::Int64(i8::MIN as i64)).to_vec(), 58 | b"\x20\0\0\0\x20\0\0\x02\x40\x80" 59 | ); 60 | assert_eq!( 61 | &Value::Number(Number::Int64(i8::MAX as i64)).to_vec(), 62 | b"\x20\0\0\0\x20\0\0\x02\x40\x7F" 63 | ); 64 | assert_eq!( 65 | &Value::Number(Number::Int64(i16::MIN as i64)).to_vec(), 66 | b"\x20\0\0\0\x20\0\0\x03\x40\x80\0" 67 | ); 68 | assert_eq!( 69 | &Value::Number(Number::Int64(i16::MAX as i64)).to_vec(), 70 | b"\x20\0\0\0\x20\0\0\x03\x40\x7F\xFF" 71 | ); 72 | assert_eq!( 73 | &Value::Number(Number::Int64(i32::MIN as i64)).to_vec(), 74 | b"\x20\0\0\0\x20\0\0\x05\x40\x80\0\0\0" 75 | ); 76 | assert_eq!( 77 | &Value::Number(Number::Int64(i32::MAX as i64)).to_vec(), 78 | b"\x20\0\0\0\x20\0\0\x05\x40\x7F\xFF\xFF\xFF" 79 | ); 80 | assert_eq!( 81 | &Value::Number(Number::Int64(i64::MIN)).to_vec(), 82 | b"\x20\0\0\0\x20\0\0\x09\x40\x80\0\0\0\0\0\0\0" 83 | ); 84 | 
assert_eq!( 85 | &Value::Number(Number::Int64(i64::MAX)).to_vec(), 86 | b"\x20\0\0\0\x20\0\0\x09\x40\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 87 | ); 88 | } 89 | 90 | #[test] 91 | fn test_encode_uint64() { 92 | assert_eq!( 93 | &Value::Number(Number::UInt64(0)).to_vec(), 94 | b"\x20\0\0\0\x20\0\0\x01\x00" 95 | ); 96 | assert_eq!( 97 | &Value::Number(Number::UInt64(100)).to_vec(), 98 | b"\x20\0\0\0\x20\0\0\x02\x50\x64" 99 | ); 100 | assert_eq!( 101 | &Value::Number(Number::UInt64(u8::MAX as u64)).to_vec(), 102 | b"\x20\0\0\0\x20\0\0\x02\x50\xFF" 103 | ); 104 | assert_eq!( 105 | &Value::Number(Number::UInt64(u16::MAX as u64)).to_vec(), 106 | b"\x20\0\0\0\x20\0\0\x03\x50\xFF\xFF" 107 | ); 108 | assert_eq!( 109 | &Value::Number(Number::UInt64(u32::MAX as u64)).to_vec(), 110 | b"\x20\0\0\0\x20\0\0\x05\x50\xFF\xFF\xFF\xFF" 111 | ); 112 | assert_eq!( 113 | &Value::Number(Number::UInt64(u64::MAX)).to_vec(), 114 | b"\x20\0\0\0\x20\0\0\x09\x50\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 115 | ); 116 | } 117 | 118 | #[test] 119 | fn test_encode_float64() { 120 | assert_eq!( 121 | &Value::Number(Number::Float64(f64::INFINITY)).to_vec(), 122 | b"\x20\0\0\0\x20\0\0\x01\x20" 123 | ); 124 | assert_eq!( 125 | &Value::Number(Number::Float64(f64::NEG_INFINITY)).to_vec(), 126 | b"\x20\0\0\0\x20\0\0\x01\x30" 127 | ); 128 | assert_eq!( 129 | &Value::Number(Number::Float64(0.0123f64)).to_vec(), 130 | b"\x20\0\0\0\x20\0\0\x09\x60\x3F\x89\x30\xBE\x0D\xED\x28\x8D" 131 | ); 132 | assert_eq!( 133 | &Value::Number(Number::Float64(1.2e308f64)).to_vec(), 134 | b"\x20\0\0\0\x20\0\0\x09\x60\x7F\xE5\x5C\x57\x6D\x81\x57\x26" 135 | ); 136 | } 137 | 138 | #[test] 139 | fn test_encode_decimal() { 140 | assert_eq!( 141 | &Value::Number(Number::Decimal64(Decimal64 { 142 | scale: 2, 143 | value: 1234 144 | })) 145 | .to_vec(), 146 | b"\x20\0\0\0\x20\0\0\x0A\x70\0\0\0\0\0\0\x04\xD2\x02" 147 | ); 148 | assert_eq!( 149 | &Value::Number(Number::Decimal128(Decimal128 { 150 | scale: 10, 151 | value: 10000000000485 152 | })) 153 | 
.to_vec(), 154 | b"\x20\0\0\0\x20\0\0\x12\x70\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x0A" 155 | ); 156 | 157 | assert_eq!( 158 | &Value::Number(Number::Decimal256(Decimal256 { scale: 2, value: I256::new(1234) })).to_vec(), 159 | b"\x20\0\0\0\x20\0\0\x22\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x02" 160 | ); 161 | assert_eq!( 162 | &Value::Number(Number::Decimal256(Decimal256 { scale: 10, value: I256::new(10000000000485) })).to_vec(), 163 | b"\x20\0\0\0\x20\0\0\x22\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x0A" 164 | ); 165 | } 166 | 167 | #[test] 168 | fn test_encode_array() { 169 | assert_eq!( 170 | &Value::Array(vec![Value::Bool(false), Value::Bool(true)]).to_vec(), 171 | b"\x80\0\0\x02\x30\0\0\0\x40\0\0\0", 172 | ); 173 | 174 | assert_eq!( 175 | &Value::Array(vec![ 176 | Value::Bool(false), 177 | Value::Binary(&[100, 101, 102, 103]), 178 | Value::Date(Date {value: 20381 }), 179 | Value::Timestamp(Timestamp { value: 1540230120000000 }), 180 | Value::TimestampTz(TimestampTz { offset: 8, value: 1670389100000000 }), 181 | Value::Interval(Interval { months: 2, days: 10, micros: 500000000 }), 182 | Value::Number(Number::Decimal256(Decimal256 { scale: 2, value: I256::new(1234) })), 183 | ]).to_vec(), 184 | b"\x80\0\0\x07\x30\0\0\0\x60\0\0\x05\x60\0\0\x05\x60\0\0\x09\x60\0\0\x0A\x60\0\0\x11\x20\0\0\x22\0\x64\x65\x66\x67\x10\0\0\x4F\x9D\x20\0\x05\x78\xD4\xC5\x2C\xCA\0\x30\0\x05\xEF\x35\xC4\xF1\x33\0\x08\x40\0\0\0\x02\0\0\0\x0A\0\0\0\0\x1D\xCD\x65\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x02", 185 | ); 186 | } 187 | 188 | #[test] 189 | fn test_encode_object() { 190 | let mut obj1 = Object::new(); 191 | obj1.insert("asd".to_string(), Value::String(Cow::from("adf"))); 192 | assert_eq!( 193 | &Value::Object(obj1).to_vec(), 194 | b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66" 195 | ); 196 | 197 | let mut obj2 = Object::new(); 198 | 
obj2.insert("k1".to_string(), Value::String(Cow::from("v1"))); 199 | obj2.insert("k2".to_string(), Value::Binary(&[200, 201, 202, 203])); 200 | obj2.insert("k3".to_string(), Value::Date(Date { value: 20381 })); 201 | obj2.insert( 202 | "k4".to_string(), 203 | Value::Timestamp(Timestamp { 204 | value: 1540230120000000, 205 | }), 206 | ); 207 | obj2.insert( 208 | "k5".to_string(), 209 | Value::TimestampTz(TimestampTz { 210 | offset: 8, 211 | value: 1670389100000000, 212 | }), 213 | ); 214 | obj2.insert( 215 | "k6".to_string(), 216 | Value::Interval(Interval { 217 | months: 2, 218 | days: 10, 219 | micros: 500000000, 220 | }), 221 | ); 222 | obj2.insert( 223 | "k7".to_string(), 224 | Value::Number(Number::Decimal256(Decimal256 { 225 | scale: 2, 226 | value: I256::new(1234), 227 | })), 228 | ); 229 | 230 | assert_eq!( 231 | &Value::Object(obj2).to_vec(), 232 | b"\x40\0\0\x07\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x60\0\0\x05\x60\0\0\x05\x60\0\0\x09\x60\0\0\x0A\x60\0\0\x11\x20\0\0\x22\x6B\x31\x6B\x32\x6B\x33\x6B\x34\x6B\x35\x6B\x36\x6B\x37\x76\x31\0\xC8\xC9\xCA\xCB\x10\0\0\x4F\x9D\x20\0\x05\x78\xD4\xC5\x2C\xCA\0\x30\0\x05\xEF\x35\xC4\xF1\x33\0\x08\x40\0\0\0\x02\0\0\0\x0A\0\0\0\0\x1D\xCD\x65\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x02" 233 | ); 234 | } 235 | 236 | #[test] 237 | fn test_encode_extension() { 238 | assert_eq!( 239 | Value::Binary(&[1, 2, 3]).to_vec(), 240 | b"\x20\0\0\0\x60\0\0\x04\0\x01\x02\x03" 241 | ); 242 | assert_eq!( 243 | Value::Date(Date { value: 20372 }).to_vec(), 244 | b"\x20\0\0\0\x60\0\0\x05\x10\0\0\x4f\x94" 245 | ); 246 | assert_eq!( 247 | Value::Timestamp(Timestamp { 248 | value: 1760140800000000 249 | }) 250 | .to_vec(), 251 | b"\x20\0\0\0\x60\0\0\x09\x20\0\x06\x40\xd6\xb7\x23\x80\0" 252 | ); 253 | assert_eq!( 254 | Value::TimestampTz(TimestampTz { 255 | offset: 8, 256 | value: 1760140800000000 257 | }) 258 | .to_vec(), 259 | 
b"\x20\0\0\0\x60\0\0\x0a\x30\0\x06\x40\xd6\xb7\x23\x80\0\x08" 260 | ); 261 | assert_eq!( 262 | Value::Interval(Interval { 263 | months: 10, 264 | days: 20, 265 | micros: 300000000 266 | }) 267 | .to_vec(), 268 | b"\x20\0\0\0\x60\0\0\x11\x40\0\0\0\x0A\0\0\0\x14\0\0\0\0\x11\xE1\xA3\0" 269 | ); 270 | } 271 | -------------------------------------------------------------------------------- /tests/it/decode.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::borrow::Cow; 16 | 17 | use ethnum::I256; 18 | use jsonb::{ 19 | from_slice, Date, Decimal128, Decimal256, Decimal64, Interval, Number, Object, Timestamp, 20 | TimestampTz, Value, 21 | }; 22 | 23 | #[test] 24 | fn test_decode_null() { 25 | let s = b"\x20\0\0\0\0\0\0\0"; 26 | let value = from_slice(s).unwrap(); 27 | assert!(value.is_null()); 28 | assert_eq!(value.as_null(), Some(())); 29 | } 30 | 31 | #[test] 32 | fn test_decode_boolean() { 33 | let tests = vec![ 34 | (b"\x20\0\0\0\x40\0\0\0".to_vec(), true), 35 | (b"\x20\0\0\0\x30\0\0\0".to_vec(), false), 36 | ]; 37 | for (s, v) in tests { 38 | let value = from_slice(s.as_slice()).unwrap(); 39 | assert!(value.is_boolean()); 40 | assert_eq!(value.as_bool().unwrap(), v); 41 | } 42 | } 43 | 44 | #[test] 45 | fn test_decode_string() { 46 | let tests = vec![ 47 | (b"\x20\0\0\0\x10\0\0\x03\x61\x73\x64".to_vec(), "asd"), 48 | ( 49 | b"\x20\0\0\0\x10\0\0\x06\xE6\xB5\x8B\xE8\xAF\x95".to_vec(), 50 | "测试", 51 | ), 52 | (b"\x20\0\0\0\x10\0\0\x01\x0A".to_vec(), "\n"), 53 | ]; 54 | for (s, v) in tests { 55 | let value = from_slice(s.as_slice()).unwrap(); 56 | assert!(value.is_string()); 57 | assert_eq!(value.as_str().unwrap(), &Cow::from(v)); 58 | } 59 | } 60 | 61 | #[test] 62 | fn test_decode_int64() { 63 | let tests = vec![ 64 | (b"\x20\0\0\0\x20\0\0\x01\x00".to_vec(), 0i64), 65 | (b"\x20\0\0\0\x20\0\0\x02\x40\x9C".to_vec(), -100i64), 66 | (b"\x20\0\0\0\x20\0\0\x02\x40\x80".to_vec(), i8::MIN as i64), 67 | (b"\x20\0\0\0\x20\0\0\x02\x40\x7F".to_vec(), i8::MAX as i64), 68 | ( 69 | b"\x20\0\0\0\x20\0\0\x03\x40\x80\0".to_vec(), 70 | i16::MIN as i64, 71 | ), 72 | ( 73 | b"\x20\0\0\0\x20\0\0\x03\x40\x7F\xFF".to_vec(), 74 | i16::MAX as i64, 75 | ), 76 | ( 77 | b"\x20\0\0\0\x20\0\0\x05\x40\x80\0\0\0".to_vec(), 78 | i32::MIN as i64, 79 | ), 80 | ( 81 | b"\x20\0\0\0\x20\0\0\x05\x40\x7F\xFF\xFF\xFF".to_vec(), 82 | i32::MAX as i64, 83 | ), 84 | ( 85 | b"\x20\0\0\0\x20\0\0\x09\x40\x80\0\0\0\0\0\0\0".to_vec(), 86 | 
i64::MIN, 87 | ), 88 | ( 89 | b"\x20\0\0\0\x20\0\0\x09\x40\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF".to_vec(), 90 | i64::MAX, 91 | ), 92 | ]; 93 | for (s, v) in tests { 94 | let value = from_slice(s.as_slice()).unwrap(); 95 | assert!(value.is_i64()); 96 | assert_eq!(value.as_i64().unwrap(), v); 97 | } 98 | } 99 | 100 | #[test] 101 | fn test_decode_uint64() { 102 | let tests = vec![ 103 | (b"\x20\0\0\0\x20\0\0\x01\x00".to_vec(), 0u64), 104 | (b"\x20\0\0\0\x20\0\0\x02\x50\x64".to_vec(), 100u64), 105 | (b"\x20\0\0\0\x20\0\0\x02\x50\xFF".to_vec(), u8::MAX as u64), 106 | ( 107 | b"\x20\0\0\0\x20\0\0\x03\x50\xFF\xFF".to_vec(), 108 | u16::MAX as u64, 109 | ), 110 | ( 111 | b"\x20\0\0\0\x20\0\0\x05\x50\xFF\xFF\xFF\xFF".to_vec(), 112 | u32::MAX as u64, 113 | ), 114 | ( 115 | b"\x20\0\0\0\x20\0\0\x09\x50\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF".to_vec(), 116 | u64::MAX, 117 | ), 118 | ]; 119 | for (s, v) in tests { 120 | let value = from_slice(s.as_slice()).unwrap(); 121 | assert!(value.is_u64()); 122 | assert_eq!(value.as_u64().unwrap(), v); 123 | } 124 | } 125 | 126 | #[test] 127 | fn test_decode_float64() { 128 | let tests = vec![ 129 | (b"\x20\0\0\0\x20\0\0\x01\x20".to_vec(), f64::INFINITY), 130 | (b"\x20\0\0\0\x20\0\0\x01\x30".to_vec(), f64::NEG_INFINITY), 131 | ( 132 | b"\x20\0\0\0\x20\0\0\x09\x60\x3F\x89\x30\xBE\x0D\xED\x28\x8D".to_vec(), 133 | 0.0123f64, 134 | ), 135 | ( 136 | b"\x20\0\0\0\x20\0\0\x09\x60\x7F\xE5\x5C\x57\x6D\x81\x57\x26".to_vec(), 137 | 1.2e308f64, 138 | ), 139 | ]; 140 | for (s, v) in tests { 141 | let value = from_slice(s.as_slice()).unwrap(); 142 | assert!(value.is_f64()); 143 | assert_eq!(value.as_f64().unwrap(), v); 144 | } 145 | } 146 | 147 | #[test] 148 | fn test_decode_deprected_decimal() { 149 | // Compatible with deprecated Decimal128 and Decimal256 formats, including precision 150 | let tests = vec![ 151 | (b"\x20\0\0\0\x20\0\0\x13\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x26\x02".to_vec(), Number::Decimal128(Decimal128 { 152 | scale: 2, 153 | value: 1234 
154 | })), 155 | (b"\x20\0\0\0\x20\0\0\x13\x70\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x26\x0A".to_vec(), Number::Decimal128(Decimal128 { 156 | scale: 10, 157 | value: 10000000000485 158 | })), 159 | (b"\x20\0\0\0\x20\0\0\x23\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x4C\x02".to_vec(), 160 | Number::Decimal256(Decimal256 { scale: 2, value: I256::new(1234) })), 161 | (b"\x20\0\0\0\x20\0\0\x23\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x4C\x0A".to_vec(), 162 | Number::Decimal256(Decimal256 { scale: 10, value: I256::new(10000000000485) })), 163 | ]; 164 | for (s, v) in tests { 165 | let value = from_slice(s.as_slice()).unwrap(); 166 | assert!(value.is_number()); 167 | assert_eq!(value.as_number().unwrap(), v); 168 | } 169 | } 170 | 171 | #[test] 172 | fn test_decode_decimal() { 173 | let tests = vec![ 174 | (b"\x20\0\0\0\x20\0\0\x0A\x70\0\0\0\0\0\0\x04\xD2\x02".to_vec(), Number::Decimal64(Decimal64 { 175 | scale: 2, 176 | value: 1234 177 | })), 178 | (b"\x20\0\0\0\x20\0\0\x12\x70\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x0A".to_vec(), Number::Decimal128(Decimal128 { 179 | scale: 10, 180 | value: 10000000000485 181 | })), 182 | (b"\x20\0\0\0\x20\0\0\x22\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x02".to_vec(), 183 | Number::Decimal256(Decimal256 { scale: 2, value: I256::new(1234) })), 184 | (b"\x20\0\0\0\x20\0\0\x22\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x0A".to_vec(), 185 | Number::Decimal256(Decimal256 { scale: 10, value: I256::new(10000000000485) })), 186 | ]; 187 | for (s, v) in tests { 188 | let value = from_slice(s.as_slice()).unwrap(); 189 | assert!(value.is_number()); 190 | assert_eq!(value.as_number().unwrap(), v); 191 | } 192 | } 193 | 194 | #[test] 195 | fn test_decode_array() { 196 | let tests = vec![( 197 | b"\x80\0\0\x02\x30\0\0\0\x40\0\0\0".to_vec(), 198 | vec![Value::Bool(false), Value::Bool(true)], 199 | )]; 
200 | for (s, v) in tests { 201 | let value = from_slice(s.as_slice()).unwrap(); 202 | assert!(value.is_array()); 203 | let arr = value.as_array().unwrap(); 204 | assert_eq!(arr.len(), v.len()); 205 | for (l, r) in arr.iter().zip(v.iter()) { 206 | assert_eq!(l, r); 207 | } 208 | } 209 | } 210 | 211 | #[test] 212 | fn test_decode_object() { 213 | let mut obj1 = Object::new(); 214 | obj1.insert("asd".to_string(), Value::String(Cow::from("adf"))); 215 | let tests = vec![( 216 | b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66".to_vec(), 217 | obj1, 218 | )]; 219 | for (s, v) in tests { 220 | let value = from_slice(s.as_slice()).unwrap(); 221 | assert!(value.is_object()); 222 | let obj = value.as_object().unwrap(); 223 | assert_eq!(obj.len(), v.len()); 224 | for ((lk, lv), (rk, rv)) in obj.iter().enumerate().zip(v.iter().enumerate()) { 225 | assert_eq!(lk, rk); 226 | assert_eq!(lv, rv); 227 | } 228 | } 229 | } 230 | 231 | #[test] 232 | fn test_decode_extension() { 233 | let tests = vec![ 234 | ( 235 | b"\x20\0\0\0\x60\0\0\x04\0\x01\x02\x03".to_vec(), 236 | Value::Binary(&[1, 2, 3]), 237 | ), 238 | ( 239 | b"\x20\0\0\0\x60\0\0\x05\x10\0\0\x4f\x94".to_vec(), 240 | Value::Date(Date { value: 20372 }), 241 | ), 242 | ( 243 | b"\x20\0\0\0\x60\0\0\x09\x20\0\x06\x40\xd6\xb7\x23\x80\0".to_vec(), 244 | Value::Timestamp(Timestamp { 245 | value: 1760140800000000, 246 | }), 247 | ), 248 | ( 249 | b"\x20\0\0\0\x60\0\0\x0a\x30\0\x06\x40\xd6\xb7\x23\x80\0\x08".to_vec(), 250 | Value::TimestampTz(TimestampTz { 251 | offset: 8, 252 | value: 1760140800000000, 253 | }), 254 | ), 255 | ( 256 | b"\x20\0\0\0\x60\0\0\x11\x40\0\0\0\x0A\0\0\0\x14\0\0\0\0\x11\xE1\xA3\0".to_vec(), 257 | Value::Interval(Interval { 258 | months: 10, 259 | days: 20, 260 | micros: 300000000, 261 | }), 262 | ), 263 | ]; 264 | 265 | for (s, v) in tests { 266 | let value = from_slice(s.as_slice()).unwrap(); 267 | assert_eq!(value, v); 268 | } 269 | } 270 | 271 | #[test] 272 | fn test_decode_corrupted() { 
273 | let json = "{\"a\": 1, \"b\": \"123\"}"; 274 | let jsonb = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 275 | let corrupted = jsonb[0..jsonb.len() - 1].to_vec(); 276 | let value = from_slice(corrupted.as_slice()); 277 | assert!(value.is_err()); 278 | } 279 | -------------------------------------------------------------------------------- /src/core/databend/iterator.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::collections::VecDeque; 16 | use std::ops::Range; 17 | 18 | use super::constants::*; 19 | use super::jentry::JEntry; 20 | use crate::core::databend::util::jentry_to_jsonb_item; 21 | use crate::core::JsonbItem; 22 | use crate::error::Result; 23 | use crate::RawJsonb; 24 | 25 | pub(crate) struct ArrayIterator<'a> { 26 | raw_jsonb: RawJsonb<'a>, 27 | jentry_offset: usize, 28 | item_offset: usize, 29 | length: usize, 30 | index: usize, 31 | } 32 | 33 | impl<'a> ArrayIterator<'a> { 34 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 35 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 36 | if header_type == ARRAY_CONTAINER_TAG { 37 | let jentry_offset = 4; 38 | let item_offset = 4 + 4 * header_len; 39 | Ok(Some(Self { 40 | raw_jsonb, 41 | jentry_offset, 42 | item_offset, 43 | length: header_len, 44 | index: 0, 45 | })) 46 | } else { 47 | Ok(None) 48 | } 49 | } 50 | 51 | pub(crate) fn len(&self) -> usize { 52 | self.length 53 | } 54 | } 55 | 56 | impl<'a> Iterator for ArrayIterator<'a> { 57 | type Item = Result>; 58 | 59 | fn next(&mut self) -> Option { 60 | if self.index >= self.length { 61 | return None; 62 | } 63 | let jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 64 | Ok(jentry) => jentry, 65 | Err(err) => return Some(Err(err)), 66 | }; 67 | 68 | let item_length = jentry.length as usize; 69 | let item_range = Range { 70 | start: self.item_offset, 71 | end: self.item_offset + item_length, 72 | }; 73 | let data = match self.raw_jsonb.slice(item_range) { 74 | Ok(data) => data, 75 | Err(err) => return Some(Err(err)), 76 | }; 77 | let item = jentry_to_jsonb_item(jentry, data); 78 | 79 | self.index += 1; 80 | self.jentry_offset += 4; 81 | self.item_offset += item_length; 82 | 83 | Some(Ok(item)) 84 | } 85 | } 86 | 87 | pub(crate) struct ObjectKeyIterator<'a> { 88 | raw_jsonb: RawJsonb<'a>, 89 | jentry_offset: usize, 90 | item_offset: usize, 91 | length: usize, 92 | index: usize, 93 | } 94 | 95 | impl<'a> 
ObjectKeyIterator<'a> { 96 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 97 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 98 | if header_type == OBJECT_CONTAINER_TAG { 99 | let jentry_offset = 4; 100 | let item_offset = 4 + 8 * header_len; 101 | Ok(Some(Self { 102 | raw_jsonb, 103 | jentry_offset, 104 | item_offset, 105 | length: header_len, 106 | index: 0, 107 | })) 108 | } else { 109 | Ok(None) 110 | } 111 | } 112 | 113 | pub(crate) fn len(&self) -> usize { 114 | self.length 115 | } 116 | } 117 | 118 | impl<'a> Iterator for ObjectKeyIterator<'a> { 119 | type Item = Result>; 120 | 121 | fn next(&mut self) -> Option { 122 | if self.index >= self.length { 123 | return None; 124 | } 125 | let jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 126 | Ok(jentry) => jentry, 127 | Err(err) => return Some(Err(err)), 128 | }; 129 | 130 | let key_length = jentry.length as usize; 131 | let key_range = Range { 132 | start: self.item_offset, 133 | end: self.item_offset + key_length, 134 | }; 135 | let data = match self.raw_jsonb.slice(key_range) { 136 | Ok(data) => data, 137 | Err(err) => return Some(Err(err)), 138 | }; 139 | let key_item = jentry_to_jsonb_item(jentry, data); 140 | 141 | self.index += 1; 142 | self.jentry_offset += 4; 143 | self.item_offset += key_length; 144 | 145 | Some(Ok(key_item)) 146 | } 147 | } 148 | 149 | pub(crate) struct ObjectValueIterator<'a> { 150 | raw_jsonb: RawJsonb<'a>, 151 | jentry_offset: usize, 152 | item_offset: usize, 153 | length: usize, 154 | index: usize, 155 | } 156 | 157 | impl<'a> ObjectValueIterator<'a> { 158 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 159 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 160 | if header_type == OBJECT_CONTAINER_TAG { 161 | let mut jentry_offset = 4; 162 | let mut item_offset = 4 + 8 * header_len; 163 | for _ in 0..header_len { 164 | let key_jentry = raw_jsonb.read_jentry(jentry_offset)?; 165 | jentry_offset += 4; 166 | item_offset += 
key_jentry.length as usize; 167 | } 168 | 169 | Ok(Some(Self { 170 | raw_jsonb, 171 | jentry_offset, 172 | item_offset, 173 | length: header_len, 174 | index: 0, 175 | })) 176 | } else { 177 | Ok(None) 178 | } 179 | } 180 | 181 | #[allow(dead_code)] 182 | pub(crate) fn len(&self) -> usize { 183 | self.length 184 | } 185 | } 186 | 187 | impl<'a> Iterator for ObjectValueIterator<'a> { 188 | type Item = Result>; 189 | 190 | fn next(&mut self) -> Option { 191 | if self.index >= self.length { 192 | return None; 193 | } 194 | let jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 195 | Ok(jentry) => jentry, 196 | Err(err) => return Some(Err(err)), 197 | }; 198 | 199 | let val_length = jentry.length as usize; 200 | let val_range = Range { 201 | start: self.item_offset, 202 | end: self.item_offset + val_length, 203 | }; 204 | let data = match self.raw_jsonb.slice(val_range) { 205 | Ok(data) => data, 206 | Err(err) => return Some(Err(err)), 207 | }; 208 | let val_item = jentry_to_jsonb_item(jentry, data); 209 | 210 | self.index += 1; 211 | self.jentry_offset += 4; 212 | self.item_offset += val_length; 213 | 214 | Some(Ok(val_item)) 215 | } 216 | } 217 | 218 | pub(crate) struct ObjectIterator<'a> { 219 | raw_jsonb: RawJsonb<'a>, 220 | key_jentries: VecDeque, 221 | jentry_offset: usize, 222 | key_offset: usize, 223 | val_offset: usize, 224 | length: usize, 225 | } 226 | 227 | impl<'a> ObjectIterator<'a> { 228 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 229 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 230 | if header_type == OBJECT_CONTAINER_TAG { 231 | let mut jentry_offset = 4; 232 | let mut key_jentries = VecDeque::with_capacity(header_len); 233 | for _ in 0..header_len { 234 | let key_jentry = raw_jsonb.read_jentry(jentry_offset)?; 235 | jentry_offset += 4; 236 | key_jentries.push_back(key_jentry); 237 | } 238 | let key_length: usize = key_jentries.iter().map(|j| j.length as usize).sum(); 239 | let key_offset = 4 + 8 * header_len; 
240 | let val_offset = key_offset + key_length; 241 | 242 | Ok(Some(Self { 243 | raw_jsonb, 244 | key_jentries, 245 | jentry_offset, 246 | key_offset, 247 | val_offset, 248 | length: header_len, 249 | })) 250 | } else { 251 | Ok(None) 252 | } 253 | } 254 | 255 | pub(crate) fn len(&self) -> usize { 256 | self.length 257 | } 258 | } 259 | 260 | impl<'a> Iterator for ObjectIterator<'a> { 261 | type Item = Result<(&'a str, JsonbItem<'a>)>; 262 | 263 | fn next(&mut self) -> Option { 264 | match self.key_jentries.pop_front() { 265 | Some(key_jentry) => { 266 | let val_jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 267 | Ok(jentry) => jentry, 268 | Err(err) => return Some(Err(err)), 269 | }; 270 | let key_length = key_jentry.length as usize; 271 | let val_length = val_jentry.length as usize; 272 | 273 | let key_range = Range { 274 | start: self.key_offset, 275 | end: self.key_offset + key_length, 276 | }; 277 | let key_data = match self.raw_jsonb.slice(key_range) { 278 | Ok(data) => data, 279 | Err(err) => return Some(Err(err)), 280 | }; 281 | let key = unsafe { std::str::from_utf8_unchecked(key_data) }; 282 | 283 | let val_range = Range { 284 | start: self.val_offset, 285 | end: self.val_offset + val_length, 286 | }; 287 | let val_data = match self.raw_jsonb.slice(val_range) { 288 | Ok(data) => data, 289 | Err(err) => return Some(Err(err)), 290 | }; 291 | let val_item = jentry_to_jsonb_item(val_jentry, val_data); 292 | 293 | self.jentry_offset += 4; 294 | self.key_offset += key_length; 295 | self.val_offset += val_length; 296 | 297 | Some(Ok((key, val_item))) 298 | } 299 | None => None, 300 | } 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /src/extension.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::cmp::Ordering; 16 | use std::fmt::Debug; 17 | use std::fmt::Display; 18 | use std::fmt::Formatter; 19 | 20 | use jiff::civil::date; 21 | use jiff::fmt::strtime; 22 | use jiff::tz::Offset; 23 | use jiff::SignedDuration; 24 | 25 | const MICROS_PER_SEC: i64 = 1_000_000; 26 | const MICROS_PER_MINUTE: i64 = 60 * MICROS_PER_SEC; 27 | const MICROS_PER_HOUR: i64 = 60 * MICROS_PER_MINUTE; 28 | const MONTHS_PER_YEAR: i32 = 12; 29 | 30 | const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.6f"; 31 | 32 | /// Represents extended JSON value types that are not supported in standard JSON. 33 | /// 34 | /// Standard JSON only supports strings, numbers, booleans, null, arrays, and objects. 35 | /// This enum provides additional data types commonly needed in database systems and 36 | /// other applications that require more specialized data representations. 
37 | #[derive(Debug, Clone)] 38 | pub enum ExtensionValue<'a> { 39 | /// Binary data (byte array), allowing efficient storage of binary content 40 | /// that would otherwise require base64 encoding in standard JSON 41 | Binary(&'a [u8]), 42 | /// Calendar date without time component (year, month, day) 43 | Date(Date), 44 | /// Timestamp with microsecond precision but without timezone information 45 | Timestamp(Timestamp), 46 | /// Timestamp with microsecond precision and timezone offset information 47 | TimestampTz(TimestampTz), 48 | /// Time interval representation for duration calculations 49 | Interval(Interval), 50 | } 51 | 52 | /// Represents a calendar date (year, month, day) without time component. 53 | /// 54 | /// The value is stored as days since the Unix epoch (January 1, 1970). 55 | /// This allows for efficient date arithmetic and comparison operations. 56 | /// Standard JSON has no native date type and typically uses ISO 8601 strings. 57 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 58 | pub struct Date { 59 | /// Days since Unix epoch (January 1, 1970) 60 | /// Positive values represent dates after the epoch, negative values represent dates before 61 | pub value: i32, 62 | } 63 | 64 | /// Represents a timestamp (date and time) without timezone information. 65 | /// 66 | /// The value is stored as microseconds since the Unix epoch (January 1, 1970 00:00:00 UTC). 67 | /// This provides microsecond precision for timestamp operations. 68 | /// Standard JSON has no native timestamp type and typically uses ISO 8601 strings. 69 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 70 | pub struct Timestamp { 71 | /// Microseconds since Unix epoch (January 1, 1970 00:00:00 UTC) 72 | pub value: i64, 73 | } 74 | 75 | /// Represents a timestamp with timezone information. 76 | /// 77 | /// Combines a timestamp value with a timezone offset, allowing for 78 | /// timezone-aware datetime operations. 
The timestamp is stored in UTC, 79 | /// and the offset indicates the local timezone. 80 | /// Standard JSON has no native timezone-aware timestamp type. 81 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 82 | pub struct TimestampTz { 83 | /// Timezone offset in hours from UTC 84 | pub offset: i8, 85 | /// Microseconds since Unix epoch (January 1, 1970 00:00:00 UTC) 86 | pub value: i64, 87 | } 88 | 89 | /// Represents a time interval or duration. 90 | /// 91 | /// This structure can represent complex time intervals with separate 92 | /// components for months, days, and microseconds, allowing for precise 93 | /// duration calculations that account for calendar irregularities. 94 | /// Standard JSON has no native interval/duration type. 95 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 96 | pub struct Interval { 97 | /// Number of months in the interval 98 | pub months: i32, 99 | /// Number of days in the interval 100 | pub days: i32, 101 | /// Number of microseconds in the interval 102 | pub micros: i64, 103 | } 104 | 105 | impl Display for Date { 106 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 107 | let dur = SignedDuration::from_hours(self.value as i64 * 24); 108 | let date = date(1970, 1, 1).checked_add(dur).unwrap(); 109 | write!(f, "{}", date) 110 | } 111 | } 112 | 113 | impl Display for Timestamp { 114 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 115 | let micros = self.value; 116 | let (mut secs, mut nanos) = (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); 117 | if nanos < 0 { 118 | secs -= 1; 119 | nanos += 1_000_000_000; 120 | } 121 | 122 | if secs > 253402207200 { 123 | secs = 253402207200; 124 | nanos = 0; 125 | } else if secs < -377705023201 { 126 | secs = -377705023201; 127 | nanos = 0; 128 | } 129 | let ts = jiff::Timestamp::new(secs, nanos as i32).unwrap(); 130 | 131 | write!(f, "{}", strtime::format(TIMESTAMP_FORMAT, ts).unwrap()) 132 | } 133 | } 134 | 135 | impl Display for TimestampTz { 
136 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 137 | let micros = self.value; 138 | let (mut secs, mut nanos) = (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); 139 | if nanos < 0 { 140 | secs -= 1; 141 | nanos += 1_000_000_000; 142 | } 143 | 144 | if secs > 253402207200 { 145 | secs = 253402207200; 146 | nanos = 0; 147 | } else if secs < -377705023201 { 148 | secs = -377705023201; 149 | nanos = 0; 150 | } 151 | let ts = jiff::Timestamp::new(secs, nanos as i32).unwrap(); 152 | let tz = Offset::constant(self.offset).to_time_zone(); 153 | let zoned = ts.to_zoned(tz); 154 | 155 | write!(f, "{}", strtime::format(TIMESTAMP_FORMAT, &zoned).unwrap()) 156 | } 157 | } 158 | 159 | impl Display for Interval { 160 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 161 | let mut date_parts = vec![]; 162 | let years = self.months / MONTHS_PER_YEAR; 163 | let months = self.months % MONTHS_PER_YEAR; 164 | match years.cmp(&1) { 165 | Ordering::Equal => { 166 | date_parts.push((years, "year")); 167 | } 168 | Ordering::Greater => { 169 | date_parts.push((years, "years")); 170 | } 171 | _ => {} 172 | } 173 | match months.cmp(&1) { 174 | Ordering::Equal => { 175 | date_parts.push((months, "month")); 176 | } 177 | Ordering::Greater => { 178 | date_parts.push((months, "months")); 179 | } 180 | _ => {} 181 | } 182 | match self.days.cmp(&1) { 183 | Ordering::Equal => { 184 | date_parts.push((self.days, "day")); 185 | } 186 | Ordering::Greater => { 187 | date_parts.push((self.days, "days")); 188 | } 189 | _ => {} 190 | } 191 | if !date_parts.is_empty() { 192 | for (i, (val, name)) in date_parts.into_iter().enumerate() { 193 | if i > 0 { 194 | write!(f, " ")?; 195 | } 196 | write!(f, "{} {}", val, name)?; 197 | } 198 | if self.micros != 0 { 199 | write!(f, " ")?; 200 | } 201 | } 202 | 203 | if self.micros != 0 { 204 | let mut micros = self.micros; 205 | if micros < 0 { 206 | write!(f, "-")?; 207 | micros = -micros; 208 | } 209 | let hour = micros / 
MICROS_PER_HOUR; 210 | micros -= hour * MICROS_PER_HOUR; 211 | let min = micros / MICROS_PER_MINUTE; 212 | micros -= min * MICROS_PER_MINUTE; 213 | let sec = micros / MICROS_PER_SEC; 214 | micros -= sec * MICROS_PER_SEC; 215 | 216 | if hour < 100 { 217 | write!(f, "{:02}:{:02}:{:02}", hour, min, sec)?; 218 | } else { 219 | write!(f, "{}:{:02}:{:02}", hour, min, sec)?; 220 | } 221 | if micros != 0 { 222 | write!(f, ".{:06}", micros)?; 223 | } 224 | } else if self.months == 0 && self.days == 0 { 225 | write!(f, "00:00:00")?; 226 | } 227 | Ok(()) 228 | } 229 | } 230 | 231 | impl Display for ExtensionValue<'_> { 232 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 233 | match self { 234 | ExtensionValue::Binary(v) => { 235 | for c in *v { 236 | write!(f, "{c:02X}")?; 237 | } 238 | Ok(()) 239 | } 240 | ExtensionValue::Date(v) => write!(f, "{}", v), 241 | ExtensionValue::Timestamp(v) => write!(f, "{}", v), 242 | ExtensionValue::TimestampTz(v) => write!(f, "{}", v), 243 | ExtensionValue::Interval(v) => write!(f, "{}", v), 244 | } 245 | } 246 | } 247 | 248 | impl Eq for ExtensionValue<'_> {} 249 | 250 | impl PartialEq for ExtensionValue<'_> { 251 | fn eq(&self, other: &Self) -> bool { 252 | self.partial_cmp(other) == Some(Ordering::Equal) 253 | } 254 | } 255 | 256 | #[allow(clippy::non_canonical_partial_ord_impl)] 257 | impl PartialOrd for ExtensionValue<'_> { 258 | fn partial_cmp(&self, other: &Self) -> Option { 259 | let self_level = match self { 260 | ExtensionValue::Binary(_) => 0, 261 | ExtensionValue::Date(_) => 1, 262 | ExtensionValue::Timestamp(_) => 2, 263 | ExtensionValue::TimestampTz(_) => 3, 264 | ExtensionValue::Interval(_) => 4, 265 | }; 266 | let other_level = match other { 267 | ExtensionValue::Binary(_) => 0, 268 | ExtensionValue::Date(_) => 1, 269 | ExtensionValue::Timestamp(_) => 2, 270 | ExtensionValue::TimestampTz(_) => 3, 271 | ExtensionValue::Interval(_) => 4, 272 | }; 273 | let res = self_level.cmp(&other_level); 274 | if matches!(res, 
Ordering::Greater | Ordering::Less) { 275 | return Some(res); 276 | } 277 | 278 | match (self, other) { 279 | (ExtensionValue::Binary(self_data), ExtensionValue::Binary(other_data)) => { 280 | Some(self_data.cmp(other_data)) 281 | } 282 | (ExtensionValue::Date(self_data), ExtensionValue::Date(other_data)) => { 283 | Some(self_data.cmp(other_data)) 284 | } 285 | (ExtensionValue::Timestamp(self_data), ExtensionValue::Timestamp(other_data)) => { 286 | Some(self_data.cmp(other_data)) 287 | } 288 | (ExtensionValue::TimestampTz(self_data), ExtensionValue::TimestampTz(other_data)) => { 289 | Some(self_data.cmp(other_data)) 290 | } 291 | (ExtensionValue::Interval(self_data), ExtensionValue::Interval(other_data)) => { 292 | Some(self_data.cmp(other_data)) 293 | } 294 | (_, _) => None, 295 | } 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/core/databend/builder.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use core::ops::Range; 16 | use std::collections::BTreeMap; 17 | 18 | use byteorder::BigEndian; 19 | use byteorder::WriteBytesExt; 20 | 21 | use super::constants::*; 22 | use super::jentry::JEntry; 23 | use crate::core::ExtensionItem; 24 | use crate::core::JsonbItem; 25 | use crate::core::NumberItem; 26 | use crate::error::Error; 27 | use crate::error::Result; 28 | use crate::OwnedJsonb; 29 | use crate::RawJsonb; 30 | 31 | pub(crate) struct ArrayBuilder<'a> { 32 | items: Vec>, 33 | } 34 | 35 | impl<'a> ArrayBuilder<'a> { 36 | pub(crate) fn new() -> Self { 37 | Self { items: Vec::new() } 38 | } 39 | 40 | pub(crate) fn with_capacity(capacity: usize) -> Self { 41 | Self { 42 | items: Vec::with_capacity(capacity), 43 | } 44 | } 45 | 46 | pub(crate) fn push_jsonb_item(&mut self, item: JsonbItem<'a>) { 47 | self.items.push(item); 48 | } 49 | 50 | pub(crate) fn push_raw_jsonb(&mut self, raw: RawJsonb<'a>) { 51 | let item = JsonbItem::Raw(raw); 52 | self.items.push(item); 53 | } 54 | 55 | pub(crate) fn push_owned_jsonb(&mut self, owned: OwnedJsonb) { 56 | let item = JsonbItem::Owned(owned); 57 | self.push_jsonb_item(item) 58 | } 59 | 60 | pub(crate) fn build(self) -> Result { 61 | let mut buf = Vec::new(); 62 | let header = ARRAY_CONTAINER_TAG | self.items.len() as u32; 63 | buf.write_u32::(header)?; 64 | 65 | let mut jentry_index = reserve_jentries(&mut buf, self.items.len() * 4); 66 | for item in self.items.into_iter() { 67 | append_jsonb_item(&mut buf, &mut jentry_index, item)?; 68 | } 69 | Ok(OwnedJsonb::new(buf)) 70 | } 71 | } 72 | 73 | pub(crate) struct ArrayDistinctBuilder<'a> { 74 | items: Vec>, 75 | item_map: BTreeMap, usize>, 76 | } 77 | 78 | impl<'a> ArrayDistinctBuilder<'a> { 79 | pub(crate) fn new(capacity: usize) -> Self { 80 | Self { 81 | items: Vec::with_capacity(capacity), 82 | item_map: BTreeMap::new(), 83 | } 84 | } 85 | 86 | pub(crate) fn push_jsonb_item(&mut self, item: JsonbItem<'a>) { 87 | if let Some(cnt) = self.item_map.get_mut(&item) { 88 
| *cnt += 1; 89 | } else { 90 | self.item_map.insert(item.clone(), 1); 91 | self.items.push(item); 92 | } 93 | } 94 | 95 | pub(crate) fn push_raw_jsonb(&mut self, raw: RawJsonb<'a>) { 96 | let item = JsonbItem::Raw(raw); 97 | self.push_jsonb_item(item); 98 | } 99 | 100 | pub(crate) fn pop_jsonb_item(&mut self, item: JsonbItem<'a>) -> Option<()> { 101 | if let Some(cnt) = self.item_map.get_mut(&item) { 102 | if *cnt > 0 { 103 | *cnt -= 1; 104 | return Some(()); 105 | } 106 | } 107 | None 108 | } 109 | 110 | pub(crate) fn pop_raw_jsonb(&mut self, raw: RawJsonb<'a>) -> Option<()> { 111 | let item = JsonbItem::Raw(raw); 112 | self.pop_jsonb_item(item) 113 | } 114 | 115 | pub(crate) fn build(self) -> Result { 116 | let mut buf = Vec::new(); 117 | let header = ARRAY_CONTAINER_TAG | self.items.len() as u32; 118 | buf.write_u32::(header)?; 119 | 120 | let mut jentry_index = reserve_jentries(&mut buf, self.items.len() * 4); 121 | for item in self.items.into_iter() { 122 | append_jsonb_item(&mut buf, &mut jentry_index, item)?; 123 | } 124 | Ok(OwnedJsonb::new(buf)) 125 | } 126 | } 127 | 128 | pub(crate) struct ObjectBuilder<'a> { 129 | entries: BTreeMap<&'a str, JsonbItem<'a>>, 130 | } 131 | 132 | impl<'a> ObjectBuilder<'a> { 133 | pub(crate) fn new() -> Self { 134 | Self { 135 | entries: BTreeMap::new(), 136 | } 137 | } 138 | 139 | pub(crate) fn push_jsonb_item(&mut self, key: &'a str, val_item: JsonbItem<'a>) -> Result<()> { 140 | if self.entries.contains_key(key) { 141 | return Err(Error::ObjectDuplicateKey); 142 | } 143 | self.entries.insert(key, val_item); 144 | Ok(()) 145 | } 146 | 147 | pub(crate) fn push_raw_jsonb(&mut self, key: &'a str, raw: RawJsonb<'a>) -> Result<()> { 148 | let item = JsonbItem::Raw(raw); 149 | self.push_jsonb_item(key, item) 150 | } 151 | 152 | pub(crate) fn push_owned_jsonb(&mut self, key: &'a str, owned: OwnedJsonb) -> Result<()> { 153 | let item = JsonbItem::Owned(owned); 154 | self.push_jsonb_item(key, item) 155 | } 156 | 157 | pub(crate) 
fn contains_key(&self, key: &'a str) -> bool { 158 | self.entries.contains_key(key) 159 | } 160 | 161 | pub(crate) fn build(self) -> Result { 162 | let mut buf = Vec::new(); 163 | let header = OBJECT_CONTAINER_TAG | self.entries.len() as u32; 164 | buf.write_u32::(header)?; 165 | 166 | let mut jentry_index = reserve_jentries(&mut buf, self.entries.len() * 8); 167 | for (key, _) in self.entries.iter() { 168 | let key_len = key.len(); 169 | buf.extend_from_slice(key.as_bytes()); 170 | let jentry = JEntry::make_string_jentry(key_len); 171 | replace_jentry(&mut buf, jentry, &mut jentry_index) 172 | } 173 | for (_, item) in self.entries.into_iter() { 174 | append_jsonb_item(&mut buf, &mut jentry_index, item)?; 175 | } 176 | Ok(OwnedJsonb::new(buf)) 177 | } 178 | } 179 | 180 | fn append_jsonb_item(buf: &mut Vec, jentry_index: &mut usize, item: JsonbItem) -> Result<()> { 181 | match item { 182 | JsonbItem::Null => { 183 | let jentry = JEntry::make_null_jentry(); 184 | replace_jentry(buf, jentry, jentry_index); 185 | } 186 | JsonbItem::Boolean(v) => { 187 | let jentry = if v { 188 | JEntry::make_true_jentry() 189 | } else { 190 | JEntry::make_false_jentry() 191 | }; 192 | replace_jentry(buf, jentry, jentry_index); 193 | } 194 | JsonbItem::Number(num) => match num { 195 | NumberItem::Raw(data) => { 196 | let jentry = JEntry::make_number_jentry(data.len()); 197 | replace_jentry(buf, jentry, jentry_index); 198 | buf.extend_from_slice(data); 199 | } 200 | NumberItem::Number(num) => { 201 | let len = num.compact_encode(&mut *buf)?; 202 | let jentry = JEntry::make_number_jentry(len); 203 | replace_jentry(buf, jentry, jentry_index); 204 | } 205 | }, 206 | JsonbItem::String(data) => { 207 | let jentry = JEntry::make_string_jentry(data.len()); 208 | replace_jentry(buf, jentry, jentry_index); 209 | buf.extend_from_slice(data.as_bytes()); 210 | } 211 | JsonbItem::Extension(ext) => match ext { 212 | ExtensionItem::Raw(data) => { 213 | let jentry = 
JEntry::make_extension_jentry(data.len()); 214 | replace_jentry(buf, jentry, jentry_index); 215 | buf.extend_from_slice(data); 216 | } 217 | ExtensionItem::Extension(ext) => { 218 | let len = ext.compact_encode(&mut *buf)?; 219 | let jentry = JEntry::make_extension_jentry(len); 220 | replace_jentry(buf, jentry, jentry_index); 221 | } 222 | }, 223 | JsonbItem::Raw(raw_jsonb) => { 224 | append_raw_jsonb_data(buf, jentry_index, raw_jsonb)?; 225 | } 226 | JsonbItem::Owned(owned_jsonb) => { 227 | let raw_jsonb = owned_jsonb.as_raw(); 228 | append_raw_jsonb_data(buf, jentry_index, raw_jsonb)?; 229 | } 230 | } 231 | Ok(()) 232 | } 233 | 234 | fn append_raw_jsonb_data( 235 | buf: &mut Vec, 236 | jentry_index: &mut usize, 237 | raw_jsonb: RawJsonb, 238 | ) -> Result<()> { 239 | let (header_type, _) = raw_jsonb.read_header(0)?; 240 | if header_type == SCALAR_CONTAINER_TAG { 241 | let scalar_jentry = raw_jsonb.read_jentry(4)?; 242 | let range = Range { 243 | start: 8, 244 | end: raw_jsonb.len(), 245 | }; 246 | let data = raw_jsonb.slice(range)?; 247 | replace_jentry(buf, scalar_jentry, jentry_index); 248 | buf.extend_from_slice(data); 249 | } else { 250 | let jentry = JEntry::make_container_jentry(raw_jsonb.len()); 251 | replace_jentry(buf, jentry, jentry_index); 252 | buf.extend_from_slice(raw_jsonb.data); 253 | } 254 | Ok(()) 255 | } 256 | 257 | fn reserve_jentries(buf: &mut Vec, len: usize) -> usize { 258 | let old_len = buf.len(); 259 | let new_len = old_len + len; 260 | buf.resize(new_len, 0); 261 | old_len 262 | } 263 | 264 | fn replace_jentry(buf: &mut [u8], jentry: JEntry, jentry_index: &mut usize) { 265 | let jentry_bytes = jentry.encoded().to_be_bytes(); 266 | for (i, b) in jentry_bytes.iter().enumerate() { 267 | buf[*jentry_index + i] = *b; 268 | } 269 | *jentry_index += 4; 270 | } 271 | 272 | #[cfg(test)] 273 | mod tests { 274 | use std::collections::BTreeMap; 275 | 276 | use super::ArrayBuilder; 277 | use super::ObjectBuilder; 278 | use crate::to_owned_jsonb; 279 
| use crate::Value; 280 | 281 | #[test] 282 | fn test_build_with_inner_array() { 283 | let from_builder = { 284 | let mut builder = ObjectBuilder::new(); 285 | let mut inner_array_builder = ArrayBuilder::with_capacity(1); 286 | 287 | let val = to_owned_jsonb(&false).unwrap(); 288 | inner_array_builder.push_owned_jsonb(val); 289 | let array = inner_array_builder.build().unwrap(); 290 | 291 | builder.push_owned_jsonb("arr", array).unwrap(); 292 | let object = builder.build().unwrap(); 293 | object.to_vec() 294 | }; 295 | let mut from_encoder = Vec::new(); 296 | { 297 | let value = init_object(vec![("arr", Value::Array(vec![Value::Bool(false)]))]); 298 | value.write_to_vec(&mut from_encoder); 299 | } 300 | assert_eq!(from_builder, from_encoder); 301 | } 302 | 303 | #[test] 304 | fn test_build_with_inner_object() { 305 | let from_builder = { 306 | let mut builder = ObjectBuilder::new(); 307 | let mut inner_obj_builder = ObjectBuilder::new(); 308 | 309 | let val = to_owned_jsonb(&true).unwrap(); 310 | inner_obj_builder.push_owned_jsonb("field", val).unwrap(); 311 | let inner_obj = inner_obj_builder.build().unwrap(); 312 | 313 | builder.push_owned_jsonb("obj", inner_obj).unwrap(); 314 | let object = builder.build().unwrap(); 315 | object.to_vec() 316 | }; 317 | let mut from_encoder = Vec::new(); 318 | { 319 | let value = init_object(vec![( 320 | "obj", 321 | init_object(vec![("field", Value::Bool(true))]), 322 | )]); 323 | value.write_to_vec(&mut from_encoder); 324 | } 325 | assert_eq!(from_builder, from_encoder); 326 | } 327 | 328 | fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> { 329 | let mut map = BTreeMap::new(); 330 | for (key, val) in entries { 331 | map.insert(key.to_string(), val); 332 | } 333 | Value::Object(map) 334 | } 335 | } 336 | -------------------------------------------------------------------------------- /src/core/item.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse 
Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::borrow::Cow; 16 | use std::cmp::Ordering; 17 | 18 | use crate::error::*; 19 | use crate::ExtensionValue; 20 | use crate::Number; 21 | use crate::OwnedJsonb; 22 | use crate::RawJsonb; 23 | 24 | /// The value type of JSONB data. 25 | #[derive(Debug, Clone, Copy)] 26 | pub enum JsonbItemType { 27 | /// The Null JSONB type. 28 | Null, 29 | /// The Boolean JSONB type. 30 | Boolean, 31 | /// The Number JSONB type. 32 | Number, 33 | /// The String JSONB type. 34 | String, 35 | /// The Extension JSONB type. 36 | Extension, 37 | /// The Array JSONB type with the length of items. 38 | Array(usize), 39 | /// The Object JSONB type with the length of key and value pairs. 
40 | Object(usize), 41 | } 42 | 43 | impl Eq for JsonbItemType {} 44 | 45 | impl PartialEq for JsonbItemType { 46 | fn eq(&self, other: &Self) -> bool { 47 | self.partial_cmp(other) == Some(Ordering::Equal) 48 | } 49 | } 50 | 51 | impl PartialOrd for JsonbItemType { 52 | fn partial_cmp(&self, other: &Self) -> Option { 53 | match (self, other) { 54 | (JsonbItemType::Null, JsonbItemType::Null) => Some(Ordering::Equal), 55 | (JsonbItemType::Null, _) => Some(Ordering::Greater), 56 | (_, JsonbItemType::Null) => Some(Ordering::Less), 57 | 58 | (JsonbItemType::Array(_), JsonbItemType::Array(_)) => None, 59 | (JsonbItemType::Array(_), _) => Some(Ordering::Greater), 60 | (_, JsonbItemType::Array(_)) => Some(Ordering::Less), 61 | 62 | (JsonbItemType::Object(_), JsonbItemType::Object(_)) => None, 63 | (JsonbItemType::Object(_), _) => Some(Ordering::Greater), 64 | (_, JsonbItemType::Object(_)) => Some(Ordering::Less), 65 | 66 | (JsonbItemType::String, JsonbItemType::String) => None, 67 | (JsonbItemType::String, _) => Some(Ordering::Greater), 68 | (_, JsonbItemType::String) => Some(Ordering::Less), 69 | 70 | (JsonbItemType::Number, JsonbItemType::Number) => None, 71 | (JsonbItemType::Number, _) => Some(Ordering::Greater), 72 | (_, JsonbItemType::Number) => Some(Ordering::Less), 73 | 74 | (JsonbItemType::Boolean, JsonbItemType::Boolean) => None, 75 | (JsonbItemType::Boolean, _) => Some(Ordering::Greater), 76 | (_, JsonbItemType::Boolean) => Some(Ordering::Less), 77 | 78 | (JsonbItemType::Extension, JsonbItemType::Extension) => None, 79 | } 80 | } 81 | } 82 | 83 | #[derive(Debug, Clone)] 84 | pub(crate) enum NumberItem<'a> { 85 | /// Represents a raw JSONB number, stored as a byte slice. 86 | Raw(&'a [u8]), 87 | /// Represents a JSONB number. 
88 | #[allow(dead_code)] 89 | Number(Number), 90 | } 91 | 92 | impl NumberItem<'_> { 93 | pub(crate) fn as_number(&self) -> Result { 94 | match self { 95 | NumberItem::Raw(data) => { 96 | let num = Number::decode(data)?; 97 | Ok(num) 98 | } 99 | NumberItem::Number(num) => Ok(num.clone()), 100 | } 101 | } 102 | } 103 | 104 | #[derive(Debug, Clone)] 105 | pub(crate) enum ExtensionItem<'a> { 106 | /// Represents a raw JSONB extension value, stored as a byte slice. 107 | Raw(&'a [u8]), 108 | /// Represents a raw JSONB extension value. 109 | #[allow(dead_code)] 110 | Extension(ExtensionValue<'a>), 111 | } 112 | 113 | impl<'a> ExtensionItem<'a> { 114 | pub(crate) fn as_extension_value(&self) -> Result> { 115 | match self { 116 | ExtensionItem::Raw(data) => { 117 | let val = ExtensionValue::decode(data)?; 118 | Ok(val) 119 | } 120 | ExtensionItem::Extension(val) => Ok(val.clone()), 121 | } 122 | } 123 | } 124 | 125 | /// `JsonbItem` is an internal enum used primarily within `ArrayIterator` and 126 | /// `ObjectIterator` to represent temporary values during iteration. It is also 127 | /// utilized by `ArrayBuilder` and `ObjectBuilder` to store intermediate variables 128 | /// during the construction of JSONB objects and arrays. 129 | /// 130 | /// This enum encapsulates different types of JSONB values, allowing iterators and 131 | /// builders to handle various data types uniformly. It supports null values, 132 | /// booleans, numbers (represented as byte slices), strings (represented as byte slices), 133 | /// raw JSONB data (`RawJsonb`), and owned JSONB data (`OwnedJsonb`). 134 | #[derive(Debug, Clone)] 135 | pub(crate) enum JsonbItem<'a> { 136 | /// Represents a JSONB null value. 137 | Null, 138 | /// Represents a JSONB boolean value. 139 | Boolean(bool), 140 | /// Represents a JSONB number, stored as a byte slice. 141 | Number(NumberItem<'a>), 142 | /// Represents a JSONB string. 
143 | String(Cow<'a, str>), 144 | /// Represents a JSONB extension values, stored as a byte slice. 145 | Extension(ExtensionItem<'a>), 146 | /// Represents raw JSONB data, using a borrowed slice. 147 | Raw(RawJsonb<'a>), 148 | /// Represents owned JSONB data. 149 | Owned(OwnedJsonb), 150 | } 151 | 152 | impl<'a> JsonbItem<'a> { 153 | pub(crate) fn jsonb_item_type(&self) -> Result { 154 | match self { 155 | JsonbItem::Null => Ok(JsonbItemType::Null), 156 | JsonbItem::Boolean(_) => Ok(JsonbItemType::Boolean), 157 | JsonbItem::Number(_) => Ok(JsonbItemType::Number), 158 | JsonbItem::String(_) => Ok(JsonbItemType::String), 159 | JsonbItem::Extension(_) => Ok(JsonbItemType::Extension), 160 | JsonbItem::Raw(raw) => raw.jsonb_item_type(), 161 | JsonbItem::Owned(owned) => owned.as_raw().jsonb_item_type(), 162 | } 163 | } 164 | 165 | pub(crate) fn as_raw_jsonb(&self) -> Option> { 166 | match self { 167 | JsonbItem::Raw(raw_jsonb) => Some(*raw_jsonb), 168 | _ => None, 169 | } 170 | } 171 | 172 | pub(crate) fn as_null(&self) -> Option<()> { 173 | match self { 174 | JsonbItem::Null => Some(()), 175 | _ => None, 176 | } 177 | } 178 | 179 | pub(crate) fn as_str(&self) -> Option> { 180 | match self { 181 | JsonbItem::String(s) => Some(s.clone()), 182 | _ => None, 183 | } 184 | } 185 | } 186 | 187 | impl Eq for JsonbItem<'_> {} 188 | 189 | impl PartialEq for JsonbItem<'_> { 190 | fn eq(&self, other: &Self) -> bool { 191 | self.partial_cmp(other) == Some(Ordering::Equal) 192 | } 193 | } 194 | 195 | #[allow(clippy::non_canonical_partial_ord_impl)] 196 | impl PartialOrd for JsonbItem<'_> { 197 | fn partial_cmp(&self, other: &Self) -> Option { 198 | let self_type = self.jsonb_item_type().ok()?; 199 | let other_type = other.jsonb_item_type().ok()?; 200 | 201 | // First use JSONB type to determine the order, 202 | // different types must have different orders. 
203 | if let Some(ord) = self_type.partial_cmp(&other_type) { 204 | return Some(ord); 205 | } 206 | 207 | let self_item = if let JsonbItem::Owned(owned) = self { 208 | &JsonbItem::Raw(owned.as_raw()) 209 | } else { 210 | self 211 | }; 212 | let other_item = if let JsonbItem::Owned(owned) = other { 213 | &JsonbItem::Raw(owned.as_raw()) 214 | } else { 215 | other 216 | }; 217 | 218 | match (self_item, other_item) { 219 | (JsonbItem::Raw(self_raw), JsonbItem::Raw(other_raw)) => { 220 | self_raw.partial_cmp(other_raw) 221 | } 222 | // compare null, raw jsonb must not null 223 | (JsonbItem::Raw(_), JsonbItem::Null) => Some(Ordering::Less), 224 | (JsonbItem::Null, JsonbItem::Raw(_)) => Some(Ordering::Greater), 225 | // compare extension 226 | (JsonbItem::Extension(self_ext), JsonbItem::Extension(other_ext)) => { 227 | let self_val = self_ext.as_extension_value().ok()?; 228 | let other_val = other_ext.as_extension_value().ok()?; 229 | self_val.partial_cmp(&other_val) 230 | } 231 | (JsonbItem::Raw(self_raw), JsonbItem::Extension(other_ext)) => { 232 | let self_val = self_raw.as_extension_value(); 233 | let other_val = other_ext.as_extension_value().ok()?; 234 | if let Ok(Some(self_val)) = self_val { 235 | self_val.partial_cmp(&other_val) 236 | } else { 237 | None 238 | } 239 | } 240 | (JsonbItem::Extension(self_ext), JsonbItem::Raw(other_raw)) => { 241 | let self_val = self_ext.as_extension_value().ok()?; 242 | let other_val = other_raw.as_extension_value(); 243 | if let Ok(Some(other_val)) = other_val { 244 | self_val.partial_cmp(&other_val) 245 | } else { 246 | None 247 | } 248 | } 249 | // compare boolean 250 | (JsonbItem::Boolean(self_val), JsonbItem::Boolean(other_val)) => { 251 | self_val.partial_cmp(other_val) 252 | } 253 | (JsonbItem::Raw(self_raw), JsonbItem::Boolean(other_val)) => { 254 | let self_val = self_raw.as_bool(); 255 | if let Ok(Some(self_val)) = self_val { 256 | self_val.partial_cmp(other_val) 257 | } else { 258 | None 259 | } 260 | } 261 | 
(JsonbItem::Boolean(self_val), JsonbItem::Raw(other_raw)) => { 262 | let other_val = other_raw.as_bool(); 263 | if let Ok(Some(other_val)) = other_val { 264 | self_val.partial_cmp(&other_val) 265 | } else { 266 | None 267 | } 268 | } 269 | // compare number 270 | (JsonbItem::Number(self_num), JsonbItem::Number(other_num)) => { 271 | let self_val = self_num.as_number().ok()?; 272 | let other_val = other_num.as_number().ok()?; 273 | self_val.partial_cmp(&other_val) 274 | } 275 | (JsonbItem::Raw(self_raw), JsonbItem::Number(other_num)) => { 276 | let self_val = self_raw.as_number(); 277 | let other_val = other_num.as_number().ok()?; 278 | if let Ok(Some(self_val)) = self_val { 279 | self_val.partial_cmp(&other_val) 280 | } else { 281 | None 282 | } 283 | } 284 | (JsonbItem::Number(self_num), JsonbItem::Raw(other_raw)) => { 285 | let self_val = self_num.as_number().ok()?; 286 | let other_val = other_raw.as_number(); 287 | if let Ok(Some(other_val)) = other_val { 288 | self_val.partial_cmp(&other_val) 289 | } else { 290 | None 291 | } 292 | } 293 | // compare string 294 | (JsonbItem::String(self_str), JsonbItem::String(other_str)) => { 295 | self_str.partial_cmp(other_str) 296 | } 297 | (JsonbItem::Raw(self_raw), JsonbItem::String(other_str)) => { 298 | let self_str = self_raw.as_str(); 299 | if let Ok(Some(self_str)) = self_str { 300 | self_str.partial_cmp(other_str) 301 | } else { 302 | None 303 | } 304 | } 305 | (JsonbItem::String(self_str), JsonbItem::Raw(other_raw)) => { 306 | let other_str = other_raw.as_str(); 307 | if let Ok(Some(other_str)) = other_str { 308 | self_str.partial_cmp(&other_str) 309 | } else { 310 | None 311 | } 312 | } 313 | (_, _) => None, 314 | } 315 | } 316 | } 317 | 318 | impl Ord for JsonbItem<'_> { 319 | fn cmp(&self, other: &Self) -> Ordering { 320 | match self.partial_cmp(other) { 321 | Some(ordering) => ordering, 322 | None => Ordering::Equal, 323 | } 324 | } 325 | } 326 | 
-------------------------------------------------------------------------------- /src/owned.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::cmp::Ordering; 16 | use std::fmt::Display; 17 | use std::str::FromStr; 18 | 19 | use crate::core::ArrayBuilder; 20 | use crate::core::ObjectBuilder; 21 | use crate::core::Serializer; 22 | use crate::error::Error; 23 | use crate::error::Result; 24 | use crate::parse_value; 25 | use crate::RawJsonb; 26 | 27 | /// Represents a JSONB data that owns its underlying data. 28 | /// 29 | /// This struct provides ownership over the binary JSONB representation. 30 | /// `OwnedJsonb` is primarily used to create JSONB data from other data types (such as JSON String). 31 | /// However, for most operations, it's necessary to convert an `OwnedJsonb` to a `RawJsonb` using the `as_raw()` method 32 | /// to avoid unnecessary copying and to take advantage of the performance benefits of the read-only access of the `RawJsonb`. 33 | #[derive(Debug, Clone)] 34 | pub struct OwnedJsonb { 35 | /// The underlying `Vec` containing the binary JSONB data. 36 | pub(crate) data: Vec, 37 | } 38 | 39 | impl OwnedJsonb { 40 | /// Creates a new OwnedJsonb from a Vec. 41 | /// 42 | /// # Arguments 43 | /// 44 | /// * `data` - The `Vec` containing the JSONB data. 
45 | /// 46 | /// # Returns 47 | /// 48 | /// A new `OwnedJsonb` instance. 49 | pub fn new(data: Vec) -> OwnedJsonb { 50 | Self { data } 51 | } 52 | 53 | /// Creates a `RawJsonb` view of the owned data. 54 | /// This is useful for passing the data to functions that expect a `RawJsonb`. 55 | /// This does *not* transfer ownership. 56 | /// 57 | /// # Returns 58 | /// 59 | /// A `RawJsonb` instance referencing the owned data. 60 | pub fn as_raw(&self) -> RawJsonb<'_> { 61 | RawJsonb::new(self.data.as_slice()) 62 | } 63 | 64 | /// Consumes the OwnedJsonb and returns the underlying Vec. 65 | /// 66 | /// # Returns 67 | /// 68 | /// The underlying `Vec` containing the JSONB data. 69 | pub fn to_vec(self) -> Vec { 70 | self.data 71 | } 72 | 73 | /// Checks if the JSONB data is empty. 74 | /// 75 | /// # Returns 76 | /// 77 | /// `true` if the data is empty, `false` otherwise. 78 | pub fn is_empty(&self) -> bool { 79 | self.len() == 0 80 | } 81 | 82 | /// Returns the length of the JSONB data in bytes. 83 | /// 84 | /// # Returns 85 | /// 86 | /// The length of the data in bytes. 87 | pub fn len(&self) -> usize { 88 | self.data.len() 89 | } 90 | 91 | /// Builds a JSONB array from a collection of RawJsonb values. 92 | /// 93 | /// This function constructs a new JSONB array from an iterator of `RawJsonb` values. 94 | /// The resulting `OwnedJsonb` represents the binary encoding of the array. 95 | /// 96 | /// # Arguments 97 | /// 98 | /// * `items` - An iterator of `RawJsonb` values representing the elements of the array. 99 | /// 100 | /// # Returns 101 | /// 102 | /// * `Ok(OwnedJsonb)` - The newly created JSONB array. 103 | /// * `Err(Error)` - If any of the input `RawJsonb` values are invalid or if an error occurs during array construction. 
104 | /// 105 | /// # Examples 106 | /// 107 | /// ```rust 108 | /// use jsonb::OwnedJsonb; 109 | /// use jsonb::RawJsonb; 110 | /// 111 | /// // Create some RawJsonb values 112 | /// let owned_num = "1".parse::().unwrap(); 113 | /// let owned_str = r#""hello""#.parse::().unwrap(); 114 | /// let owned_arr = "[1,2,3]".parse::().unwrap(); 115 | /// 116 | /// // Build the array 117 | /// let raw_jsonbs = vec![owned_num.as_raw(), owned_str.as_raw(), owned_arr.as_raw()]; 118 | /// let array_result = OwnedJsonb::build_array(raw_jsonbs.into_iter()); 119 | /// assert!(array_result.is_ok()); 120 | /// let array = array_result.unwrap(); 121 | /// 122 | /// // Convert to string for easy verification 123 | /// assert_eq!(array.to_string(), "[1,\"hello\",[1,2,3]]"); 124 | /// 125 | /// // Example with an empty iterator 126 | /// let empty_array = 127 | /// OwnedJsonb::build_array(<[RawJsonb<'_>; 0] as IntoIterator>::into_iter([])).unwrap(); 128 | /// assert_eq!(empty_array.to_string(), "[]"); 129 | /// 130 | /// // Example with invalid input (this will cause an error) 131 | /// let invalid_data = OwnedJsonb::new(vec![1, 2, 3, 4]); 132 | /// let result = OwnedJsonb::build_array([invalid_data.as_raw()].into_iter()); 133 | /// assert!(result.is_err()); 134 | /// ``` 135 | pub fn build_array<'a>( 136 | raw_jsonbs: impl IntoIterator>, 137 | ) -> Result { 138 | let mut builder = ArrayBuilder::new(); 139 | for raw_jsonb in raw_jsonbs.into_iter() { 140 | builder.push_raw_jsonb(raw_jsonb); 141 | } 142 | builder.build() 143 | } 144 | 145 | /// Builds a JSONB object from a collection of key-value pairs. 146 | /// 147 | /// This function constructs a new JSONB object from an iterator of key-value pairs. The keys are strings, and the values are `RawJsonb` values. 148 | /// The resulting `OwnedJsonb` represents the binary encoding of the object. 
149 | /// 150 | /// # Arguments 151 | /// 152 | /// * `items` - An iterator of `(K, &'a RawJsonb<'a>)` tuples, where `K` is a type that can be converted into a string slice (`AsRef`) representing the key, 153 | /// and the second element is a `RawJsonb` representing the value. 154 | /// 155 | /// # Returns 156 | /// 157 | /// * `Ok(OwnedJsonb)` - The newly created JSONB object. 158 | /// * `Err(Error)` - If any of the input `RawJsonb` values are invalid, if contain duplicate keys, or if an error occurs during object construction. 159 | /// 160 | /// # Examples 161 | /// 162 | /// ```rust 163 | /// use jsonb::OwnedJsonb; 164 | /// use jsonb::RawJsonb; 165 | /// 166 | /// // Create some RawJsonb values 167 | /// let owned_num = "1".parse::().unwrap(); 168 | /// let owned_str = r#""hello""#.parse::().unwrap(); 169 | /// let owned_arr = "[1,2,3]".parse::().unwrap(); 170 | /// 171 | /// // Build the object 172 | /// let raw_jsonbs = vec![ 173 | /// ("a", owned_num.as_raw()), 174 | /// ("b", owned_str.as_raw()), 175 | /// ("c", owned_arr.as_raw()), 176 | /// ]; 177 | /// let object_result = OwnedJsonb::build_object(raw_jsonbs.into_iter()); 178 | /// assert!(object_result.is_ok()); 179 | /// let object = object_result.unwrap(); 180 | /// 181 | /// // Convert to string for easy verification 182 | /// assert_eq!(object.to_string(), r#"{"a":1,"b":"hello","c":[1,2,3]}"#); 183 | /// 184 | /// // Example with an empty iterator 185 | /// let empty_object = 186 | /// OwnedJsonb::build_object(<[(&str, RawJsonb<'_>); 0] as IntoIterator>::into_iter([])) 187 | /// .unwrap(); 188 | /// assert_eq!(empty_object.to_string(), "{}"); 189 | /// 190 | /// // Example with invalid value 191 | /// let invalid_data = OwnedJsonb::new(vec![1, 2, 3, 4]); 192 | /// let result = OwnedJsonb::build_object([("a", invalid_data.as_raw())].into_iter()); 193 | /// assert!(result.is_err()); 194 | /// ``` 195 | pub fn build_object<'a, K: AsRef>( 196 | items: impl IntoIterator)>, 197 | ) -> Result { 198 | let 
mut kvs = Vec::new(); 199 | for (key, val) in items.into_iter() { 200 | kvs.push((key, val)); 201 | } 202 | let mut builder = ObjectBuilder::new(); 203 | for (key, val) in kvs.iter() { 204 | builder.push_raw_jsonb(key.as_ref(), *val)?; 205 | } 206 | builder.build() 207 | } 208 | } 209 | 210 | /// Creates an `OwnedJsonb` from a borrowed byte slice. The byte slice is copied into a new `Vec`. 211 | impl From<&[u8]> for OwnedJsonb { 212 | fn from(data: &[u8]) -> Self { 213 | Self { 214 | data: data.to_vec(), 215 | } 216 | } 217 | } 218 | 219 | /// Creates an `OwnedJsonb` from a `Vec`. This is a simple ownership transfer. 220 | impl From> for OwnedJsonb { 221 | fn from(data: Vec) -> Self { 222 | Self { data } 223 | } 224 | } 225 | 226 | /// Parses a string into an `OwnedJsonb`. 227 | /// The string is parsed into a JSON value, then encoded into the binary JSONB format. 228 | impl FromStr for OwnedJsonb { 229 | type Err = Error; 230 | 231 | fn from_str(s: &str) -> std::result::Result { 232 | let value = parse_value(s.as_bytes())?; 233 | let mut data = Vec::new(); 234 | value.write_to_vec(&mut data); 235 | Ok(Self { data }) 236 | } 237 | } 238 | 239 | /// Allows accessing the underlying byte slice as a reference. 240 | /// This enables easy integration with functions that expect a `&[u8]`. 241 | impl AsRef<[u8]> for OwnedJsonb { 242 | fn as_ref(&self) -> &[u8] { 243 | self.data.as_ref() 244 | } 245 | } 246 | 247 | /// Implements the Display trait, allowing OwnedJsonb to be formatted as a string using the `{}` format specifier. 
248 | impl Display for OwnedJsonb { 249 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 250 | let raw_jsonb = self.as_raw(); 251 | write!(f, "{}", raw_jsonb.to_string()) 252 | } 253 | } 254 | 255 | impl Eq for OwnedJsonb {} 256 | 257 | impl PartialEq for OwnedJsonb { 258 | fn eq(&self, other: &Self) -> bool { 259 | self.partial_cmp(other) == Some(Ordering::Equal) 260 | } 261 | } 262 | 263 | /// Implements `PartialOrd` for `OwnedJsonb`, allowing comparison of two `OwnedJsonb` values. 264 | /// 265 | /// The comparison logic handles different JSONB types (scalar, array, object) and considers null values. 266 | /// The ordering is defined as follows: 267 | /// 268 | /// 1. Null is considered greater than any other type. 269 | /// 2. Scalars are compared based on their type and value (String > Number > Boolean). 270 | /// 3. Arrays are compared element by element. 271 | /// 4. Objects are compared based on their keys and values. 272 | /// 5. Arrays are greater than objects and scalars. 273 | /// 6. Objects are greater than scalars. 274 | /// 7. If the types are incompatible, None is returned. 275 | #[allow(clippy::non_canonical_partial_ord_impl)] 276 | impl PartialOrd for OwnedJsonb { 277 | fn partial_cmp(&self, other: &Self) -> Option { 278 | let self_raw = self.as_raw(); 279 | let other_raw = other.as_raw(); 280 | self_raw.partial_cmp(&other_raw) 281 | } 282 | } 283 | 284 | /// Implements `Ord` for `OwnedJsonb`, allowing comparison of two `OwnedJsonb` values using the total ordering. 285 | /// This implementation leverages the `PartialOrd` implementation, returning `Ordering::Equal` for incomparable values. 
286 | impl Ord for OwnedJsonb { 287 | fn cmp(&self, other: &Self) -> Ordering { 288 | let self_raw = self.as_raw(); 289 | let other_raw = other.as_raw(); 290 | match self_raw.partial_cmp(&other_raw) { 291 | Some(ordering) => ordering, 292 | None => Ordering::Equal, 293 | } 294 | } 295 | } 296 | 297 | /// Serializes a Rust data structure into an `OwnedJsonb` using Serde. 298 | /// 299 | /// This function takes a Rust type `T` that implements the `Serialize` trait and 300 | /// serializes it into an `OwnedJsonb`, which is a struct containing a `Vec` 301 | /// representing the JSONB data. It uses a custom `Serializer` to handle the 302 | /// serialization process. 303 | /// 304 | /// # Arguments 305 | /// 306 | /// * `value`: A reference to the value of type `T` to be serialized. 307 | /// 308 | /// # Type Parameters 309 | /// 310 | /// * `T`: The Rust type to serialize. This type must implement the `serde::ser::Serialize` trait. 311 | /// 312 | /// # Returns 313 | /// 314 | /// * `Ok(OwnedJsonb)`: If the serialization is successful, returns an `OwnedJsonb` 315 | /// containing the serialized JSONB data. 316 | /// * `Err(e)`: If any Serde serialization error occurs. 
317 | /// 318 | /// # Examples 319 | /// 320 | /// ``` 321 | /// use jsonb::to_owned_jsonb; 322 | /// use jsonb::OwnedJsonb; 323 | /// use serde::Serialize; 324 | /// 325 | /// #[derive(Serialize, Debug)] 326 | /// struct Person { 327 | /// name: String, 328 | /// age: u32, 329 | /// } 330 | /// 331 | /// let person = Person { 332 | /// name: "Bob".to_string(), 333 | /// age: 42, 334 | /// }; 335 | /// 336 | /// let owned_jsonb: OwnedJsonb = to_owned_jsonb(&person).unwrap(); 337 | /// assert_eq!(format!("{}", owned_jsonb), "{\"age\":42,\"name\":\"Bob\"}"); 338 | /// println!("JSONB data: {}", owned_jsonb); 339 | /// ``` 340 | pub fn to_owned_jsonb(value: &T) -> Result 341 | where 342 | T: serde::ser::Serialize, 343 | { 344 | let mut serializer = Serializer::default(); 345 | value.serialize(&mut serializer)?; 346 | Ok(serializer.to_owned_jsonb()) 347 | } 348 | -------------------------------------------------------------------------------- /src/raw.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::cmp::Ordering; 16 | 17 | use serde::Serialize; 18 | 19 | use crate::core::ArrayIterator; 20 | use crate::core::Deserializer; 21 | use crate::core::JsonbItemType; 22 | use crate::core::ObjectIterator; 23 | use crate::error::*; 24 | use crate::OwnedJsonb; 25 | 26 | /// Represents JSONB data wrapped around a raw, immutable slice of bytes. 27 | /// 28 | /// It does not own the underlying data, allowing various operations to be performed on the JSONB data *without copying*. 29 | /// This is critical for performance when dealing with large JSONB values. 30 | /// `RawJsonb` provides various methods to inspect and manipulate the JSONB data efficiently. 31 | #[derive(Debug, Clone, Copy)] 32 | pub struct RawJsonb<'a> { 33 | /// The underlying byte slice representing the JSONB data. 34 | pub(crate) data: &'a [u8], 35 | } 36 | 37 | impl<'a> RawJsonb<'a> { 38 | /// Creates a new RawJsonb from a byte slice. 39 | /// 40 | /// # Arguments 41 | /// 42 | /// * `data` - The byte slice containing the JSONB data. 43 | /// 44 | /// # Returns 45 | /// 46 | /// A new `RawJsonb` instance. 47 | pub fn new(data: &'a [u8]) -> Self { 48 | Self { data } 49 | } 50 | 51 | /// Checks if the JSONB data is empty. 52 | /// 53 | /// # Returns 54 | /// 55 | /// `true` if the data is empty, `false` otherwise. 56 | pub fn is_empty(&self) -> bool { 57 | self.len() == 0 58 | } 59 | 60 | /// Returns the length of the JSONB data in bytes. 61 | /// 62 | /// # Returns 63 | /// 64 | /// The length of the data in bytes. 65 | pub fn len(&self) -> usize { 66 | self.data.as_ref().len() 67 | } 68 | 69 | /// Creates an `OwnedJsonb` from the `RawJsonb` by copying the underlying data. 70 | /// 71 | /// This method converts a `RawJsonb`, which holds a reference to JSONB data, 72 | /// into an `OwnedJsonb`, which owns its own copy of the JSONB data. This is 73 | /// achieved by cloning the byte slice held by the `RawJsonb` into a new `Vec`. 
74 | /// 75 | /// # Returns 76 | /// 77 | /// An `OwnedJsonb` instance containing a copy of the JSONB data from the `RawJsonb`. 78 | pub fn to_owned(&self) -> OwnedJsonb { 79 | OwnedJsonb::new(self.data.to_vec()) 80 | } 81 | 82 | /// Converts the JSONB value to a JSON string. 83 | /// 84 | /// This function serializes the JSONB value into a human-readable JSON string representation. 85 | /// If the JSONB data is invalid, treate it as a text JSON string and return directly. 86 | /// If the JSONB data is empty, return a JSON null for compatibility. 87 | /// 88 | /// # Returns 89 | /// 90 | /// * `String` - The JSON string representation of the value. 91 | /// 92 | /// # Examples 93 | /// 94 | /// ```rust 95 | /// use jsonb::OwnedJsonb; 96 | /// 97 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 98 | /// let raw_jsonb = arr_jsonb.as_raw(); 99 | /// assert_eq!(raw_jsonb.to_string(), "[1,2,3]"); 100 | /// 101 | /// let obj_jsonb = r#"{"a": 1, "b": "hello"}"#.parse::().unwrap(); 102 | /// let raw_jsonb = obj_jsonb.as_raw(); 103 | /// assert_eq!(raw_jsonb.to_string(), r#"{"a":1,"b":"hello"}"#); 104 | /// 105 | /// let num_jsonb = "123.45".parse::().unwrap(); 106 | /// let raw_jsonb = num_jsonb.as_raw(); 107 | /// assert_eq!(raw_jsonb.to_string(), "123.45"); 108 | /// 109 | /// let string_jsonb = r#""hello, world!""#.parse::().unwrap(); 110 | /// let raw_jsonb = string_jsonb.as_raw(); 111 | /// assert_eq!(raw_jsonb.to_string(), r#""hello, world!""#); 112 | /// 113 | /// let true_jsonb = "true".parse::().unwrap(); 114 | /// let raw_jsonb = true_jsonb.as_raw(); 115 | /// assert_eq!(raw_jsonb.to_string(), "true"); 116 | /// 117 | /// // Example with invalid JSONB data (fallback to treat as text JSON string) 118 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); // Invalid binary JSONB 119 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 120 | /// 121 | /// // It will treat as text JSON string. 
122 | /// assert_eq!(invalid_raw_jsonb.to_string(), "\u{1}\u{2}\u{3}\u{4}"); 123 | /// ``` 124 | #[allow(clippy::inherent_to_string)] 125 | pub fn to_string(&self) -> String { 126 | let mut buf = Vec::with_capacity(self.len()); 127 | let formatter = serde_json::ser::CompactFormatter {}; 128 | let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter); 129 | match self.serialize(&mut ser) { 130 | Ok(_) => String::from_utf8(buf).unwrap(), 131 | Err(_) => { 132 | if self.data.is_empty() { 133 | "null".to_string() 134 | } else { 135 | String::from_utf8_lossy(self.data).to_string() 136 | } 137 | } 138 | } 139 | } 140 | 141 | /// Converts the JSONB value to a pretty-printed JSON string. 142 | /// 143 | /// This function serializes the JSONB value into a human-readable JSON string representation with indentation for formatting. 144 | /// If the JSONB data is invalid, return a "null" string. 145 | /// 146 | /// # Returns 147 | /// 148 | /// * `String` - The pretty-printed JSON string representation of the value. 
149 | /// 150 | /// # Examples 151 | /// 152 | /// ```rust 153 | /// use jsonb::OwnedJsonb; 154 | /// 155 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 156 | /// let raw_jsonb = arr_jsonb.as_raw(); 157 | /// assert_eq!(raw_jsonb.to_pretty_string(), "[\n 1,\n 2,\n 3\n]"); 158 | /// 159 | /// let obj_jsonb = r#"{"a": 1, "b": "hello"}"#.parse::().unwrap(); 160 | /// let raw_jsonb = obj_jsonb.as_raw(); 161 | /// assert_eq!( 162 | /// raw_jsonb.to_pretty_string(), 163 | /// "{\n \"a\": 1,\n \"b\": \"hello\"\n}" 164 | /// ); 165 | /// 166 | /// let num_jsonb = "123.45".parse::().unwrap(); 167 | /// let raw_jsonb = num_jsonb.as_raw(); 168 | /// assert_eq!(raw_jsonb.to_pretty_string(), "123.45"); 169 | /// 170 | /// let string_jsonb = r#""hello, world!""#.parse::().unwrap(); 171 | /// let raw_jsonb = string_jsonb.as_raw(); 172 | /// assert_eq!(raw_jsonb.to_pretty_string(), r#""hello, world!""#); 173 | /// 174 | /// // Example with invalid JSONB data (fallback to text JSON parsing) 175 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); // Invalid binary JSONB 176 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 177 | /// assert_eq!(invalid_raw_jsonb.to_pretty_string(), "null"); // Fails and returns "null" 178 | /// ``` 179 | pub fn to_pretty_string(&self) -> String { 180 | let mut buf = Vec::with_capacity(self.len()); 181 | let formatter = serde_json::ser::PrettyFormatter::new(); 182 | let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter); 183 | match self.serialize(&mut ser) { 184 | Ok(_) => String::from_utf8(buf).unwrap(), 185 | Err(_) => "null".to_string(), 186 | } 187 | } 188 | } 189 | 190 | /// Converts a borrowed byte slice into a RawJsonb. 191 | /// This provides a convenient way to create a RawJsonb from existing data without copying. 
192 | impl<'a> From<&'a [u8]> for RawJsonb<'a> { 193 | fn from(data: &'a [u8]) -> Self { 194 | Self { data } 195 | } 196 | } 197 | 198 | /// Allows accessing the underlying byte slice as a reference. 199 | /// This enables easy integration with functions that expect a &[u8]. 200 | impl AsRef<[u8]> for RawJsonb<'_> { 201 | fn as_ref(&self) -> &[u8] { 202 | self.data 203 | } 204 | } 205 | 206 | impl Eq for RawJsonb<'_> {} 207 | 208 | impl PartialEq for RawJsonb<'_> { 209 | fn eq(&self, other: &Self) -> bool { 210 | self.partial_cmp(other) == Some(Ordering::Equal) 211 | } 212 | } 213 | 214 | /// Implements `PartialOrd` for `RawJsonb`, allowing comparison of two `RawJsonb` values. 215 | /// 216 | /// The comparison logic handles different JSONB types (scalar, array, object) and considers null values. 217 | /// The ordering is defined as follows: 218 | /// 219 | /// 1. Null is considered greater than any other type. 220 | /// 2. Scalars are compared based on their type and value (String > Number > Boolean > ExtensionValue). 221 | /// 3. Arrays are compared element by element. 222 | /// 4. Objects are compared based on their keys and values. 223 | /// 5. Arrays are greater than objects and scalars. 224 | /// 6. Objects are greater than scalars. 225 | /// 7. If the types are incompatible, None is returned. 226 | #[allow(clippy::non_canonical_partial_ord_impl)] 227 | impl PartialOrd for RawJsonb<'_> { 228 | fn partial_cmp(&self, other: &Self) -> Option { 229 | let self_type = self.jsonb_item_type().ok()?; 230 | let other_type = other.jsonb_item_type().ok()?; 231 | 232 | // First use JSONB type to determine the order, 233 | // different types must have different orders. 
234 | if let Some(ord) = self_type.partial_cmp(&other_type) { 235 | return Some(ord); 236 | } 237 | 238 | match (self_type, other_type) { 239 | (JsonbItemType::Array(self_len), JsonbItemType::Array(other_len)) => { 240 | let self_array_iter = ArrayIterator::new(*self).ok()?.unwrap(); 241 | let mut other_array_iter = ArrayIterator::new(*other).ok()?.unwrap(); 242 | for (self_res, other_res) in &mut self_array_iter.zip(&mut other_array_iter) { 243 | let self_item = self_res.ok()?; 244 | let other_item = other_res.ok()?; 245 | 246 | let ord = self_item.partial_cmp(&other_item)?; 247 | if ord != Ordering::Equal { 248 | return Some(ord); 249 | } 250 | } 251 | Some(self_len.cmp(&other_len)) 252 | } 253 | (JsonbItemType::Object(self_len), JsonbItemType::Object(other_len)) => { 254 | let self_object_iter = ObjectIterator::new(*self).ok()?.unwrap(); 255 | let mut other_object_iter = ObjectIterator::new(*other).ok()?.unwrap(); 256 | for (self_res, other_res) in &mut self_object_iter.zip(&mut other_object_iter) { 257 | let (self_key, self_val) = self_res.ok()?; 258 | let (other_key, other_val) = other_res.ok()?; 259 | 260 | let key_ord = self_key.partial_cmp(other_key)?; 261 | if key_ord != Ordering::Equal { 262 | return Some(key_ord); 263 | } 264 | let val_ord = self_val.partial_cmp(&other_val)?; 265 | if val_ord != Ordering::Equal { 266 | return Some(val_ord); 267 | } 268 | } 269 | Some(self_len.cmp(&other_len)) 270 | } 271 | (JsonbItemType::String, JsonbItemType::String) => { 272 | let self_val = self.as_str(); 273 | let other_val = other.as_str(); 274 | match (self_val, other_val) { 275 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 276 | (_, _) => None, 277 | } 278 | } 279 | (JsonbItemType::Number, JsonbItemType::Number) => { 280 | let self_val = self.as_number(); 281 | let other_val = other.as_number(); 282 | match (self_val, other_val) { 283 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 284 | (_, _) 
=> None, 285 | } 286 | } 287 | (JsonbItemType::Boolean, JsonbItemType::Boolean) => { 288 | let self_val = self.as_bool(); 289 | let other_val = other.as_bool(); 290 | match (self_val, other_val) { 291 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 292 | (_, _) => None, 293 | } 294 | } 295 | (JsonbItemType::Extension, JsonbItemType::Extension) => { 296 | let self_val = self.as_extension_value(); 297 | let other_val = other.as_extension_value(); 298 | match (self_val, other_val) { 299 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 300 | (_, _) => None, 301 | } 302 | } 303 | (_, _) => None, 304 | } 305 | } 306 | } 307 | 308 | /// Implements `Ord` for `RawJsonb`, allowing comparison of two `RawJsonb` values using the total ordering. 309 | /// This implementation leverages the `PartialOrd` implementation, returning `Ordering::Equal` for incomparable values. 310 | impl Ord for RawJsonb<'_> { 311 | fn cmp(&self, other: &Self) -> Ordering { 312 | match self.partial_cmp(other) { 313 | Some(ordering) => ordering, 314 | None => Ordering::Equal, 315 | } 316 | } 317 | } 318 | 319 | /// Deserializes a `RawJsonb` into a Rust data structure using Serde. 320 | /// 321 | /// This function takes a `RawJsonb` (a borrowed slice of JSONB data) and attempts 322 | /// to deserialize it into a Rust type `T` that implements the `Deserialize` trait. 323 | /// It uses a custom `Deserializer` to handle the JSONB data. 324 | /// 325 | /// # Arguments 326 | /// 327 | /// * `raw_jsonb`: A reference to the `RawJsonb` containing the JSONB data to deserialize. 328 | /// 329 | /// # Type Parameters 330 | /// 331 | /// * `T`: The Rust type to deserialize the JSONB data into. This type must implement 332 | /// the `serde::de::Deserialize` trait. 333 | /// 334 | /// # Returns 335 | /// 336 | /// * `Ok(t)`: If the deserialization is successful, returns the deserialized value of type `T`. 
337 | /// * `Err(Error::InvalidJsonb)`: If the deserialization fails due to invalid JSONB data 338 | /// (e.g., trailing characters after the expected JSONB structure). 339 | /// * `Err(e)`: If any other Serde deserialization error occurs. 340 | /// 341 | /// # Examples 342 | /// 343 | /// ``` 344 | /// use jsonb::from_raw_jsonb; 345 | /// use jsonb::OwnedJsonb; 346 | /// use jsonb::RawJsonb; 347 | /// use serde::Deserialize; 348 | /// 349 | /// #[derive(Deserialize, Debug, PartialEq, Eq)] 350 | /// struct Person { 351 | /// name: String, 352 | /// age: u32, 353 | /// } 354 | /// 355 | /// let owned_jsonb = r#"{"name": "Alice", "age": 20}"#.parse::().unwrap(); 356 | /// let raw_jsonb = owned_jsonb.as_raw(); 357 | /// 358 | /// let person: Person = from_raw_jsonb(&raw_jsonb).unwrap(); 359 | /// assert_eq!(person, Person { name: "Alice".to_string(), age: 20 }); 360 | /// println!("{:?}", person); // Output: Person { name: "Alice", age: 20 } 361 | /// ``` 362 | pub fn from_raw_jsonb<'de, T>(raw_jsonb: &'de RawJsonb) -> Result 363 | where 364 | T: serde::de::Deserialize<'de>, 365 | { 366 | let mut deserializer = Deserializer::new(raw_jsonb); 367 | let t = T::deserialize(&mut deserializer)?; 368 | if deserializer.end() { 369 | Ok(t) 370 | } else { 371 | // Trailing characters 372 | Err(Error::InvalidJsonb) 373 | } 374 | } 375 | -------------------------------------------------------------------------------- /src/from.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use core::iter::FromIterator; 16 | use std::borrow::Cow; 17 | 18 | #[cfg(feature = "arbitrary_precision")] 19 | use ethnum::i256; 20 | use ordered_float::OrderedFloat; 21 | use serde_json::Map as JsonMap; 22 | use serde_json::Number as JsonNumber; 23 | use serde_json::Value as JsonValue; 24 | 25 | #[cfg(feature = "arbitrary_precision")] 26 | use crate::constants::DECIMAL128_MAX; 27 | #[cfg(feature = "arbitrary_precision")] 28 | use crate::constants::DECIMAL128_MIN; 29 | use crate::value::Object; 30 | use crate::value::Value; 31 | #[cfg(feature = "arbitrary_precision")] 32 | use crate::Decimal128; 33 | #[cfg(feature = "arbitrary_precision")] 34 | use crate::Decimal256; 35 | use crate::Number; 36 | 37 | macro_rules! from_signed_integer { 38 | ($($ty:ident)*) => { 39 | $( 40 | impl<'a> From<$ty> for Value<'a> { 41 | fn from(n: $ty) -> Self { 42 | Value::Number(Number::Int64(n as i64)) 43 | } 44 | } 45 | )* 46 | }; 47 | } 48 | 49 | macro_rules! from_unsigned_integer { 50 | ($($ty:ident)*) => { 51 | $( 52 | impl<'a> From<$ty> for Value<'a> { 53 | fn from(n: $ty) -> Self { 54 | Value::Number(Number::UInt64(n as u64)) 55 | } 56 | } 57 | )* 58 | }; 59 | } 60 | 61 | macro_rules! from_float { 62 | ($($ty:ident)*) => { 63 | $( 64 | impl<'a> From<$ty> for Value<'a> { 65 | fn from(n: $ty) -> Self { 66 | Value::Number(Number::Float64(n as f64)) 67 | } 68 | } 69 | )* 70 | }; 71 | } 72 | 73 | from_signed_integer! { 74 | i8 i16 i32 i64 isize 75 | } 76 | 77 | from_unsigned_integer! 
{ 78 | u8 u16 u32 u64 usize 79 | } 80 | 81 | from_float! { 82 | f32 f64 83 | } 84 | 85 | impl From> for Value<'_> { 86 | fn from(f: OrderedFloat) -> Self { 87 | Value::Number(Number::Float64(f.0 as f64)) 88 | } 89 | } 90 | 91 | impl From> for Value<'_> { 92 | fn from(f: OrderedFloat) -> Self { 93 | Value::Number(Number::Float64(f.0)) 94 | } 95 | } 96 | 97 | impl From for Value<'_> { 98 | fn from(f: bool) -> Self { 99 | Value::Bool(f) 100 | } 101 | } 102 | 103 | impl From for Value<'_> { 104 | fn from(f: String) -> Self { 105 | Value::String(f.into()) 106 | } 107 | } 108 | 109 | impl<'a> From<&'a str> for Value<'a> { 110 | fn from(f: &'a str) -> Self { 111 | Value::String(Cow::from(f)) 112 | } 113 | } 114 | 115 | impl<'a> From> for Value<'a> { 116 | fn from(f: Cow<'a, str>) -> Self { 117 | Value::String(f) 118 | } 119 | } 120 | 121 | impl<'a> From> for Value<'a> { 122 | fn from(o: Object<'a>) -> Self { 123 | Value::Object(o) 124 | } 125 | } 126 | 127 | impl<'a, T: Into>> From> for Value<'a> { 128 | fn from(f: Vec) -> Self { 129 | Value::Array(f.into_iter().map(Into::into).collect()) 130 | } 131 | } 132 | 133 | impl<'a, T: Clone + Into>> From<&'a [T]> for Value<'a> { 134 | fn from(f: &'a [T]) -> Self { 135 | Value::Array(f.iter().cloned().map(Into::into).collect()) 136 | } 137 | } 138 | 139 | impl<'a, T: Into>> FromIterator for Value<'a> { 140 | fn from_iter>(iter: I) -> Self { 141 | Value::Array(iter.into_iter().map(Into::into).collect()) 142 | } 143 | } 144 | 145 | impl<'a, K: Into, V: Into>> FromIterator<(K, V)> for Value<'a> { 146 | fn from_iter>(iter: I) -> Self { 147 | Value::Object( 148 | iter.into_iter() 149 | .map(|(k, v)| (k.into(), v.into())) 150 | .collect(), 151 | ) 152 | } 153 | } 154 | 155 | impl From<()> for Value<'_> { 156 | fn from((): ()) -> Self { 157 | Value::Null 158 | } 159 | } 160 | 161 | impl From<&JsonValue> for Value<'_> { 162 | fn from(value: &JsonValue) -> Self { 163 | match value { 164 | JsonValue::Null => Value::Null, 165 | 
JsonValue::Bool(v) => Value::Bool(*v), 166 | JsonValue::Number(v) => { 167 | if let Some(n) = v.as_u64() { 168 | return Value::Number(Number::UInt64(n)); 169 | } else if let Some(n) = v.as_i64() { 170 | return Value::Number(Number::Int64(n)); 171 | } 172 | #[cfg(feature = "arbitrary_precision")] 173 | { 174 | if let Some(n) = v.as_i128() { 175 | if (DECIMAL128_MIN..=DECIMAL128_MAX).contains(&n) { 176 | return Value::Number(Number::Decimal128(Decimal128 { 177 | value: n, 178 | scale: 0, 179 | })); 180 | } else { 181 | return Value::Number(Number::Decimal256(Decimal256 { 182 | value: n.into(), 183 | scale: 0, 184 | })); 185 | } 186 | } else if let Some(n) = v.as_u128() { 187 | return Value::Number(Number::Decimal256(Decimal256 { 188 | value: n.into(), 189 | scale: 0, 190 | })); 191 | } 192 | } 193 | if let Some(n) = v.as_f64() { 194 | Value::Number(Number::Float64(n)) 195 | } else { 196 | // If the value is NaN or Infinity, fallback to NULL 197 | Value::Null 198 | } 199 | } 200 | JsonValue::String(v) => Value::String(v.clone().into()), 201 | JsonValue::Array(arr) => { 202 | let mut vals: Vec = Vec::with_capacity(arr.len()); 203 | for val in arr { 204 | vals.push(val.into()); 205 | } 206 | Value::Array(vals) 207 | } 208 | JsonValue::Object(obj) => { 209 | let mut map = Object::new(); 210 | for (k, v) in obj.iter() { 211 | let val: Value = v.into(); 212 | map.insert(k.to_string(), val); 213 | } 214 | Value::Object(map) 215 | } 216 | } 217 | } 218 | } 219 | 220 | impl From for Value<'_> { 221 | fn from(value: JsonValue) -> Self { 222 | (&value).into() 223 | } 224 | } 225 | 226 | impl<'a> From> for JsonValue { 227 | fn from(value: Value<'a>) -> Self { 228 | match value { 229 | Value::Null => JsonValue::Null, 230 | Value::Bool(v) => JsonValue::Bool(v), 231 | Value::Number(v) => match v { 232 | Number::Int64(n) => JsonValue::Number(n.into()), 233 | Number::UInt64(n) => JsonValue::Number(n.into()), 234 | Number::Decimal64(d) if d.scale == 0 => 
JsonValue::Number(d.value.into()), 235 | #[cfg(feature = "arbitrary_precision")] 236 | Number::Decimal128(ref d) if d.scale == 0 => { 237 | if let Some(n) = JsonNumber::from_i128(d.value) { 238 | JsonValue::Number(n) 239 | } else if let Some(n) = JsonNumber::from_f64(v.as_f64()) { 240 | JsonValue::Number(n) 241 | } else { 242 | JsonValue::Null 243 | } 244 | } 245 | #[cfg(feature = "arbitrary_precision")] 246 | Number::Decimal256(ref d) if d.scale == 0 => { 247 | if d.value >= i256::ZERO && d.value <= i256::from(u128::MAX) { 248 | if let Some(n) = JsonNumber::from_u128(d.value.as_u128()) { 249 | return JsonValue::Number(n); 250 | } 251 | } else if d.value >= i256::from(i128::MIN) && d.value < i256::ZERO { 252 | if let Some(n) = JsonNumber::from_i128(d.value.as_i128()) { 253 | return JsonValue::Number(n); 254 | } 255 | } 256 | if let Some(n) = JsonNumber::from_f64(v.as_f64()) { 257 | JsonValue::Number(n) 258 | } else { 259 | JsonValue::Null 260 | } 261 | } 262 | _ => { 263 | if let Some(n) = JsonNumber::from_f64(v.as_f64()) { 264 | JsonValue::Number(n) 265 | } else { 266 | // If the value is NaN or Infinity, fallback to NULL 267 | JsonValue::Null 268 | } 269 | } 270 | }, 271 | Value::String(v) => JsonValue::String(v.to_string()), 272 | Value::Binary(v) => { 273 | let mut s = String::new(); 274 | for c in v { 275 | s.push_str(&format!("{c:02X}")); 276 | } 277 | JsonValue::String(s) 278 | } 279 | Value::Date(v) => { 280 | let s = format!("{}", v); 281 | JsonValue::String(s) 282 | } 283 | Value::Timestamp(v) => { 284 | let s = format!("{}", v); 285 | JsonValue::String(s) 286 | } 287 | Value::TimestampTz(v) => { 288 | let s = format!("{}", v); 289 | JsonValue::String(s) 290 | } 291 | Value::Interval(v) => { 292 | let s = format!("{}", v); 293 | JsonValue::String(s) 294 | } 295 | Value::Array(arr) => { 296 | let mut vals: Vec = Vec::with_capacity(arr.len()); 297 | for val in arr { 298 | vals.push(val.into()); 299 | } 300 | JsonValue::Array(vals) 301 | } 302 | 
Value::Object(obj) => { 303 | let mut map = JsonMap::new(); 304 | for (k, v) in obj.iter() { 305 | let val: JsonValue = v.clone().into(); 306 | map.insert(k.to_string(), val); 307 | } 308 | JsonValue::Object(map) 309 | } 310 | } 311 | } 312 | } 313 | 314 | #[cfg(test)] 315 | mod tests { 316 | #[cfg(feature = "arbitrary_precision")] 317 | use super::i256; 318 | use super::*; 319 | use serde_json::json; 320 | #[cfg(feature = "arbitrary_precision")] 321 | use serde_json::Number as JsonNumber; 322 | 323 | fn run_float_conversion_suite() { 324 | let finite_samples = [0.0, -1.5, 42.4242, 1.0e-10, 9_007_199_254_740_992.0]; 325 | 326 | for sample in finite_samples { 327 | let json_from_value = JsonValue::from(Value::from(sample)); 328 | match &json_from_value { 329 | JsonValue::Number(num) => { 330 | assert_eq!(num.as_f64(), Some(sample), "failed for {sample}"); 331 | } 332 | other => panic!("expected number for {sample}, got {other:?}"), 333 | } 334 | 335 | match Value::from(&json_from_value) { 336 | Value::Number(Number::Float64(value)) => { 337 | assert_eq!(value, sample, "round-trip mismatch for {sample}"); 338 | } 339 | other => panic!("expected float number for {sample}, got {other:?}"), 340 | } 341 | 342 | // Cover the direct JsonValue -> Value path using serde_json's json! macro. 343 | match Value::from(&json!(sample)) { 344 | Value::Number(Number::Float64(value)) => { 345 | assert_eq!(value, sample, "json! 
conversion mismatch for {sample}"); 346 | } 347 | other => panic!("expected float number for {sample}, got {other:?}"), 348 | } 349 | } 350 | 351 | for edge in [f64::INFINITY, f64::NEG_INFINITY, f64::NAN] { 352 | let json_value = JsonValue::from(Value::from(edge)); 353 | assert_eq!( 354 | json_value, 355 | JsonValue::Null, 356 | "non-finite value should map to null" 357 | ); 358 | } 359 | } 360 | 361 | #[test] 362 | #[cfg(feature = "arbitrary_precision")] 363 | fn float_conversions_with_arbitrary_precision() { 364 | run_float_conversion_suite(); 365 | } 366 | 367 | #[test] 368 | #[cfg(not(feature = "arbitrary_precision"))] 369 | fn float_conversions_without_arbitrary_precision() { 370 | run_float_conversion_suite(); 371 | } 372 | 373 | #[test] 374 | #[cfg(feature = "arbitrary_precision")] 375 | fn big_integer_conversion_suite() { 376 | let i128_samples = [DECIMAL128_MIN, DECIMAL128_MAX]; 377 | for sample in i128_samples { 378 | let json_value = JsonValue::Number(JsonNumber::from_i128(sample).unwrap()); 379 | match Value::from(&json_value) { 380 | Value::Number(Number::Decimal128(decimal)) => { 381 | assert_eq!( 382 | decimal.value, sample, 383 | "Decimal128 value mismatch for {sample}" 384 | ); 385 | assert_eq!(decimal.scale, 0, "Decimal128 scale mismatch for {sample}"); 386 | } 387 | other => panic!("expected Decimal128 for {sample}, got {other:?}"), 388 | } 389 | 390 | let json_from_value = JsonValue::from(Value::Number(Number::Decimal128(Decimal128 { 391 | value: sample, 392 | scale: 0, 393 | }))); 394 | 395 | assert_eq!( 396 | json_from_value.to_string(), 397 | sample.to_string(), 398 | "precise JSON mismatch for {sample}" 399 | ); 400 | } 401 | 402 | let u128_samples = [i128::MAX as u128, u128::MAX]; 403 | for sample in u128_samples { 404 | let json_value = JsonValue::Number(JsonNumber::from_u128(sample).unwrap()); 405 | match Value::from(&json_value) { 406 | Value::Number(Number::Decimal256(decimal)) => { 407 | assert_eq!( 408 | decimal.value, 409 | 
i256::from(sample), 410 | "Decimal256 value mismatch for {sample}" 411 | ); 412 | assert_eq!(decimal.scale, 0, "Decimal256 scale mismatch for {sample}"); 413 | } 414 | other => panic!("expected Decimal256 for {sample}, got {other:?}"), 415 | } 416 | 417 | let json_from_value = JsonValue::from(Value::Number(Number::Decimal256(Decimal256 { 418 | value: i256::from(sample), 419 | scale: 0, 420 | }))); 421 | 422 | assert_eq!( 423 | json_from_value.to_string(), 424 | sample.to_string(), 425 | "precise JSON mismatch for {sample}" 426 | ); 427 | } 428 | } 429 | } 430 | -------------------------------------------------------------------------------- /src/functions/object.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // This file contains functions that specifically operate on JSONB object values. 16 | 17 | use std::collections::BTreeSet; 18 | 19 | use crate::core::ArrayBuilder; 20 | use crate::core::JsonbItem; 21 | use crate::core::ObjectBuilder; 22 | use crate::core::ObjectIterator; 23 | use crate::core::ObjectKeyIterator; 24 | use crate::error::*; 25 | use crate::OwnedJsonb; 26 | use crate::RawJsonb; 27 | 28 | impl RawJsonb<'_> { 29 | /// Returns an `OwnedJsonb` array containing the keys of the JSONB object. 
30 | /// 31 | /// If the JSONB value is an object, this function returns a new `OwnedJsonb` array containing the keys of the object as string values. 32 | /// The order of the keys in the returned array is the same as their order in the original object. 33 | /// If the JSONB value is not an object (e.g., it's an array or a scalar), this function returns `None`. 34 | /// 35 | /// # Arguments 36 | /// 37 | /// * `self` - The JSONB value. 38 | /// 39 | /// # Returns 40 | /// 41 | /// * `Ok(Some(OwnedJsonb))` - An `OwnedJsonb` representing the array of keys if the input is an object. 42 | /// * `Ok(None)` - If the input is not an object. 43 | /// * `Err(Error)` - If the input JSONB data is invalid. 44 | /// 45 | /// # Examples 46 | /// 47 | /// ```rust 48 | /// use jsonb::OwnedJsonb; 49 | /// 50 | /// // Object keys 51 | /// let obj_jsonb = r#"{"a": 1, "b": 2, "c": 3}"#.parse::().unwrap(); 52 | /// let raw_jsonb = obj_jsonb.as_raw(); 53 | /// let keys_result = raw_jsonb.object_keys(); 54 | /// assert!(keys_result.is_ok()); 55 | /// 56 | /// let keys_jsonb = keys_result.unwrap(); 57 | /// assert_eq!( 58 | /// keys_jsonb.as_ref().map(|k| k.to_string()), 59 | /// Some(r#"["a","b","c"]"#.to_string()) 60 | /// ); 61 | /// 62 | /// // Array - returns None 63 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 64 | /// let raw_jsonb = arr_jsonb.as_raw(); 65 | /// let keys_result = raw_jsonb.object_keys(); 66 | /// assert!(keys_result.is_ok()); 67 | /// assert!(keys_result.unwrap().is_none()); 68 | /// 69 | /// // Scalar - returns None 70 | /// let scalar_jsonb = "1".parse::().unwrap(); 71 | /// let raw_jsonb = scalar_jsonb.as_raw(); 72 | /// let keys_result = raw_jsonb.object_keys(); 73 | /// assert!(keys_result.is_ok()); 74 | /// assert!(keys_result.unwrap().is_none()); 75 | /// ``` 76 | pub fn object_keys(&self) -> Result> { 77 | let object_key_iter_opt = ObjectKeyIterator::new(*self)?; 78 | match object_key_iter_opt { 79 | Some(mut object_key_iter) => { 80 | let mut 
builder = ArrayBuilder::with_capacity(object_key_iter.len()); 81 | for key_result in &mut object_key_iter { 82 | let key_item = key_result?; 83 | builder.push_jsonb_item(key_item); 84 | } 85 | Ok(Some(builder.build()?)) 86 | } 87 | None => Ok(None), 88 | } 89 | } 90 | 91 | /// Iterates over the key-value pairs of a JSONB object. 92 | /// 93 | /// If the JSONB value is an object, this function returns a vector of tuples, where each tuple contains 94 | /// the key (as a `String`) and the value (as an `OwnedJsonb`) of a key-value pair. 95 | /// The order of the key-value pairs in the returned vector is the same as their order in the original object. 96 | /// If the JSONB value is not an object (e.g., it's an array or a scalar), this function returns `None`. 97 | /// 98 | /// # Arguments 99 | /// 100 | /// * `self` - The JSONB value. 101 | /// 102 | /// # Returns 103 | /// 104 | /// * `Ok(Some(Vec<(String, OwnedJsonb)>))` - A vector of tuples representing the key-value pairs if the input is an object. 105 | /// * `Ok(None)` - If the input is not an object. 106 | /// * `Err(Error)` - If the input JSONB data is invalid. 
107 | /// 108 | /// # Examples 109 | /// 110 | /// ```rust 111 | /// use jsonb::OwnedJsonb; 112 | /// 113 | /// // Object iteration 114 | /// let obj_jsonb = r#"{"a": 1, "b": "hello", "c": [1, 2]}"#.parse::().unwrap(); 115 | /// let raw_jsonb = obj_jsonb.as_raw(); 116 | /// let items_result = raw_jsonb.object_each(); 117 | /// assert!(items_result.is_ok()); 118 | /// 119 | /// let items = items_result.unwrap().unwrap(); 120 | /// assert_eq!(items.len(), 3); 121 | /// 122 | /// assert_eq!(items[0].0, "a"); 123 | /// assert_eq!(items[0].1.to_string(), "1"); 124 | /// assert_eq!(items[1].0, "b"); 125 | /// assert_eq!(items[1].1.to_string(), r#""hello""#); 126 | /// assert_eq!(items[2].0, "c"); 127 | /// assert_eq!(items[2].1.to_string(), r#"[1,2]"#); 128 | /// 129 | /// // Array - returns None 130 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 131 | /// let raw_jsonb = arr_jsonb.as_raw(); 132 | /// let items_result = raw_jsonb.object_each(); 133 | /// assert!(items_result.is_ok()); 134 | /// assert!(items_result.unwrap().is_none()); 135 | /// 136 | /// // Scalar - returns None 137 | /// let scalar_jsonb = "1".parse::().unwrap(); 138 | /// let raw_jsonb = scalar_jsonb.as_raw(); 139 | /// let items_result = raw_jsonb.object_each(); 140 | /// assert!(items_result.is_ok()); 141 | /// assert!(items_result.unwrap().is_none()); 142 | /// ``` 143 | pub fn object_each(&self) -> Result>> { 144 | let object_iter_opt = ObjectIterator::new(*self)?; 145 | match object_iter_opt { 146 | Some(mut object_iter) => { 147 | let mut items = Vec::with_capacity(object_iter.len()); 148 | for result in &mut object_iter { 149 | let (key, val_item) = result?; 150 | let owned_jsonb_val = OwnedJsonb::from_item(val_item)?; 151 | items.push((key.to_string(), owned_jsonb_val)); 152 | } 153 | Ok(Some(items)) 154 | } 155 | None => Ok(None), 156 | } 157 | } 158 | 159 | /// Inserts or updates a key-value pair in a JSONB object. 
160 | /// 161 | /// This function inserts a new key-value pair into a JSONB object or updates an existing key-value pair if the key already exists. 162 | /// The behavior is controlled by the `update_flag`: 163 | /// * `update_flag = true`: If the key already exists, its value is updated with `new_val`. If the key does not exist, it is inserted. 164 | /// * `update_flag = false`: If the key already exists, an error (`Error::ObjectDuplicateKey`) is returned. If the key does not exist, it is inserted. 165 | /// 166 | /// The input JSONB value must be an object; otherwise, an error (`Error::InvalidObject`) is returned. 167 | /// 168 | /// # Arguments 169 | /// 170 | /// * `self` - The JSONB object. 171 | /// * `new_key` - The key to insert or update. 172 | /// * `new_val` - The new JSONB value. 173 | /// * `update_flag` - A boolean indicating whether to update an existing key (true) or fail if a duplicate key is found (false). 174 | /// 175 | /// # Returns 176 | /// 177 | /// * `Ok(OwnedJsonb)` - The modified JSONB object. 178 | /// * `Err(Error)` - If the input is not a JSONB object, if `update_flag` is false and the key already exists, or if the JSONB data is invalid. 
179 | /// 180 | /// # Examples 181 | /// 182 | /// ```rust 183 | /// use jsonb::OwnedJsonb; 184 | /// 185 | /// // Inserting a new key-value pair 186 | /// let obj_jsonb = r#"{"a": 1}"#.parse::().unwrap(); 187 | /// let raw_jsonb = obj_jsonb.as_raw(); 188 | /// let new_jsonb = "2".parse::().unwrap(); 189 | /// let new_raw_jsonb = new_jsonb.as_raw(); 190 | /// let inserted = raw_jsonb.object_insert("b", &new_raw_jsonb, false).unwrap(); 191 | /// assert_eq!(inserted.to_string(), r#"{"a":1,"b":2}"#); 192 | /// 193 | /// // Updating an existing key-value pair 194 | /// let new_jsonb = r#"3"#.parse::().unwrap(); 195 | /// let new_raw_jsonb = new_jsonb.as_raw(); 196 | /// let updated = inserted 197 | /// .as_raw() 198 | /// .object_insert("b", &new_raw_jsonb, true) 199 | /// .unwrap(); 200 | /// assert_eq!(updated.to_string(), r#"{"a":1,"b":3}"#); 201 | /// 202 | /// // Attempting to insert a duplicate key without update 203 | /// let result = raw_jsonb.object_insert("a", &new_raw_jsonb, false); 204 | /// assert!(result.is_err()); // Returns an error because key "a" already exists 205 | /// 206 | /// // Invalid JSONB input 207 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); 208 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 209 | /// let new_raw_jsonb = new_jsonb.as_raw(); 210 | /// let result = invalid_raw_jsonb.object_insert("a", &new_raw_jsonb, false); 211 | /// assert!(result.is_err()); // Returns an error due to invalid JSONB data 212 | /// 213 | /// // Inserting into a non-object 214 | /// let arr_jsonb = "[1,2,3]".parse::().unwrap(); 215 | /// let arr_raw_jsonb = invalid_jsonb.as_raw(); 216 | /// let new_raw_jsonb = new_jsonb.as_raw(); 217 | /// let result = arr_raw_jsonb.object_insert("a", &new_raw_jsonb, false); 218 | /// assert!(result.is_err()); // Returns an error because input is not a JSONB object 219 | /// ``` 220 | pub fn object_insert( 221 | &self, 222 | new_key: &str, 223 | new_val: &RawJsonb, 224 | update_flag: bool, 225 | ) -> 
Result { 226 | let mut builder = ObjectBuilder::new(); 227 | let object_iter_opt = ObjectIterator::new(*self)?; 228 | match object_iter_opt { 229 | Some(mut object_iter) => { 230 | for result in &mut object_iter { 231 | let (key, val_item) = result?; 232 | if new_key.eq(key) { 233 | if !update_flag { 234 | return Err(Error::ObjectDuplicateKey); 235 | } 236 | } else { 237 | builder.push_jsonb_item(key, val_item)?; 238 | } 239 | } 240 | let new_val_item = JsonbItem::from_raw_jsonb(*new_val)?; 241 | builder.push_jsonb_item(new_key, new_val_item)?; 242 | } 243 | None => { 244 | return Err(Error::InvalidObject); 245 | } 246 | } 247 | builder.build() 248 | } 249 | 250 | /// Deletes key-value pairs from a JSONB object based on a set of keys. 251 | /// 252 | /// This function removes key-value pairs from a JSONB object where the keys are present in the provided `keys` set. The key comparison is case-sensitive. 253 | /// 254 | /// If the input JSONB value is not an object, an error (`Error::InvalidObject`) is returned. 255 | /// 256 | /// # Arguments 257 | /// 258 | /// * `self` - The JSONB object. 259 | /// * `keys` - A set of keys to delete. 260 | /// 261 | /// # Returns 262 | /// 263 | /// * `Ok(OwnedJsonb)` - A new JSONB object with the specified keys removed. 264 | /// * `Err(Error)` - If the input JSONB value is not an object, or if the JSONB data is invalid. 
265 | /// 266 | /// # Examples 267 | /// 268 | /// ```rust 269 | /// use std::collections::BTreeSet; 270 | /// 271 | /// use jsonb::OwnedJsonb; 272 | /// 273 | /// let obj_jsonb = r#"{"a": 1, "b": "hello", "c": 3}"#.parse::().unwrap(); 274 | /// let raw_jsonb = obj_jsonb.as_raw(); 275 | /// 276 | /// // Delete keys "a" and "c" 277 | /// let keys_to_delete: BTreeSet<&str> = ["a", "c"].into_iter().collect(); 278 | /// let deleted = raw_jsonb.object_delete(&keys_to_delete).unwrap(); 279 | /// assert_eq!(deleted.to_string(), r#"{"b":"hello"}"#); 280 | /// 281 | /// // Delete a non-existent key 282 | /// let keys_to_delete: BTreeSet<&str> = ["x"].into_iter().collect(); 283 | /// let deleted = raw_jsonb.object_delete(&keys_to_delete).unwrap(); 284 | /// assert_eq!(deleted.to_string(), r#"{"a":1,"b":"hello","c":3}"#); // Original object returned 285 | /// 286 | /// // Attempting to delete from a non-object 287 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 288 | /// let result = arr_jsonb.as_raw().object_delete(&keys_to_delete); 289 | /// assert!(result.is_err()); // Returns an error 290 | /// 291 | /// // Invalid JSONB data 292 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); 293 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 294 | /// let result = invalid_raw_jsonb.object_delete(&keys_to_delete); 295 | /// assert!(result.is_err()); // Returns an error 296 | /// ``` 297 | pub fn object_delete(&self, keys: &BTreeSet<&str>) -> Result { 298 | let object_iter_opt = ObjectIterator::new(*self)?; 299 | match object_iter_opt { 300 | Some(mut object_iter) => { 301 | let mut builder = ObjectBuilder::new(); 302 | for result in &mut object_iter { 303 | let (key, val_item) = result?; 304 | if keys.contains(key) { 305 | continue; 306 | } 307 | builder.push_jsonb_item(key, val_item)?; 308 | } 309 | builder.build() 310 | } 311 | None => Err(Error::InvalidObject), 312 | } 313 | } 314 | 315 | /// Creates a new JSONB object containing only the specified keys from 
the original object. 316 | /// 317 | /// This function selects a subset of key-value pairs from a JSONB object based on the provided `keys` set. 318 | /// Only key-value pairs where the key is present in the `keys` set are included in the resulting object. The key comparison is case-sensitive. 319 | /// 320 | /// If the input JSONB value is not an object, an error (`Error::InvalidObject`) is returned. 321 | /// 322 | /// # Arguments 323 | /// 324 | /// * `self` - The JSONB object. 325 | /// * `keys` - A set of keys to select. 326 | /// 327 | /// # Returns 328 | /// 329 | /// * `Ok(OwnedJsonb)` - A new JSONB object containing only the key-value pairs specified by the `keys` set. 330 | /// * `Err(Error)` - If the input JSONB value is not an object, or if the JSONB data is invalid. 331 | /// 332 | /// # Examples 333 | /// 334 | /// ```rust 335 | /// use std::collections::BTreeSet; 336 | /// 337 | /// use jsonb::OwnedJsonb; 338 | /// 339 | /// let obj_jsonb = r#"{"a": 1, "b": "hello", "c": 3}"#.parse::().unwrap(); 340 | /// let raw_jsonb = obj_jsonb.as_raw(); 341 | /// 342 | /// // Pick keys "a" and "c" 343 | /// let keys_to_pick: BTreeSet<&str> = ["a", "c"].into_iter().collect(); 344 | /// let picked = raw_jsonb.object_pick(&keys_to_pick).unwrap(); 345 | /// assert_eq!(picked.to_string(), r#"{"a":1,"c":3}"#); 346 | /// 347 | /// // Pick a non-existent key 348 | /// let keys_to_pick: BTreeSet<&str> = ["x"].into_iter().collect(); 349 | /// let picked = raw_jsonb.object_pick(&keys_to_pick).unwrap(); 350 | /// assert_eq!(picked.to_string(), "{}"); // Empty object returned 351 | /// 352 | /// // Attempting to pick from a non-object 353 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 354 | /// let result = arr_jsonb.as_raw().object_pick(&keys_to_pick); 355 | /// assert!(result.is_err()); // Returns an error 356 | /// 357 | /// // Invalid JSONB data 358 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); 359 | /// let invalid_raw_jsonb = 
invalid_jsonb.as_raw(); 360 | /// let result = invalid_raw_jsonb.object_pick(&keys_to_pick); 361 | /// assert!(result.is_err()); // Returns an error 362 | /// ``` 363 | pub fn object_pick(&self, keys: &BTreeSet<&str>) -> Result { 364 | let object_iter_opt = ObjectIterator::new(*self)?; 365 | match object_iter_opt { 366 | Some(mut object_iter) => { 367 | let mut builder = ObjectBuilder::new(); 368 | for result in &mut object_iter { 369 | let (key, val_item) = result?; 370 | if !keys.contains(key) { 371 | continue; 372 | } 373 | builder.push_jsonb_item(key, val_item)?; 374 | } 375 | builder.build() 376 | } 377 | None => Err(Error::InvalidObject), 378 | } 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use super::constants::*; 16 | use super::error::Error; 17 | use super::error::ParseErrorCode; 18 | 19 | #[allow(clippy::zero_prefixed_literal)] 20 | static HEX: [u8; 256] = { 21 | const __: u8 = 255; // not a hex digit 22 | [ 23 | // 1 2 3 4 5 6 7 8 9 A B C D E F 24 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0 25 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1 26 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 27 | 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3 28 | __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4 29 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5 30 | __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6 31 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 32 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 33 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 34 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A 35 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B 36 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C 37 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D 38 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E 39 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F 40 | ] 41 | }; 42 | 43 | pub fn parse_string(mut data: &[u8], len: usize, idx: &mut usize) -> Result { 44 | let mut buf = Vec::with_capacity(len); 45 | let mut str_buf = String::with_capacity(4); 46 | while !data.is_empty() { 47 | *idx += 1; 48 | let byte = data[0]; 49 | if byte == b'\\' { 50 | data = &data[1..]; 51 | data = parse_escaped_string(data, idx, &mut str_buf)?; 52 | buf.extend_from_slice(str_buf.as_bytes()); 53 | str_buf.clear(); 54 | } else { 55 | buf.push(byte); 56 | data = &data[1..]; 57 | } 58 | } 59 | 
String::from_utf8(buf).map_err(|_| Error::Syntax(ParseErrorCode::InvalidStringValue, *idx)) 60 | } 61 | 62 | fn parse_escaped_string<'a>( 63 | mut data: &'a [u8], 64 | idx: &mut usize, 65 | str_buf: &mut String, 66 | ) -> Result<&'a [u8], Error> { 67 | if data.is_empty() { 68 | return Err(Error::Syntax( 69 | ParseErrorCode::UnexpectedEndOfHexEscape, 70 | *idx, 71 | )); 72 | } 73 | 74 | let byte = data[0]; 75 | *idx += 1; 76 | data = &data[1..]; 77 | match byte { 78 | b'\\' => str_buf.push(BS), 79 | b'"' => str_buf.push(QU), 80 | b'/' => str_buf.push(SD), 81 | b'b' => str_buf.push(BB), 82 | b'f' => str_buf.push(FF), 83 | b'n' => str_buf.push(NN), 84 | b'r' => str_buf.push(RR), 85 | b't' => str_buf.push(TT), 86 | b'u' => { 87 | let mut numbers = [0u8; UNICODE_LEN]; 88 | // Parse the first Unicode escape sequence 89 | data = parse_unicode_escape(data, idx, &mut numbers)?; 90 | let hex = decode_hex_escape(&numbers, idx)?; 91 | 92 | let c = match hex { 93 | 0xDC00..=0xDFFF => { 94 | // Low surrogate without preceding high surrogate 95 | encode_invalid_unicode(&numbers, str_buf); 96 | return Ok(data); 97 | } 98 | 99 | // Non-BMP characters are encoded as a sequence of two hex 100 | // escapes, representing UTF-16 surrogates. 
101 | n1 @ 0xD800..=0xDBFF => { 102 | // High surrogate - check for following low surrogate 103 | if data.len() < 2 { 104 | encode_invalid_unicode(&numbers, str_buf); 105 | return Ok(data); 106 | } 107 | 108 | // Check for \u sequence 109 | if data[0] == b'\\' && data[1] == b'u' { 110 | *idx += 2; 111 | data = &data[2..]; 112 | } else { 113 | encode_invalid_unicode(&numbers, str_buf); 114 | return Ok(data); 115 | } 116 | 117 | let mut lower_numbers = [0u8; UNICODE_LEN]; 118 | // Parse the second Unicode escape sequence 119 | data = parse_unicode_escape(data, idx, &mut lower_numbers)?; 120 | let n2 = decode_hex_escape(&lower_numbers, idx)?; 121 | if !(0xDC00..=0xDFFF).contains(&n2) { 122 | encode_invalid_unicode(&numbers, str_buf); 123 | encode_invalid_unicode(&lower_numbers, str_buf); 124 | return Ok(data); 125 | } 126 | 127 | #[allow(clippy::precedence)] 128 | let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; 129 | 130 | match char::from_u32(n) { 131 | Some(ch) => ch, 132 | None => { 133 | // Handle invalid Unicode code points gracefully 134 | // If we somehow got an invalid code point, preserve the original escape sequence 135 | encode_invalid_unicode(&numbers, str_buf); 136 | encode_invalid_unicode(&lower_numbers, str_buf); 137 | return Ok(data); 138 | } 139 | } 140 | } 141 | 142 | // Regular Unicode code points 143 | n => match char::from_u32(n as u32) { 144 | Some(ch) => ch, 145 | None => { 146 | // Handle invalid code points gracefully 147 | encode_invalid_unicode(&numbers, str_buf); 148 | return Ok(data); 149 | } 150 | }, 151 | }; 152 | str_buf.push(c); 153 | } 154 | other => return Err(Error::Syntax(ParseErrorCode::InvalidEscaped(other), *idx)), 155 | } 156 | Ok(data) 157 | } 158 | 159 | /// Parse a Unicode escape sequence and return the updated data slice 160 | /// 161 | /// This helper function handles both standard \uXXXX and extended \u{XXXX} formats, 162 | /// extracting the hex digits into the provided buffer. 
163 | #[inline] 164 | fn parse_unicode_escape<'a>( 165 | mut data: &'a [u8], 166 | idx: &mut usize, 167 | numbers: &mut [u8; UNICODE_LEN], 168 | ) -> Result<&'a [u8], Error> { 169 | if data.len() < UNICODE_LEN { 170 | return Err(Error::Syntax( 171 | ParseErrorCode::UnexpectedEndOfHexEscape, 172 | *idx, 173 | )); 174 | } 175 | // Handle \u{XXXX} format (with braces) 176 | if data[0] == b'{' { 177 | if data.len() < UNICODE_LEN + 2 { 178 | return Err(Error::Syntax( 179 | ParseErrorCode::UnexpectedEndOfHexEscape, 180 | *idx, 181 | )); 182 | } 183 | 184 | numbers.copy_from_slice(&data[1..UNICODE_LEN + 1]); 185 | if data[UNICODE_LEN + 1] != b'}' { 186 | return Err(Error::Syntax( 187 | ParseErrorCode::UnexpectedEndOfHexEscape, 188 | *idx, 189 | )); 190 | } 191 | 192 | data = &data[UNICODE_LEN + 2..]; 193 | *idx += UNICODE_LEN + 2; 194 | } else { 195 | // Standard \uXXXX format 196 | numbers.copy_from_slice(&data[..UNICODE_LEN]); 197 | data = &data[UNICODE_LEN..]; 198 | *idx += UNICODE_LEN; 199 | } 200 | 201 | Ok(data) 202 | } 203 | 204 | // https://datatracker.ietf.org/doc/html/rfc8259#section-8.2 205 | // RFC8259 allow invalid Unicode 206 | #[inline] 207 | fn encode_invalid_unicode(numbers: &[u8], str_buf: &mut String) { 208 | str_buf.push('\\'); 209 | str_buf.push('u'); 210 | for n in numbers { 211 | str_buf.push((*n).into()); 212 | } 213 | } 214 | 215 | #[inline] 216 | fn decode_hex_val(val: u8) -> Option { 217 | let n = HEX[val as usize] as u16; 218 | if n == 255 { 219 | None 220 | } else { 221 | Some(n) 222 | } 223 | } 224 | 225 | #[inline] 226 | fn decode_hex_escape(numbers: &[u8], idx: &usize) -> Result { 227 | let mut n = 0; 228 | for number in numbers { 229 | if let Some(hex) = decode_hex_val(*number) { 230 | n = (n << 4) + hex; 231 | } else { 232 | return Err(Error::Syntax(ParseErrorCode::InvalidHex(*number), *idx)); 233 | } 234 | } 235 | Ok(n) 236 | } 237 | 238 | #[cfg(test)] 239 | mod tests { 240 | use super::*; 241 | use proptest::prelude::*; 242 | use 
std::fmt::Write;

    /// Table-driven test for `parse_string`.
    ///
    /// Each valid case is a pair of (escaped input, expected decoded text). For
    /// every pair the test checks that parsing succeeds, that the decoded text
    /// matches, and that the index has been advanced to the end of the input.
    /// The error cases must all be rejected.
    #[test]
    fn test_parse_string() {
        // Test cases with expected results
        let test_cases = [
            // Basic strings
            ("hello", "hello"),
            ("", ""),
            ("123", "123"),
            // Escaped characters
            (r#"hello\nworld"#, "hello\nworld"),
            (r#"\"\\\b\f\n\r\t"#, "\"\\\u{8}\u{c}\n\r\t"),
            (r#"escaped \"quotes\""#, "escaped \"quotes\""),
            (r#"forward\/slash"#, "forward/slash"),
            // Unicode escapes - Basic
            (r#"\u0041\u0042\u0043"#, "ABC"),
            (r#"Unicode: \u00A9 \u00AE"#, "Unicode: © ®"),
            // Unicode escapes - Braces syntax
            (r#"\u{0041}\u{0042}\u{0043}"#, "ABC"),
            (r#"Unicode: \u{00A9} \u{00AE}"#, "Unicode: © ®"),
            // Unicode escapes - Surrogate pairs
            (r#"\uD834\uDD1E"#, "𝄞"), // G-clef (musical symbol)
            (r#"\u{D834}\u{DD1E}"#, "𝄞"), // Same with braces
            // Mixed content
            (r#"Mixed: \u0041\n\t\"test\""#, "Mixed: A\n\t\"test\""),
            (r#"CJK: \u4E2D\u6587"#, "CJK: 中文"),
            // Edge cases
            (r#"\u007F"#, "\u{7F}"), // DEL character
            (r#"\u0000"#, "\u{0}"),  // NULL character
        ];

        // Run all test cases
        for (input, expected) in test_cases {
            let input_bytes = input.as_bytes();
            let mut idx = 0;
            let result = parse_string(input_bytes, input_bytes.len(), &mut idx);

            assert!(result.is_ok(), "Failed to parse valid string: {}", input);
            assert_eq!(
                result.unwrap(),
                expected,
                "Incorrect parsing result for: {}",
                input
            );
            assert_eq!(
                idx,
                input_bytes.len(),
                "Index not advanced correctly for: {}",
                input
            );
        }

        // Error cases
        let error_cases = [
            // Invalid escape sequence
            r#"\z"#,
            // Incomplete Unicode escape
            r#"\u123"#,
            // Invalid hex in Unicode escape
            r#"\uGHIJ"#,
        ];

        for input in error_cases {
            // `input` is already a `&str`, so its bytes are valid UTF-8 by
            // construction; no fallible conversion is needed here.
            let input_bytes = input.as_bytes();
            let mut idx = 0;
            let result = parse_string(input_bytes, input_bytes.len(), &mut idx);
            assert!(
                result.is_err(),
                "Expected error for invalid input: {:?}",
                input
            );
        }
    }

    proptest! {
        /// Property-based round-trip test for `parse_string`.
        ///
        /// Five generated fragments are concatenated, escaped with
        /// `serde_json::to_string`, stripped of the surrounding quotes and fed
        /// to `parse_string`; the result must equal the concatenated text and
        /// the index must reach the end of the input.
        ///
        /// The fragments are:
        /// 1. Regular ASCII strings
        /// 2. Text that looks like standard JSON escape sequences
        /// 3. Unicode characters including CJK
        /// 4. Text that looks like `\uXXXX` escapes (and surrogate pairs)
        ///
        /// NOTE(review): `serde_json::to_string` escapes every backslash in
        /// the generated text, so the fragments from `s2`, `s4` and `s5`
        /// appear to reach `parse_string` as literal characters behind an
        /// escaped backslash rather than as escape sequences. Escape decoding
        /// itself is covered by `test_parse_string` - confirm this is the
        /// intended coverage.
        #[test]
        fn proptest_parse_string(
            // Generate regular ASCII strings
            s1 in r#"[a-zA-Z0-9 ]{0,50}"#,
            // Generate strings with standard JSON escape sequences
            s2 in r#"(\\[\"\\\/bfnrt]){0,10}"#,
            // Generate Unicode characters including CJK
            s3 in prop::collection::vec(prop::char::range('\u{0020}', '\u{FFFF}'), 0..20)
                .prop_map(|chars| chars.into_iter().collect::<String>()),
            // Generate valid Unicode escape sequences
            s4 in prop::collection::vec(0u16..0xD800, 0..5).prop_map(|nums| {
                nums.into_iter()
                    .fold(String::new(), |mut output, b| {
                        let _ = write!(output, r#"\u{:04X}"#, b);
                        output
                    })
            }),
            // Generate valid Unicode surrogate pairs
            s5 in prop::collection::vec((0xD800u16..0xDC00, 0xDC00u16..0xE000), 0..3).prop_map(|pairs| {
                pairs.into_iter()
                    .fold(String::new(), |mut output, (high, low)| {
                        let _ = write!(output, r#"\u{:04X}\u{:04X}"#, high, low);
                        output
                    })
            }),
        ) {
            // Combine all generated strings
            let combined = format!("{}{}{}{}{}", s1, s2, s3, s4, s5);

            // Skip empty strings as they're already tested in the unit tests
            prop_assume!(!combined.is_empty());

            // Convert to a properly escaped JSON string
            let json_string = serde_json::to_string(&combined).unwrap();
            // Remove the surrounding quotes that serde_json adds
            let json_content = &json_string[1..json_string.len()-1];

            // Parse the string using our function
            let input_bytes = json_content.as_bytes();
            let mut idx = 0;
            let result = parse_string(input_bytes, input_bytes.len(), &mut idx);

            // Verify parsing succeeded and produced the expected result
            prop_assert!(result.is_ok(), "Failed to parse valid string: {}", json_content);
            prop_assert_eq!(result.unwrap(), combined, "Incorrect parsing result");
            prop_assert_eq!(idx, input_bytes.len(), "Index not advanced correctly");
        }

        /// Property-based round-trip test for `parse_string` with focus on edge cases.
        ///
        /// Three inputs are generated and each is round-tripped on its own
        /// (escaped with `serde_json::to_string`, then parsed):
        /// 1. Strings built from many escape-like fragments
        /// 2. Long alphanumeric strings (100 to 500 characters)
        /// 3. Repeated Unicode patterns (ASCII, emoji, CJK, mixed scripts,
        ///    escape-like text)
        #[test]
        fn proptest_parse_string_edge_cases(
            // Generate strings with many escape sequences
            heavy_escapes in prop::collection::vec(
                prop::sample::select(vec![r#"\\"#, r#"\""#, r#"\n"#, r#"\t"#, r#"\b"#, r#"\f"#, r#"\r"#, r#"\/"#, r#"\u0020"#, r#"\u00A9"#]),
                1..100
            ).prop_map(|v| v.join("")),

            // Generate long regular strings
            long_string in r#"[a-zA-Z0-9 ]{100,500}"#,

            // Generate strings with repeating Unicode patterns
            unicode_pattern in prop::collection::vec(
                prop::sample::select(vec![
                    // ASCII
                    "ABC",
                    // Emoji
                    "😀😁😂",
                    // CJK
                    "中文日本語",
                    // Mixed scripts
                    "Latin Кириллица العربية",
                    // Unicode escapes
                    r#"\u0041\u0042\u0043"#,
                    // Surrogate pairs
                    r#"\uD834\uDD1E\uD834\uDD1F"#
                ]),
                1..10
            ).prop_map(|v| v.join("")),
        ) {
            // Test each generated string separately
            for test_str in [heavy_escapes, long_string, unicode_pattern] {
                // Skip empty strings
                if test_str.is_empty() {
                    continue;
                }

                // Convert to a properly escaped JSON string
                let json_string = serde_json::to_string(&test_str).unwrap();
                // Remove the surrounding quotes
                let json_content = &json_string[1..json_string.len()-1];

                // Parse the string
                let input_bytes = json_content.as_bytes();
                let mut idx = 0;
                let result = parse_string(input_bytes, input_bytes.len(), &mut idx);

                // Verify parsing
                prop_assert!(result.is_ok(), "Failed to parse valid string: {}", json_content);
                prop_assert_eq!(result.unwrap(), test_str, "Incorrect parsing result");
                prop_assert_eq!(idx, input_bytes.len(), "Index not advanced correctly");
            }
        }
    }
}
--------------------------------------------------------------------------------