├── .github └── workflows │ ├── bench.yml │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── bench.rs ├── fuzz ├── .gitignore ├── Cargo.toml └── fuzz_targets │ └── fuzz_target_1.rs └── src ├── builder.rs ├── entry.rs ├── lib.rs ├── macros.rs ├── partial_eq.rs ├── serde.rs ├── value.rs └── value_ref.rs /.github/workflows/bench.yml: -------------------------------------------------------------------------------- 1 | name: Benchmark 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | permissions: 8 | contents: write 9 | deployments: write 10 | 11 | jobs: 12 | benchmark: 13 | name: Run benchmark 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Get data 18 | run: | 19 | mkdir -p benches/data 20 | wget -P benches/data https://github.com/datafuselabs/jsonb/raw/94e018874b63eb4afb175e4fdcc8397ac141f6a7/data/canada.json 21 | wget -P benches/data https://github.com/datafuselabs/jsonb/raw/94e018874b63eb4afb175e4fdcc8397ac141f6a7/data/citm_catalog.json 22 | wget -P benches/data https://github.com/datafuselabs/jsonb/raw/94e018874b63eb4afb175e4fdcc8397ac141f6a7/data/twitter.json 23 | - name: Run benchmark 24 | run: cargo bench --bench bench -- --output-format bencher jsonbb | tee output.txt 25 | 26 | - name: Store benchmark result 27 | uses: benchmark-action/github-action-benchmark@v1 28 | with: 29 | name: Rust Benchmark 30 | tool: 'cargo' 31 | output-file-path: output.txt 32 | github-token: ${{ secrets.GITHUB_TOKEN }} 33 | auto-push: true 34 | # Show alert with commit comment on detecting possible performance regression 35 | alert-threshold: '200%' 36 | comment-on-alert: true 37 | fail-on-alert: true 38 | alert-comment-cc-users: '@wangrunji0408' 39 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - 
main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | fmt: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - uses: actions-rs/toolchain@v1 20 | with: 21 | profile: minimal 22 | toolchain: stable 23 | components: rustfmt, clippy 24 | - name: Check code format 25 | uses: actions-rs/cargo@v1 26 | with: 27 | command: fmt 28 | args: --all -- --check 29 | - name: Clippy 30 | uses: actions-rs/cargo@v1 31 | with: 32 | command: clippy 33 | args: --all-targets --all-features -- -D warnings 34 | 35 | test: 36 | runs-on: ubuntu-latest 37 | strategy: 38 | matrix: 39 | toolchain: [stable, nightly] 40 | steps: 41 | - uses: actions/checkout@v3 42 | - uses: actions-rs/toolchain@v1 43 | with: 44 | profile: minimal 45 | toolchain: ${{ matrix.toolchain }} 46 | - name: Build 47 | uses: actions-rs/cargo@v1 48 | with: 49 | command: build 50 | args: --all-targets --all-features 51 | - name: Test 52 | uses: actions-rs/cargo@v1 53 | with: 54 | command: test 55 | args: --all-features --no-fail-fast 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | /benches/data 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [Unreleased] 9 | 10 | ## [0.2.0] - 2024-05-27 11 | 12 | ### Changed 13 | 14 | - Do not take `self` for `Value::is_*` methods. 15 | 16 | ## [0.1.4] - 2024-04-11 17 | 18 | ### Changed 19 | 20 | - Compress the size of numbers. 
21 | 22 | ### Fixed 23 | 24 | - Fix panic on unaligned pointer. 25 | 26 | ## [0.1.3] - 2023-11-20 27 | 28 | ### Added 29 | 30 | - Add `is_*` methods for `Value`, `ValueRef` and `NumberRef`. 31 | - Add `From`, `From` and `From` for `Value`. 32 | - Add `Default` for `Value`. 33 | - Add `PartialEq` for `ValueRef`. 34 | - Add `pointer` for `Value` and `ValueRef`. 35 | - Add `to_number` for `NumberRef`. 36 | - Add `json!` macro. 37 | 38 | ## [0.1.2] - 2023-10-30 39 | 40 | ### Added 41 | 42 | - Add `From<&[u8]>` for `Value`. 43 | 44 | ## [0.1.1] - 2023-10-27 45 | 46 | ### Added 47 | 48 | - Add `ObjectRef::contains_key`. 49 | - Add `to_value`. 50 | - Add feature `serde-json` and `Value::from_text_mut`. 51 | 52 | ### Fixed 53 | 54 | - Remove data of duplicate keys when building objects. 55 | 56 | ## [0.1.0] - 2023-10-25 57 | 58 | - Initial release. 59 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jsonbb" 3 | version = "0.2.0" 4 | edition = "2021" 5 | description = "A binary representation of json value, optimized for parsing and querying." 
6 | repository = "https://github.com/risingwavelabs/jsonbb" 7 | keywords = ["json"] 8 | categories = ["data-structures", "encoding"] 9 | license = "Apache-2.0" 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | bytes = "1" 15 | serde = "1" 16 | serde_json = "1" 17 | simd-json = { version = "0.13", optional = true } 18 | smallvec = "1" 19 | 20 | [dev-dependencies] 21 | criterion = "0.5" 22 | jsonb = "0.3" 23 | simd-json = "0.13" 24 | 25 | [[bench]] 26 | name = "bench" 27 | harness = false 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jsonbb 2 | 3 | [![Crate](https://img.shields.io/crates/v/jsonbb.svg)](https://crates.io/crates/jsonbb) 4 | [![Docs](https://docs.rs/jsonbb/badge.svg)](https://docs.rs/jsonbb) 5 | 6 | `jsonbb` is a binary representation of a JSON value. It is inspired by [JSONB](https://www.postgresql.org/docs/current/datatype-json.html) in PostgreSQL and optimized for fast parsing. 7 | 8 | ## Usage 9 | 10 | `jsonbb` provides an API similar to `serde_json` for constructing and querying JSON values. 11 | 12 | ```rust 13 | // Deserialize a JSON value from a string of JSON text. 14 | let value: jsonbb::Value = r#"{"name": ["foo", "bar"]}"#.parse().unwrap(); 15 | 16 | // Serialize a JSON value into JSON text. 17 | let json = value.to_string(); 18 | assert_eq!(json, r#"{"name":["foo","bar"]}"#); 19 | ``` 20 | 21 | As a binary format, you can extract byte slices from it or read JSON values from byte slices. 22 | 23 | ```rust 24 | // Get the underlying byte slice of a JSON value. 25 | let jsonbb = value.as_bytes(); 26 | 27 | // Read a JSON value from a byte slice. 28 | let value = jsonbb::ValueRef::from_bytes(jsonbb); 29 | ``` 30 | 31 | You can use the common API to query JSON and then build new JSON values using the `Builder` API. 32 | 33 | ```rust 34 | // Indexing 35 | let name = value.get("name").unwrap(); 36 | let foo = name.get(0).unwrap(); 37 | assert_eq!(foo.as_str().unwrap(), "foo"); 38 | 39 | // Build a JSON value. 40 | let mut builder = jsonbb::Builder::<Vec<u8>>::new(); 41 | builder.begin_object(); 42 | builder.add_string("name"); 43 | builder.add_value(foo); 44 | builder.end_object(); 45 | let value = builder.finish(); 46 | assert_eq!(value.to_string(), r#"{"name":"foo"}"#); 47 | ``` 48 | 49 | ## Format 50 | 51 | `jsonbb` stores JSON values in contiguous memory. 
By avoiding dynamic memory allocation, it is more cache-friendly and provides efficient **parsing** and **querying** performance. 52 | 53 | It has the following key features: 54 | 55 | 1. Memory Continuity: The content of any JSON subtree is stored contiguously, allowing for efficient copying through `memcpy`. This leads to highly efficient indexing operations. 56 | 2. Post-Order Traversal: JSON nodes are stored in post-order traversal sequence. When parsing JSON strings, output can be sequentially written to the buffer without additional memory allocation and movement. This results in highly efficient parsing operations. 57 | 58 | ## Performance Comparison 59 | 60 | | item[^0] | jsonbb | [jsonb] | [serde_json] | [simd_json] | 61 | | --------------------------- | --------- | --------- | -------------- | -------------- | 62 | | `canada.parse()` | 4.7394 ms | 12.640 ms | 10.806 ms | 6.0767 ms [^1] | 63 | | `canada.to_json()` | 5.7694 ms | 20.420 ms | 5.5702 ms | 3.0548 ms | 64 | | `canada.size()` | 2,117,412 B | 1,892,844 B | | | 65 | | `canada["type"]`[^2] | 39.181 ns[^2.1] | 316.51 ns[^2.2] | 67.202 ns [^2.3] | 27.102 ns [^2.4] | 66 | | `citm_catalog["areaNames"]` | 92.363 ns | 328.70 ns | 2.1190 µs [^3] | 1.9012 µs [^3] | 67 | | `from("1234567890")` | 26.840 ns | 91.037 ns | 45.130 ns | 21.513 ns | 68 | | `a == b` | 66.513 ns | 115.89 ns | 39.213 ns | 41.675 ns | 69 | | `a < b` | 71.793 ns | 120.77 ns | not supported | not supported | 70 | 71 | [jsonb]: https://docs.rs/jsonb/0.3.0/jsonb/ 72 | [serde_json]: https://docs.rs/serde_json/1.0.107/serde_json/ 73 | [simd_json]: https://docs.rs/simd-json/0.12.0/simd_json/ 74 | 75 | [^0]: JSON files for benchmark: [canada](https://github.com/datafuselabs/jsonb/blob/6b3f03effc08e1ca3cad69199e4cb1398e482757/data/canada.json), [citm_catalog](https://github.com/datafuselabs/jsonb/blob/6b3f03effc08e1ca3cad69199e4cb1398e482757/data/citm_catalog.json) 76 | 77 | [^1]: Parsed to 
[`simd_json::OwnedValue`](https://docs.rs/simd-json/0.12.0/simd_json/value/owned/enum.Value.html) for a fair comparison. 78 | 79 | [^2]: `canada["type"]` returns a string, so the primary overhead of this operation lies in indexing. 80 | 81 | [^2.1]: `jsonbb` uses binary search on sorted keys 82 | [^2.2]: `jsonb` uses linear search on unsorted keys 83 | [^2.3]: `serde_json` uses `BTreeMap` 84 | [^2.4]: `simd_json` uses `HashMap` 85 | 86 | [^3]: `citm_catalog["areaNames"]` returns an object with 17 key-value string pairs. However, both `serde_json` and `simd_json` exhibit slower performance due to dynamic memory allocation for each string. In contrast, jsonb employs a flat representation, allowing for direct memcpy operations, resulting in better performance. 87 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; 16 | use jsonbb::ValueRef; 17 | use simd_json::prelude::{ValueArrayAccess, ValueAsMutContainer, ValueObjectAccess}; 18 | 19 | fn bench_parse(c: &mut Criterion) { 20 | for (filename, json) in iter_json_files() { 21 | c.bench_function(&format!("{filename} parse/jsonbb"), |b| { 22 | b.iter(|| json.parse::().unwrap()) 23 | }); 24 | #[cfg(feature = "simd-json")] 25 | c.bench_function(&format!("{filename} parse/jsonbb-simd"), |b| { 26 | b.iter_batched( 27 | || Vec::from(json.clone()), 28 | |mut data| (jsonbb::Value::from_text_mut(&mut data).unwrap(), data), 29 | BatchSize::SmallInput, 30 | ) 31 | }); 32 | c.bench_function(&format!("{filename} parse/serde_json"), |b| { 33 | b.iter(|| json.parse::().unwrap()) 34 | }); 35 | c.bench_function(&format!("{filename} parse/jsonb"), |b| { 36 | b.iter(|| jsonb::parse_value(json.as_bytes()).unwrap().to_vec()) 37 | }); 38 | c.bench_function(&format!("{filename} parse/simd-json-owned"), |b| { 39 | b.iter_batched( 40 | || Vec::from(json.clone()), 41 | |mut data| (simd_json::to_owned_value(&mut data).unwrap(), data), 42 | BatchSize::SmallInput, 43 | ) 44 | }); 45 | c.bench_function(&format!("{filename} parse/simd-json-borrowed"), |b| { 46 | b.iter_batched( 47 | || Vec::from(json.clone()), 48 | |mut data| { 49 | simd_json::to_borrowed_value(&mut data).unwrap(); // drop is counted 50 | data 51 | }, 52 | BatchSize::SmallInput, 53 | ) 54 | }); 55 | c.bench_function(&format!("{filename} parse/simd-json-tape"), |b| { 56 | b.iter_batched( 57 | || Vec::from(json.clone()), 58 | |mut data| { 59 | simd_json::to_tape(&mut data).unwrap(); // drop is counted 60 | data 61 | }, 62 | BatchSize::SmallInput, 63 | ) 64 | }); 65 | c.bench_function(&format!("{filename} parse/serde_json-no-build"), |b| { 66 | b.iter(|| serde_json::from_str::(&json).unwrap()) 67 | }); 68 | 69 | println!("{filename} size/text:\t{}", json.len()); 70 | println!( 71 | "{filename} size/jsonbb:\t{}", 
72 | json.parse::().unwrap().capacity() 73 | ); 74 | println!( 75 | "{filename} size/jsonb:\t{}", 76 | jsonb::parse_value(json.as_bytes()).unwrap().to_vec().len() 77 | ); 78 | } 79 | } 80 | 81 | fn bench_to_string(c: &mut Criterion) { 82 | for (filename, json) in iter_json_files() { 83 | let v: jsonbb::Value = json.parse().unwrap(); 84 | c.bench_function(&format!("{filename} to_string/jsonbb"), |b| { 85 | b.iter(|| v.to_string()) 86 | }); 87 | let v: serde_json::Value = json.parse().unwrap(); 88 | c.bench_function(&format!("{filename} to_string/serde_json"), |b| { 89 | b.iter(|| v.to_string()) 90 | }); 91 | let v = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 92 | c.bench_function(&format!("{filename} to_string/jsonb"), |b| { 93 | b.iter(|| jsonb::to_string(&v)) 94 | }); 95 | let v = simd_json::to_owned_value(&mut Vec::from(json.clone())).unwrap(); 96 | c.bench_function(&format!("{filename} to_string/simd-json"), |b| { 97 | b.iter(|| simd_json::to_string(&v).unwrap()) 98 | }); 99 | } 100 | } 101 | 102 | fn bench_hash(c: &mut Criterion) { 103 | use std::hash::{Hash, Hasher}; 104 | 105 | let json = r#"[{"a":"foo"},{"b":"bar"},{"c":"baz"}]"#; 106 | 107 | fn hash(v: &impl Hash) -> u64 { 108 | let mut hasher = std::collections::hash_map::DefaultHasher::new(); 109 | v.hash(&mut hasher); 110 | hasher.finish() 111 | } 112 | 113 | let v: jsonbb::Value = json.parse().unwrap(); 114 | c.bench_function("hash/jsonbb", |b| b.iter(|| hash(&v))); 115 | 116 | // other crates don't implement Hash 117 | } 118 | 119 | fn bench_eq(c: &mut Criterion) { 120 | let json1 = r#"{"a":"foo","b":[null,1,"bar"]}"#; 121 | let json2 = r#"{"b":[null,1,"bar"],"a":"foo"}"#; 122 | 123 | let v1: jsonbb::Value = json1.parse().unwrap(); 124 | let v2: jsonbb::Value = json2.parse().unwrap(); 125 | assert_eq!(v1, v2); 126 | c.bench_function("eq/jsonbb", |b| b.iter(|| v1 == v2)); 127 | 128 | let v1: serde_json::Value = json1.parse().unwrap(); 129 | let v2: serde_json::Value = json2.parse().unwrap(); 
130 | assert_eq!(v1, v2); 131 | c.bench_function("eq/serde_json", |b| b.iter(|| v1 == v2)); 132 | 133 | let v1 = jsonb::parse_value(json1.as_bytes()).unwrap().to_vec(); 134 | let v2 = jsonb::parse_value(json2.as_bytes()).unwrap().to_vec(); 135 | assert_eq!(v1, v2); 136 | c.bench_function("eq/jsonb", |b| b.iter(|| jsonb::compare(&v1, &v2))); 137 | 138 | let v1 = simd_json::to_owned_value(&mut Vec::from(json1)).unwrap(); 139 | let v2 = simd_json::to_owned_value(&mut Vec::from(json2)).unwrap(); 140 | assert_eq!(v1, v2); 141 | c.bench_function("eq/simd-json", |b| b.iter(|| v1 == v2)); 142 | } 143 | 144 | fn bench_cmp(c: &mut Criterion) { 145 | let json1 = r#"{"a":"foo","b":[null,1,"bar"]}"#; 146 | let json2 = r#"{"a":"foo","b":[null,1,"baz"]}"#; 147 | 148 | let v1: jsonbb::Value = json1.parse().unwrap(); 149 | let v2: jsonbb::Value = json2.parse().unwrap(); 150 | assert!(v1 < v2); 151 | c.bench_function("cmp/jsonbb", |b| b.iter(|| v1 < v2)); 152 | 153 | let v1 = jsonb::parse_value(json1.as_bytes()).unwrap().to_vec(); 154 | let v2 = jsonb::parse_value(json2.as_bytes()).unwrap().to_vec(); 155 | assert!(jsonb::compare(&v1, &v2).unwrap().is_lt()); 156 | c.bench_function("cmp/jsonb", |b| b.iter(|| jsonb::compare(&v1, &v2))); 157 | 158 | // serde_json and simd_json don't implement Ord 159 | } 160 | 161 | fn bench_from(c: &mut Criterion) { 162 | let s = "1234567890"; 163 | c.bench_function("from_string/jsonbb", |b| b.iter(|| jsonbb::Value::from(s))); 164 | c.bench_function("from_string/serde_json", |b| { 165 | b.iter(|| serde_json::Value::from(s)) 166 | }); 167 | c.bench_function("from_string/jsonb", |b| { 168 | b.iter(|| jsonb::Value::from(s).to_vec()) 169 | }); 170 | c.bench_function("from_string/simd-json", |b| { 171 | b.iter(|| simd_json::OwnedValue::from(s)) 172 | }); 173 | 174 | let s = 123456789012345678_i64; 175 | c.bench_function("from_i64/jsonbb", |b| b.iter(|| jsonbb::Value::from(s))); 176 | c.bench_function("from_i64/serde_json", |b| { 177 | b.iter(|| 
serde_json::Value::from(s)) 178 | }); 179 | c.bench_function("from_i64/jsonb", |b| { 180 | b.iter(|| jsonb::Value::from(s).to_vec()) 181 | }); 182 | c.bench_function("from_i64/simd-json", |b| { 183 | b.iter(|| simd_json::OwnedValue::from(s)) 184 | }); 185 | 186 | let s = 1_234_567_890.123_456_7; 187 | c.bench_function("from_f64/jsonbb", |b| b.iter(|| jsonbb::Value::from(s))); 188 | c.bench_function("from_f64/serde_json", |b| { 189 | b.iter(|| serde_json::Value::from(s)) 190 | }); 191 | c.bench_function("from_f64/jsonb", |b| { 192 | b.iter(|| jsonb::Value::from(s).to_vec()) 193 | }); 194 | c.bench_function("from_f64/simd-json", |b| { 195 | b.iter(|| simd_json::OwnedValue::from(s)) 196 | }); 197 | } 198 | 199 | fn bench_index(c: &mut Criterion) { 200 | let json = r#"[{"a":"foo"},{"b":"bar"},{"c":"baz"}]"#; 201 | let v: jsonbb::Value = json.parse().unwrap(); 202 | c.bench_function("json[i]/jsonbb", |b| { 203 | b.iter(|| v.get(2).unwrap().to_owned()) 204 | }); 205 | let v: serde_json::Value = json.parse().unwrap(); 206 | c.bench_function("json[i]/serde_json", |b| { 207 | b.iter(|| v.get(2).unwrap().to_owned()) 208 | }); 209 | let v = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 210 | c.bench_function("json[i]/jsonb", |b| { 211 | b.iter(|| jsonb::get_by_index(&v, 2).unwrap()) 212 | }); 213 | let v = simd_json::to_owned_value(&mut Vec::from(json)).unwrap(); 214 | c.bench_function("json[i]/simd-json", |b| { 215 | b.iter(|| v.get_idx(2).unwrap().to_owned()) 216 | }); 217 | 218 | let json = r#"{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": {"a":"foo"}}"#; 219 | let v: jsonbb::Value = json.parse().unwrap(); 220 | c.bench_function("json['key']/jsonbb", |b| { 221 | b.iter(|| v.get("f").unwrap().to_owned()) 222 | }); 223 | let v: serde_json::Value = json.parse().unwrap(); 224 | c.bench_function("json['key']/serde_json", |b| { 225 | b.iter(|| v.get("f").unwrap().to_owned()) 226 | }); 227 | let v = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 228 | 
c.bench_function("json['key']/jsonb", |b| { 229 | b.iter(|| jsonb::get_by_name(&v, "f", false).unwrap()) 230 | }); 231 | let v = simd_json::to_owned_value(&mut Vec::from(json)).unwrap(); 232 | c.bench_function("json['key']/simd-json", |b| { 233 | b.iter(|| v.get("f").unwrap().to_owned()) 234 | }); 235 | } 236 | 237 | fn bench_index_array(c: &mut Criterion) { 238 | let json = r#" 239 | { 240 | "age": 43, 241 | "name": "John Doe", 242 | "phones": [ 243 | "+44 1234567", 244 | "+44 2345678" 245 | ] 246 | }"#; 247 | let n = 1024; 248 | 249 | let v: jsonbb::Value = json.parse().unwrap(); 250 | let mut array = vec![]; 251 | let mut index = vec![]; 252 | for _ in 0..n { 253 | let start = array.len(); 254 | array.extend_from_slice(v.as_bytes()); 255 | let end = array.len(); 256 | index.push(start..end); 257 | } 258 | c.bench_function("[json['key'] for json in array]/jsonbb", |b| { 259 | b.iter(|| { 260 | let mut buffer = Vec::with_capacity(array.len()); 261 | for range in index.iter() { 262 | let value = ValueRef::from_bytes(&array[range.clone()]); 263 | let mut builder = jsonbb::Builder::<&mut Vec>::new(&mut buffer); 264 | let new_value = value.get("name").unwrap(); 265 | builder.add_value(new_value); 266 | builder.finish(); 267 | } 268 | }) 269 | }); 270 | 271 | let v: serde_json::Value = json.parse().unwrap(); 272 | let array = vec![v; n]; 273 | c.bench_function("[json['key'] for json in array]/serde_json", |b| { 274 | b.iter(|| { 275 | array 276 | .iter() 277 | .map(|v| v["name"].to_owned()) 278 | .collect::>() 279 | }) 280 | }); 281 | 282 | let v = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 283 | let mut array = vec![]; 284 | let mut index = vec![]; 285 | for _ in 0..n { 286 | let start = array.len(); 287 | array.extend_from_slice(&v); 288 | let end = array.len(); 289 | index.push(start..end); 290 | } 291 | c.bench_function("[json['key'] for json in array]/jsonb", |b| { 292 | b.iter(|| { 293 | let mut new_array = Vec::with_capacity(array.len()); 294 | for 
range in index.iter() { 295 | let new_value = jsonb::get_by_name(&array[range.clone()], "name", false).unwrap(); 296 | new_array.extend_from_slice(&new_value); 297 | } 298 | new_array 299 | }) 300 | }); 301 | } 302 | 303 | fn bench_path(c: &mut Criterion) { 304 | let json = r#"{"a": {"b": ["foo","bar"]}}"#; 305 | let v: jsonbb::Value = json.parse().unwrap(); 306 | c.bench_function("json[path]/jsonbb", |b| { 307 | b.iter(|| { 308 | v.get("a") 309 | .unwrap() 310 | .get("b") 311 | .unwrap() 312 | .get(1) 313 | .unwrap() 314 | .to_owned() 315 | }) 316 | }); 317 | let v: serde_json::Value = json.parse().unwrap(); 318 | c.bench_function("json[path]/serde_json", |b| { 319 | b.iter(|| v["a"]["b"][1].to_owned()) 320 | }); 321 | let v = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 322 | c.bench_function("json[path]/jsonb", |b| { 323 | let path = jsonb::jsonpath::parse_json_path("{a,b,1}".as_bytes()).unwrap(); 324 | b.iter(|| jsonb::get_by_path(&v, path.clone(), &mut vec![], &mut vec![])) 325 | }); 326 | let v = simd_json::to_owned_value(&mut Vec::from(json)).unwrap(); 327 | c.bench_function("json[path]/simd-json", |b| { 328 | b.iter(|| v["a"]["b"][1].to_owned()) 329 | }); 330 | } 331 | 332 | /// Index JSONs loaded from file. 
333 | fn bench_file_index(c: &mut Criterion) { 334 | struct TestSuite { 335 | file: &'static str, 336 | paths: &'static [&'static str], 337 | expected: Option<&'static str>, 338 | } 339 | let test_suites = &[ 340 | TestSuite { 341 | file: "canada", 342 | paths: &["type"], 343 | expected: Some("FeatureCollection"), 344 | }, 345 | TestSuite { 346 | file: "citm_catalog", 347 | paths: &["areaNames"], 348 | expected: None, 349 | }, 350 | TestSuite { 351 | file: "citm_catalog", 352 | paths: &["areaNames", "205705994"], 353 | expected: Some("1er balcon central"), 354 | }, 355 | TestSuite { 356 | file: "citm_catalog", 357 | paths: &["topicNames", "324846100"], 358 | expected: Some("Formations musicales"), 359 | }, 360 | TestSuite { 361 | file: "twitter", 362 | paths: &["search_metadata", "max_id_str"], 363 | expected: Some("505874924095815681"), 364 | }, 365 | ]; 366 | 367 | for test_suite in test_suites { 368 | let suite_name = format!("{}->{}", test_suite.file, test_suite.paths.join("->")); 369 | let bytes = std::fs::read(&format!("./benches/data/{}.json", test_suite.file)).unwrap(); 370 | 371 | let value: jsonbb::Value = std::str::from_utf8(&bytes).unwrap().parse().unwrap(); 372 | c.bench_function(&format!("{suite_name} index/jsonbb"), |b| { 373 | let bench = || { 374 | let mut v = value.as_ref(); 375 | for path in test_suite.paths { 376 | v = v.get(path).unwrap(); 377 | } 378 | v.to_owned() 379 | }; 380 | if let Some(expected) = test_suite.expected { 381 | assert_eq!(bench().as_str(), Some(expected)); 382 | } 383 | b.iter(bench); 384 | }); 385 | 386 | let value: serde_json::Value = serde_json::from_slice(&bytes).unwrap(); 387 | c.bench_function(&format!("{suite_name} index/serde_json"), |b| { 388 | let bench = || { 389 | let mut v = &value; 390 | for path in test_suite.paths { 391 | v = v.get(path).unwrap(); 392 | } 393 | v.to_owned() 394 | }; 395 | if let Some(expected) = test_suite.expected { 396 | assert_eq!(bench().as_str(), Some(expected)); 397 | } 398 | 
b.iter(bench); 399 | }); 400 | 401 | let jsonb = jsonb::parse_value(&bytes).unwrap().to_vec(); 402 | let json_path = jsonb::jsonpath::JsonPath { 403 | paths: test_suite 404 | .paths 405 | .iter() 406 | .map(|p| jsonb::jsonpath::Path::DotField(std::borrow::Cow::Borrowed(p))) 407 | .collect(), 408 | }; 409 | c.bench_function(&format!("{suite_name} index/jsonb"), |b| { 410 | let bench = || { 411 | let mut data = vec![]; 412 | jsonb::get_by_path(&jsonb, json_path.clone(), &mut data, &mut vec![]); 413 | data 414 | }; 415 | if let Some(expected) = test_suite.expected { 416 | assert_eq!(jsonb::as_str(&bench()), Some(expected.into())); 417 | } 418 | b.iter(bench); 419 | }); 420 | 421 | let value = simd_json::to_owned_value(&mut bytes.clone()).unwrap(); 422 | c.bench_function(&format!("{suite_name} index/simd-json"), |b| { 423 | let bench = || { 424 | let mut v = &value; 425 | for path in test_suite.paths { 426 | v = v.get(*path).unwrap(); 427 | } 428 | v.to_owned() 429 | }; 430 | if let Some(expected) = test_suite.expected { 431 | match bench() { 432 | simd_json::OwnedValue::String(s) => assert_eq!(s, expected), 433 | _ => panic!("expected string"), 434 | } 435 | } 436 | b.iter(bench); 437 | }); 438 | } 439 | } 440 | 441 | fn bench_array_push(c: &mut Criterion) { 442 | let array = r#"[{"a":"foo"},{"b":"bar"},{"c":"baz"}]"#; 443 | let value = r#"{"d":"qqq"}"#; 444 | 445 | let a: jsonbb::Value = array.parse().unwrap(); 446 | let v: jsonbb::Value = value.parse().unwrap(); 447 | c.bench_function("array_push/jsonbb", |b| { 448 | b.iter_batched( 449 | || a.clone(), 450 | |mut a| a.array_push(v.as_ref()), 451 | BatchSize::SmallInput, 452 | ) 453 | }); 454 | 455 | let a: serde_json::Value = array.parse().unwrap(); 456 | let v: serde_json::Value = value.parse().unwrap(); 457 | c.bench_function("array_push/serde_json", |b| { 458 | b.iter_batched( 459 | || a.clone(), 460 | |mut a| a.as_array_mut().unwrap().push(v.clone()), 461 | BatchSize::SmallInput, 462 | ) 463 | }); 464 | 465 | 
let a = jsonb::parse_value(array.as_bytes()).unwrap().to_vec(); 466 | let v = jsonb::parse_value(value.as_bytes()).unwrap().to_vec(); 467 | c.bench_function("array_push/jsonb", |b| { 468 | b.iter(|| { 469 | let elems = jsonb::array_values(&a).unwrap(); 470 | let mut buf = Vec::with_capacity(a.len() + v.len()); 471 | jsonb::build_array( 472 | elems.iter().map(|v| v.as_slice()).chain([v.as_slice()]), 473 | &mut buf, 474 | ) 475 | .unwrap(); 476 | buf 477 | }) 478 | }); 479 | 480 | let a = simd_json::to_owned_value(&mut Vec::from(array)).unwrap(); 481 | let v = simd_json::to_owned_value(&mut Vec::from(value)).unwrap(); 482 | c.bench_function("array_push/simd-json", |b| { 483 | b.iter_batched( 484 | || a.clone(), 485 | |mut a| a.as_array_mut().unwrap().push(v.clone()), 486 | BatchSize::SmallInput, 487 | ) 488 | }); 489 | } 490 | 491 | /// Iterate over all files in the `./benches/data/` directory. 492 | fn iter_json_files() -> impl Iterator { 493 | std::fs::read_dir("./benches/data/").unwrap().map(|path| { 494 | let path = path.unwrap().path(); 495 | let filename = path.file_stem().unwrap().to_str().unwrap(); 496 | let json = std::fs::read_to_string(&path).unwrap(); 497 | (filename.to_owned(), json) 498 | }) 499 | } 500 | 501 | criterion_group!( 502 | benches, 503 | bench_from, 504 | bench_parse, 505 | bench_to_string, 506 | bench_hash, 507 | bench_eq, 508 | bench_cmp, 509 | bench_index, 510 | bench_index_array, 511 | bench_file_index, 512 | bench_path, 513 | bench_array_push 514 | ); 515 | criterion_main!(benches); 516 | -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | corpus 3 | artifacts 4 | coverage 5 | Cargo.lock 6 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = 
"flat-json-fuzz" 3 | version = "0.0.0" 4 | publish = false 5 | edition = "2021" 6 | 7 | [package.metadata] 8 | cargo-fuzz = true 9 | 10 | [dependencies] 11 | libfuzzer-sys = "0.4" 12 | 13 | [dependencies.flat-json] 14 | path = ".." 15 | 16 | # Prevent this from interfering with workspaces 17 | [workspace] 18 | members = ["."] 19 | 20 | [profile.release] 21 | debug = 1 22 | 23 | [[bin]] 24 | name = "fuzz_target_1" 25 | path = "fuzz_targets/fuzz_target_1.rs" 26 | test = false 27 | doc = false 28 | -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_target_1.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use jsonbb::Value; 4 | use libfuzzer_sys::fuzz_target; 5 | 6 | fuzz_target!(|data: &[u8]| { 7 | if let Ok(s) = std::str::from_utf8(data) { 8 | let _ = s.parse::(); 9 | } 10 | }); 11 | -------------------------------------------------------------------------------- /src/builder.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use super::*; 16 | use bytes::{Buf, BufMut}; 17 | use smallvec::SmallVec; 18 | use std::fmt::{self, Debug, Display}; 19 | 20 | /// A builder for JSON values. 21 | pub struct Builder> { 22 | /// The buffer to write to. 
23 | buffer: W, 24 | /// A stack of entries. 25 | /// 26 | /// Smallvec is used to avoid heap allocation for single value. 27 | pointers: SmallVec<[Entry; 1]>, 28 | /// A stack of (position, number of pointers) pairs when the array/object starts. 29 | container_starts: Vec<(usize, usize)>, 30 | } 31 | 32 | impl Debug for Builder { 33 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 34 | f.debug_tuple("JsonbbBuilder").finish() 35 | } 36 | } 37 | 38 | impl Default for Builder> { 39 | fn default() -> Self { 40 | Self::new() 41 | } 42 | } 43 | 44 | impl Clone for Builder { 45 | fn clone(&self) -> Self { 46 | Builder { 47 | buffer: self.buffer.clone(), 48 | pointers: self.pointers.clone(), 49 | container_starts: self.container_starts.clone(), 50 | } 51 | } 52 | } 53 | 54 | impl Builder> { 55 | /// Creates a new [`Builder`]. 56 | pub fn new() -> Self { 57 | Self::with_capacity(0) 58 | } 59 | 60 | /// Creates a new [`Builder`] with capacity. 61 | pub fn with_capacity(capacity: usize) -> Self { 62 | Builder { 63 | buffer: Vec::with_capacity(capacity), 64 | pointers: SmallVec::new(), 65 | container_starts: vec![], 66 | } 67 | } 68 | } 69 | 70 | impl<'a> Builder<&'a mut Vec> { 71 | /// Creates a new [`Builder`]. 72 | pub fn new(buffer: &'a mut Vec) -> Self { 73 | Builder { 74 | buffer, 75 | pointers: SmallVec::new(), 76 | container_starts: vec![], 77 | } 78 | } 79 | } 80 | 81 | impl>> Builder { 82 | /// Adds a null value to the builder. 83 | pub fn add_null(&mut self) { 84 | self.pointers.push(Entry::null()); 85 | } 86 | 87 | /// Adds a boolean value to the builder. 88 | pub fn add_bool(&mut self, v: bool) { 89 | let entry = if v { Entry::true_() } else { Entry::false_() }; 90 | self.pointers.push(entry); 91 | } 92 | 93 | /// Adds an u64 value to the builder. 
94 | pub fn add_u64(&mut self, v: u64) { 95 | if let Ok(v) = i64::try_from(v) { 96 | return self.add_i64(v); 97 | } 98 | let offset = self.offset(); 99 | self.pointers.push(Entry::number(offset)); 100 | let buffer = self.buffer.as_mut(); 101 | buffer.push(NUMBER_U64); 102 | buffer.put_u64_ne(v); 103 | } 104 | 105 | /// Adds an i64 value to the builder. 106 | pub fn add_i64(&mut self, v: i64) { 107 | let offset = self.offset(); 108 | self.pointers.push(Entry::number(offset)); 109 | let buffer = self.buffer.as_mut(); 110 | if v == 0 { 111 | buffer.push(NUMBER_ZERO); 112 | } else if let Ok(v) = i8::try_from(v) { 113 | buffer.push(NUMBER_I8); 114 | buffer.put_i8(v); 115 | } else if let Ok(v) = i16::try_from(v) { 116 | buffer.push(NUMBER_I16); 117 | buffer.put_i16_ne(v); 118 | } else if let Ok(v) = i32::try_from(v) { 119 | buffer.push(NUMBER_I32); 120 | buffer.put_i32_ne(v); 121 | } else { 122 | buffer.push(NUMBER_I64); 123 | buffer.put_i64_ne(v); 124 | } 125 | } 126 | 127 | /// Adds an f64 value to the builder. 128 | pub fn add_f64(&mut self, v: f64) { 129 | assert!( 130 | !v.is_nan() && !v.is_infinite(), 131 | "Infinite or NaN values are not JSON numbers" 132 | ); 133 | let offset = self.offset(); 134 | self.pointers.push(Entry::number(offset)); 135 | let buffer = self.buffer.as_mut(); 136 | buffer.push(NUMBER_F64); 137 | buffer.put_f64_ne(v); 138 | } 139 | 140 | /// Adds a string value to the builder. 141 | pub fn add_string(&mut self, v: &str) { 142 | let offset = self.offset(); 143 | self.pointers.push(Entry::string(offset)); 144 | let buffer = self.buffer.as_mut(); 145 | buffer.put_u32_ne(v.len().try_into().expect("string too long")); 146 | buffer.put_slice(v.as_bytes()); 147 | } 148 | 149 | /// Adds a string value that displays the given value to the builder. 
150 | pub fn display(&mut self, v: impl Display) { 151 | use std::io::Write; 152 | 153 | let offset = self.offset(); 154 | self.pointers.push(Entry::string(offset)); 155 | 156 | let buffer = self.buffer.as_mut(); 157 | let offset = buffer.len(); 158 | buffer.put_u32_ne(0); // placeholder for length 159 | write!(buffer, "{}", v).unwrap(); 160 | 161 | // update length 162 | let len = buffer.len() - offset - 4; 163 | (&mut buffer[offset..]).put_u32_ne(len.try_into().expect("string too long")); 164 | } 165 | 166 | /// Begins an array. 167 | /// 168 | /// The caller then needs to push the elements and call [`end_array`] to finish the array. 169 | /// 170 | /// [`end_array`]: #method.end_array 171 | pub fn begin_array(&mut self) { 172 | let buffer = self.buffer.as_mut(); 173 | self.container_starts 174 | .push((buffer.len(), self.pointers.len())); 175 | } 176 | 177 | /// Ends an array. 178 | pub fn end_array(&mut self) { 179 | let buffer = self.buffer.as_mut(); 180 | let (start, npointer) = self.container_starts.pop().unwrap(); 181 | let len = self.pointers.len() - npointer; 182 | buffer.reserve(4 * len + 4 + 4); 183 | for entry in self.pointers.drain(npointer..) { 184 | buffer.put_slice(entry.as_bytes()); 185 | } 186 | buffer.put_u32_ne(len as u32); 187 | buffer.put_u32_ne((buffer.len() - start + 4) as u32); 188 | 189 | let offset = self.offset(); 190 | self.pointers.push(Entry::array(offset)); 191 | } 192 | 193 | /// Begins an object. 194 | /// 195 | /// The caller then needs to push the keys and values in the following order: 196 | /// ```text 197 | /// key-1, value-1, key-2, value-2 ... 198 | /// ``` 199 | /// where each key must be a string. 200 | /// 201 | /// Keys are allowed to be duplicated, but the last value will be used. 202 | /// 203 | /// Finally [`end_object`] must be called to finish the object. 
204 | /// 205 | /// [`end_object`]: #method.end_object 206 | pub fn begin_object(&mut self) { 207 | let buffer = self.buffer.as_mut(); 208 | self.container_starts 209 | .push((buffer.len(), self.pointers.len())); 210 | } 211 | 212 | /// Ends an object. 213 | /// 214 | /// # Panics 215 | /// 216 | /// Panics if: 217 | /// - there is an odd number of entries pushed since the paired [`begin_object`]. 218 | /// - any key is not a string. 219 | /// 220 | /// [`begin_object`]: #method.begin_object 221 | pub fn end_object(&mut self) { 222 | let buffer = self.buffer.as_mut(); 223 | let (start, npointer) = self.container_starts.pop().unwrap(); 224 | assert!( 225 | (self.pointers.len() - npointer) % 2 == 0, 226 | "expected even number of entries" 227 | ); 228 | let len = (self.pointers.len() - npointer) / 2; 229 | 230 | // sort entries by key 231 | // TODO: use `as_chunks_mut` when stabilized 232 | let entries = unsafe { 233 | std::slice::from_raw_parts_mut( 234 | self.pointers 235 | .as_mut_ptr() 236 | .add(npointer) 237 | .cast::<(Entry, Entry)>(), 238 | len, 239 | ) 240 | }; 241 | for (k, _) in entries.iter() { 242 | assert!(k.is_string(), "key must be string"); 243 | } 244 | let entry_to_str = |entry: Entry| { 245 | // Performance tip: this closure is in hot path, so we use `unsafe` to avoid bound check. 246 | // SAFETY: the string is pushed by us, so it's valid UTF-8 and the range is valid. 
247 | let offset = start + entry.offset(); 248 | unsafe { 249 | let len = buffer.as_ptr().add(offset).cast::().read_unaligned() as usize; 250 | std::str::from_utf8_unchecked(buffer.get_unchecked(offset + 4..offset + 4 + len)) 251 | } 252 | }; 253 | entries.sort_by_key(|(k, _)| entry_to_str(*k)); 254 | 255 | // deduplicate keys 256 | let mut prev_key = None; 257 | let mut unique_len = 0; 258 | for i in 0..len { 259 | let key = entry_to_str(entries[i].0); 260 | if prev_key != Some(key) { 261 | prev_key = Some(key); 262 | entries[unique_len] = entries[i]; 263 | unique_len += 1; 264 | } else { 265 | entries[unique_len - 1] = entries[i]; 266 | } 267 | } 268 | 269 | // remove data if there are duplicates 270 | if unique_len != len { 271 | let data = &mut buffer[start..]; 272 | // get the index order by offset 273 | // TODO: reuse buffer to avoid allocation 274 | let mut indices = (0..unique_len).collect::>(); 275 | indices.sort_unstable_by_key(|&i| entries[i].0.offset()); 276 | // compact data and update offset 277 | let mut new_offset = 0; 278 | for i in indices { 279 | // get data range 280 | let (k, v) = &mut entries[i]; 281 | let begin = k.offset(); 282 | let end = if v.is_number() { 283 | v.offset() + 1 + number_size(data[v.offset()]) 284 | } else if v.is_string() { 285 | v.offset() + 4 + (&data[v.offset()..]).get_u32_ne() as usize 286 | } else if v.is_array() || v.is_object() { 287 | v.offset() 288 | } else { 289 | // null, false, true: no data for value 290 | begin + 4 + (&data[begin..]).get_u32_ne() as usize 291 | }; 292 | // move data and update entry 293 | if begin != new_offset { 294 | // eprintln!("move {:?} to {}", begin..end, new_offset); 295 | data.copy_within(begin..end, new_offset); 296 | // update entry 297 | let sub = begin - new_offset; 298 | k.set_offset(new_offset); 299 | if v.offset() != 0 { 300 | v.set_offset(v.offset() - sub); 301 | } 302 | } 303 | new_offset += end - begin; 304 | } 305 | buffer.truncate(start + new_offset); 306 | } 307 | 308 | 
// write entries to buffer 309 | buffer.reserve(8 * unique_len + 4 + 4); 310 | for (kentry, ventry) in &entries[..unique_len] { 311 | buffer.put_slice(kentry.as_bytes()); 312 | buffer.put_slice(ventry.as_bytes()); 313 | } 314 | buffer.put_u32_ne(unique_len as u32); 315 | buffer.put_u32_ne((buffer.len() - start + 4) as u32); 316 | 317 | let offset = self.offset(); 318 | self.pointers.truncate(npointer); 319 | self.pointers.push(Entry::object(offset)); 320 | } 321 | 322 | /// Adds a JSON value to the builder. 323 | pub fn add_value(&mut self, value: ValueRef<'_>) { 324 | match value { 325 | ValueRef::Null => self.add_null(), 326 | ValueRef::Bool(b) => self.add_bool(b), 327 | ValueRef::Number(n) => { 328 | if let Some(i) = n.as_u64() { 329 | self.add_u64(i) 330 | } else if let Some(i) = n.as_i64() { 331 | self.add_i64(i) 332 | } else if let Some(f) = n.as_f64() { 333 | self.add_f64(f) 334 | } else { 335 | panic!("invalid number"); 336 | } 337 | } 338 | ValueRef::String(s) => self.add_string(s), 339 | ValueRef::Array(a) => { 340 | let buffer = self.buffer.as_mut(); 341 | buffer.extend_from_slice(a.as_slice()); 342 | let offset = self.offset(); 343 | self.pointers.push(Entry::array(offset)); 344 | } 345 | ValueRef::Object(o) => { 346 | let buffer = self.buffer.as_mut(); 347 | buffer.extend_from_slice(o.as_slice()); 348 | let offset = self.offset(); 349 | self.pointers.push(Entry::object(offset)); 350 | } 351 | } 352 | } 353 | 354 | /// Finishes building. 355 | fn finish_internal(mut self) -> W { 356 | assert_eq!(self.pointers.len(), 1, "expected single root value"); 357 | assert!(self.container_starts.is_empty(), "unfinished container"); 358 | let buffer = self.buffer.as_mut(); 359 | let entry = self.pointers.pop().unwrap(); 360 | buffer.put_slice(entry.as_bytes()); 361 | self.buffer 362 | } 363 | 364 | /// Get the current offset from the array/object start. 
365 | fn offset(&mut self) -> usize { 366 | self.buffer.as_mut().len() - self.container_starts.last().map_or(0, |&(o, _)| o) 367 | } 368 | 369 | /// Pops the last value. 370 | pub fn pop(&mut self) { 371 | let entry = self.pointers.pop().unwrap(); 372 | if entry == Entry::null() || entry == Entry::false_() || entry == Entry::true_() { 373 | // no payload 374 | return; 375 | } 376 | let buffer = self.buffer.as_mut(); 377 | let new_len = entry.offset() + self.container_starts.last().map_or(0, |&(o, _)| o); 378 | buffer.truncate(new_len); 379 | if entry.is_array() || entry.is_object() { 380 | let len = (&buffer[new_len - 4..]).get_u32_ne() as usize; 381 | buffer.truncate(new_len - len); 382 | } 383 | } 384 | } 385 | 386 | impl Builder> { 387 | /// Returns the capacity of the internal buffer, in bytes. 388 | pub fn capacity(&self) -> usize { 389 | self.buffer.capacity() 390 | } 391 | 392 | /// Finishes building. 393 | pub fn finish(self) -> Value { 394 | Value { 395 | buffer: self.finish_internal().into(), 396 | } 397 | } 398 | } 399 | 400 | impl<'a> Builder<&'a mut Vec> { 401 | /// Finishes building. 
402 | pub fn finish(self) { 403 | self.finish_internal(); 404 | } 405 | } 406 | 407 | #[cfg(test)] 408 | mod tests { 409 | use crate::{Builder, Value}; 410 | 411 | #[test] 412 | fn unique_key() { 413 | let value: Value = 414 | r#"{"a":1,"b":2,"c":3,"d":4,"e":5,"e":{},"d":[0],"c":"c","b":1,"a":null}"# 415 | .parse() 416 | .unwrap(); 417 | assert_eq!( 418 | value.to_string(), 419 | r#"{"a":null,"b":1,"c":"c","d":[0],"e":{}}"# 420 | ); 421 | } 422 | 423 | #[test] 424 | fn pop() { 425 | let mut builder = Builder::>::new(); 426 | builder.begin_array(); 427 | builder.add_u64(1); 428 | builder.add_string("2"); 429 | builder.add_null(); 430 | builder.begin_array(); 431 | builder.add_null(); 432 | builder.end_array(); 433 | builder.pop(); 434 | builder.pop(); 435 | builder.pop(); 436 | builder.add_u64(4); 437 | builder.end_array(); 438 | let value = builder.finish(); 439 | assert_eq!(value.to_string(), "[1,4]"); 440 | } 441 | } 442 | -------------------------------------------------------------------------------- /src/entry.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] 16 | #[repr(transparent)] 17 | pub struct Entry(pub [u8; 4]); 18 | 19 | impl Entry { 20 | const LEN_MASK: u32 = 0x1FFFFFFF; 21 | 22 | pub const NULL_TAG: u32 = 0; 23 | pub const STRING_TAG: u32 = 1; 24 | pub const NUMBER_TAG: u32 = 2; 25 | pub const FALSE_TAG: u32 = 3; 26 | pub const TRUE_TAG: u32 = 4; 27 | pub const ARRAY_TAG: u32 = 5; 28 | pub const OBJECT_TAG: u32 = 6; 29 | 30 | pub const fn tag(self) -> u32 { 31 | u32::from_ne_bytes(self.0) >> 29 32 | } 33 | 34 | pub const fn offset(self) -> usize { 35 | (u32::from_ne_bytes(self.0) & Self::LEN_MASK) as usize 36 | } 37 | 38 | pub const fn null() -> Self { 39 | Self::from_u32(Self::NULL_TAG << 29) 40 | } 41 | 42 | pub const fn false_() -> Self { 43 | Self::from_u32(Self::FALSE_TAG << 29) 44 | } 45 | 46 | pub const fn true_() -> Self { 47 | Self::from_u32(Self::TRUE_TAG << 29) 48 | } 49 | 50 | pub const fn bool(b: bool) -> Self { 51 | if b { 52 | Self::true_() 53 | } else { 54 | Self::false_() 55 | } 56 | } 57 | 58 | pub const fn number(offset: usize) -> Self { 59 | assert!(offset <= Self::LEN_MASK as usize, "offset too large"); 60 | Self::from_u32((Self::NUMBER_TAG << 29) | (offset as u32)) 61 | } 62 | 63 | pub const fn string(offset: usize) -> Self { 64 | assert!(offset <= Self::LEN_MASK as usize, "offset too large"); 65 | Self::from_u32((Self::STRING_TAG << 29) | (offset as u32)) 66 | } 67 | 68 | pub const fn array(offset: usize) -> Self { 69 | assert!(offset <= Self::LEN_MASK as usize, "offset too large"); 70 | Self::from_u32((Self::ARRAY_TAG << 29) | (offset as u32)) 71 | } 72 | 73 | pub const fn object(offset: usize) -> Self { 74 | assert!(offset <= Self::LEN_MASK as usize, "offset too large"); 75 | Self::from_u32((Self::OBJECT_TAG << 29) | (offset as u32)) 76 | } 77 | 78 | pub const fn is_number(self) -> bool { 79 | self.tag() == Self::NUMBER_TAG 80 | } 81 | 82 | pub const fn is_string(self) -> bool { 83 | self.tag() == Self::STRING_TAG 
84 | } 85 | 86 | pub const fn is_array(self) -> bool { 87 | self.tag() == Self::ARRAY_TAG 88 | } 89 | 90 | pub const fn is_object(self) -> bool { 91 | self.tag() == Self::OBJECT_TAG 92 | } 93 | 94 | pub fn set_offset(&mut self, offset: usize) { 95 | assert!(offset <= Self::LEN_MASK as usize, "offset too large"); 96 | self.0 = ((self.tag() << 29) | (offset as u32)).to_ne_bytes(); 97 | } 98 | 99 | pub const fn as_bytes(&self) -> &[u8] { 100 | &self.0 101 | } 102 | 103 | const fn from_u32(value: u32) -> Self { 104 | Self(value.to_ne_bytes()) 105 | } 106 | } 107 | 108 | /// Convert a 4-byte slice to an `Entry`. 109 | /// 110 | /// # Panics 111 | /// 112 | /// Panics if the slice is not 4 bytes long. 113 | impl From<&[u8]> for Entry { 114 | fn from(slice: &[u8]) -> Self { 115 | Entry(slice.try_into().expect("entry must be 4 bytes")) 116 | } 117 | } 118 | 119 | // last 4 bits is the size 120 | pub const NUMBER_ZERO: u8 = 0x0; 121 | pub const NUMBER_I8: u8 = 0x1; 122 | pub const NUMBER_I16: u8 = 0x2; 123 | pub const NUMBER_I32: u8 = 0x4; 124 | pub const NUMBER_I64: u8 = 0x8; 125 | pub const NUMBER_U64: u8 = 0x18; 126 | pub const NUMBER_F64: u8 = 0x28; 127 | 128 | /// Returns the size of the number in bytes. 129 | pub const fn number_size(tag: u8) -> usize { 130 | (tag & 0xF) as usize 131 | } 132 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! A JSONB-like binary format for JSON. 16 | //! 17 | //! # Usage 18 | //! 19 | //! `jsonbb` provides an API similar to `serde_json` for constructing and querying JSON values. 20 | //! 21 | //! ``` 22 | //! // Deserialize a JSON value from a string of JSON text. 23 | //! let value: jsonbb::Value = r#"{"name": ["foo", "bar"]}"#.parse().unwrap(); 24 | //! 25 | //! // Serialize a JSON value into JSON text. 26 | //! let json = value.to_string(); 27 | //! assert_eq!(json, r#"{"name":["foo","bar"]}"#); 28 | //! ``` 29 | //! 30 | //! As a binary format, you can extract byte slices from it or read JSON values from byte slices. 31 | //! 32 | //! ``` 33 | //! # let value: jsonbb::Value = r#"{"name": ["foo", "bar"]}"#.parse().unwrap(); 34 | //! // Get the underlying byte slice of a JSON value. 35 | //! let bytes = value.as_bytes(); 36 | //! 37 | //! // Read a JSON value from a byte slice. 38 | //! let value = jsonbb::ValueRef::from_bytes(bytes); 39 | //! ``` 40 | //! 41 | //! You can use the [`get`] API to subscript a JSON and then build a new JSON using the [`Builder`] API. 42 | //! 43 | //! ``` 44 | //! # let value: jsonbb::Value = r#"{"name": ["foo", "bar"]}"#.parse().unwrap(); 45 | //! // Subscript a JSON value. 46 | //! let name = value.get("name").unwrap(); 47 | //! let foo = name.get(0).unwrap(); 48 | //! assert_eq!(foo.as_str().unwrap(), "foo"); 49 | //! 50 | //! // Build a JSON value. 51 | //! let mut builder = jsonbb::Builder::>::new(); 52 | //! builder.begin_object(); 53 | //! 
builder.add_string("name"); 54 | //! builder.add_value(foo); 55 | //! builder.end_object(); 56 | //! let value = builder.finish(); 57 | //! assert_eq!(value.to_string(), r#"{"name":"foo"}"#); 58 | //! ``` 59 | //! 60 | //! [`get`]: ValueRef::get 61 | //! 62 | //! # Encoding Format 63 | //! 64 | //! `jsonbb` stores JSON values in contiguous memory. By avoiding dynamic memory allocation, it is 65 | //! more cache-friendly and provides efficient **parsing** and **querying** performance. 66 | //! 67 | //! It has the following key features: 68 | //! 69 | //! 1. Memory Continuity: The content of any JSON subtree is stored contiguously, allowing for 70 | //! efficient copying through `memcpy`. This leads to highly efficient indexing operations. 71 | //! 72 | //! 2. Post-Order Traversal: JSON nodes are stored in post-order traversal sequence. When parsing 73 | //! JSON strings, output can be sequentially written to the buffer without additional memory 74 | //! allocation and movement. This results in highly efficient parsing operations. 75 | //! 76 | //! Each JSON node consists of a fixed-size **entry** and a variable-length **payload**. 77 | //! Each entry is 4 bytes, with 3 bits storing the node type and 29 bits storing the offset of 78 | //! the payload. 79 | //! 80 | //! ```text 81 | //! entry: type (3 bits) | offset (29 bits) 82 | //! 83 | //! # Null 84 | //! entry: 0x0 85 | //! payload: [] 86 | //! 87 | //! # Bool 88 | //! entry: 0x3 (false) / 0x4 (true) 89 | //! payload: [] 90 | //! 91 | //! # Number 92 | //! entry: 0x2 | offset 93 | //! payload: kind (u8) + value (0 / 1 / 2 / 4 / 8 bytes, given by the low 4 bits of kind) 94 | //! ^ptr 95 | //! 96 | //! # String 97 | //! entry: 0x1 | offset 98 | //! payload: len (u32) + bytes 99 | //! ^ptr 100 | //! 101 | //! # Array 102 | //! entry: 0x5 | offset 103 | //! payload: [elem] x n + [entry] x n + n (u32) + len (u32) 104 | //! ^start ^ptr 105 | //! 106 | //! # Object 107 | //! entry: 0x6 | offset 108 | //!
payload: [key, value] x n + [kentry, ventry] x n + n (u32) + len (u32) 109 | //! ^start ^ptr 110 | //! where: len = ptr - start 111 | //! ``` 112 | 113 | mod builder; 114 | mod entry; 115 | mod macros; 116 | mod partial_eq; 117 | mod serde; 118 | mod value; 119 | mod value_ref; 120 | 121 | pub use self::builder::*; 122 | use self::entry::*; 123 | pub use self::serde::*; 124 | pub use self::value::*; 125 | pub use self::value_ref::*; 126 | 127 | // for `json!` macro 128 | #[doc(hidden)] 129 | pub use serde_json; 130 | -------------------------------------------------------------------------------- /src/macros.rs: -------------------------------------------------------------------------------- 1 | /// Construct a `jsonbb::Value` from a JSON literal. 2 | /// 3 | /// ``` 4 | /// # use jsonbb::json; 5 | /// # 6 | /// let value = json!({ 7 | /// "code": 200, 8 | /// "success": true, 9 | /// "payload": { 10 | /// "features": [ 11 | /// "serde", 12 | /// "json" 13 | /// ], 14 | /// "homepage": null 15 | /// } 16 | /// }); 17 | /// ``` 18 | #[macro_export(local_inner_macros)] 19 | macro_rules! json { 20 | ($($json:tt)+) => { 21 | jsonbb::Value::from(jsonbb::serde_json::json!($($json)+)) 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /src/partial_eq.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2024 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! `PartialEq` implementations for `ValueRef` and `Value`. 16 | 17 | use crate::ValueRef; 18 | 19 | use super::Value; 20 | 21 | fn eq_i64(value: ValueRef<'_>, other: i64) -> bool { 22 | value.as_i64().map_or(false, |i| i == other) 23 | } 24 | 25 | fn eq_u64(value: ValueRef<'_>, other: u64) -> bool { 26 | value.as_u64().map_or(false, |i| i == other) 27 | } 28 | 29 | fn eq_f32(value: ValueRef<'_>, other: f32) -> bool { 30 | match value { 31 | ValueRef::Number(n) => n.as_f32().map_or(false, |i| i == other), 32 | _ => false, 33 | } 34 | } 35 | 36 | fn eq_f64(value: ValueRef<'_>, other: f64) -> bool { 37 | value.as_f64().map_or(false, |i| i == other) 38 | } 39 | 40 | fn eq_bool(value: ValueRef<'_>, other: bool) -> bool { 41 | value.as_bool().map_or(false, |i| i == other) 42 | } 43 | 44 | fn eq_str(value: ValueRef<'_>, other: &str) -> bool { 45 | value.as_str().map_or(false, |i| i == other) 46 | } 47 | 48 | impl PartialEq for ValueRef<'_> { 49 | fn eq(&self, other: &str) -> bool { 50 | eq_str(*self, other) 51 | } 52 | } 53 | 54 | impl PartialEq<&str> for ValueRef<'_> { 55 | fn eq(&self, other: &&str) -> bool { 56 | eq_str(*self, other) 57 | } 58 | } 59 | 60 | impl PartialEq> for str { 61 | fn eq(&self, other: &ValueRef<'_>) -> bool { 62 | eq_str(*other, self) 63 | } 64 | } 65 | 66 | impl PartialEq> for &str { 67 | fn eq(&self, other: &ValueRef<'_>) -> bool { 68 | eq_str(*other, self) 69 | } 70 | } 71 | 72 | impl PartialEq for ValueRef<'_> { 73 | fn eq(&self, other: &String) -> bool { 74 | eq_str(*self, other.as_str()) 75 | } 76 | } 77 | 78 | impl PartialEq> for String { 79 | fn eq(&self, other: &ValueRef<'_>) -> bool { 80 | eq_str(*other, self.as_str()) 81 | } 82 | } 83 | 84 | macro_rules! 
partialeq_numeric { 85 | ($($eq:ident [$($ty:ty)*])*) => { 86 | $($( 87 | impl PartialEq<$ty> for ValueRef<'_> { 88 | fn eq(&self, other: &$ty) -> bool { 89 | $eq(*self, *other as _) 90 | } 91 | } 92 | 93 | impl PartialEq for $ty { 94 | fn eq(&self, other: &Value) -> bool { 95 | $eq(other.as_ref(), *self as _) 96 | } 97 | } 98 | 99 | impl<'a> PartialEq<$ty> for &'a Value { 100 | fn eq(&self, other: &$ty) -> bool { 101 | $eq(self.as_ref(), *other as _) 102 | } 103 | } 104 | )*)* 105 | } 106 | } 107 | 108 | partialeq_numeric! { 109 | eq_i64[i8 i16 i32 i64 isize] 110 | eq_u64[u8 u16 u32 u64 usize] 111 | eq_f32[f32] 112 | eq_f64[f64] 113 | eq_bool[bool] 114 | } 115 | -------------------------------------------------------------------------------- /src/serde.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! Serde support for `ValueRef` and `Builder`. 16 | 17 | use std::fmt::{self, Display}; 18 | 19 | use serde::de::{DeserializeSeed, MapAccess, SeqAccess, Visitor}; 20 | use serde::ser::{self, Impossible, SerializeMap, SerializeSeq}; 21 | 22 | use crate::{ArrayRef, Builder, NumberRef, ObjectRef, Value, ValueRef}; 23 | 24 | /// Convert a value that `impl Serialize` into `jsonbb::Value`. 
25 | pub fn to_value(value: T) -> Result { 26 | let mut builder = Builder::>::new(); 27 | value.serialize(&mut builder)?; 28 | Ok(builder.finish()) 29 | } 30 | 31 | impl ser::Serialize for Value { 32 | #[inline] 33 | fn serialize(&self, serializer: S) -> Result 34 | where 35 | S: ::serde::Serializer, 36 | { 37 | self.as_ref().serialize(serializer) 38 | } 39 | } 40 | 41 | impl ser::Serialize for ValueRef<'_> { 42 | #[inline] 43 | fn serialize(&self, serializer: S) -> Result 44 | where 45 | S: ::serde::Serializer, 46 | { 47 | match self { 48 | Self::Null => serializer.serialize_unit(), 49 | Self::Bool(b) => serializer.serialize_bool(*b), 50 | Self::Number(n) => n.serialize(serializer), 51 | Self::String(s) => serializer.serialize_str(s), 52 | Self::Array(v) => v.serialize(serializer), 53 | Self::Object(o) => o.serialize(serializer), 54 | } 55 | } 56 | } 57 | 58 | impl ser::Serialize for NumberRef<'_> { 59 | #[inline] 60 | fn serialize(&self, serializer: S) -> Result 61 | where 62 | S: ::serde::Serializer, 63 | { 64 | self.to_number().serialize(serializer) 65 | } 66 | } 67 | 68 | impl ser::Serialize for ArrayRef<'_> { 69 | #[inline] 70 | fn serialize(&self, serializer: S) -> Result 71 | where 72 | S: ::serde::Serializer, 73 | { 74 | let mut seq = serializer.serialize_seq(Some(self.len()))?; 75 | for v in self.iter() { 76 | seq.serialize_element(&v)?; 77 | } 78 | seq.end() 79 | } 80 | } 81 | 82 | impl ser::Serialize for ObjectRef<'_> { 83 | #[inline] 84 | fn serialize(&self, serializer: S) -> Result 85 | where 86 | S: ::serde::Serializer, 87 | { 88 | let mut map = serializer.serialize_map(Some(self.len()))?; 89 | for (k, v) in self.iter() { 90 | map.serialize_entry(k, &v)?; 91 | } 92 | map.end() 93 | } 94 | } 95 | 96 | impl<'de, W: AsMut>> DeserializeSeed<'de> for &mut Builder { 97 | type Value = (); 98 | 99 | #[inline] 100 | fn deserialize(self, deserializer: D) -> Result 101 | where 102 | D: serde::Deserializer<'de>, 103 | { 104 | impl<'de, W: AsMut>> Visitor<'de> 
for &mut Builder { 105 | type Value = (); 106 | 107 | fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { 108 | formatter.write_str("any valid JSON value") 109 | } 110 | 111 | #[inline] 112 | fn visit_bool(self, value: bool) -> Result<(), E> { 113 | self.add_bool(value); 114 | Ok(()) 115 | } 116 | 117 | #[inline] 118 | fn visit_i64(self, value: i64) -> Result<(), E> { 119 | self.add_i64(value); 120 | Ok(()) 121 | } 122 | 123 | #[inline] 124 | fn visit_u64(self, value: u64) -> Result<(), E> { 125 | self.add_u64(value); 126 | Ok(()) 127 | } 128 | 129 | #[inline] 130 | fn visit_f64(self, value: f64) -> Result<(), E> { 131 | self.add_f64(value); 132 | Ok(()) 133 | } 134 | 135 | #[inline] 136 | fn visit_str(self, value: &str) -> Result<(), E> 137 | where 138 | E: serde::de::Error, 139 | { 140 | self.add_string(value); 141 | Ok(()) 142 | } 143 | 144 | #[inline] 145 | fn visit_none(self) -> Result<(), E> { 146 | self.add_null(); 147 | Ok(()) 148 | } 149 | 150 | #[inline] 151 | fn visit_some(self, deserializer: D) -> Result<(), D::Error> 152 | where 153 | D: serde::Deserializer<'de>, 154 | { 155 | self.deserialize(deserializer) 156 | } 157 | 158 | #[inline] 159 | fn visit_unit(self) -> Result<(), E> { 160 | self.add_null(); 161 | Ok(()) 162 | } 163 | 164 | #[inline] 165 | fn visit_seq(self, mut visitor: V) -> Result<(), V::Error> 166 | where 167 | V: SeqAccess<'de>, 168 | { 169 | self.begin_array(); 170 | while visitor.next_element_seed(&mut *self)?.is_some() {} 171 | self.end_array(); 172 | Ok(()) 173 | } 174 | 175 | fn visit_map(self, mut visitor: V) -> Result<(), V::Error> 176 | where 177 | V: MapAccess<'de>, 178 | { 179 | self.begin_object(); 180 | while visitor.next_key_seed(&mut *self)?.is_some() { 181 | visitor.next_value_seed(&mut *self)?; 182 | } 183 | self.end_object(); 184 | Ok(()) 185 | } 186 | } 187 | 188 | deserializer.deserialize_any(self) 189 | } 190 | } 191 | 192 | /// Jsonbb is a data format. 
193 | // https://docs.rs/serde_json/latest/src/serde_json/ser.rs.html#59-454 194 | impl>> ser::Serializer for &mut Builder { 195 | type Ok = (); 196 | 197 | type Error = std::fmt::Error; 198 | 199 | type SerializeSeq = Self; 200 | 201 | type SerializeTuple = Self; 202 | 203 | type SerializeTupleStruct = Self; 204 | 205 | type SerializeTupleVariant = Self; 206 | 207 | type SerializeMap = Self; 208 | 209 | type SerializeStruct = Self; 210 | 211 | type SerializeStructVariant = Self; 212 | 213 | fn serialize_bool(self, v: bool) -> Result { 214 | self.add_bool(v); 215 | Ok(()) 216 | } 217 | 218 | fn serialize_i8(self, v: i8) -> Result { 219 | self.add_i64(v as _); 220 | Ok(()) 221 | } 222 | 223 | fn serialize_i16(self, v: i16) -> Result { 224 | self.add_i64(v as _); 225 | Ok(()) 226 | } 227 | 228 | fn serialize_i32(self, v: i32) -> Result { 229 | self.add_i64(v as _); 230 | Ok(()) 231 | } 232 | 233 | fn serialize_i64(self, v: i64) -> Result { 234 | self.add_i64(v as _); 235 | Ok(()) 236 | } 237 | 238 | fn serialize_i128(self, v: i128) -> Result { 239 | self.add_i64(v.try_into().map_err(|_| invalid_number())?); 240 | Ok(()) 241 | } 242 | 243 | fn serialize_u8(self, v: u8) -> Result { 244 | self.add_u64(v as _); 245 | Ok(()) 246 | } 247 | 248 | fn serialize_u16(self, v: u16) -> Result { 249 | self.add_u64(v as _); 250 | Ok(()) 251 | } 252 | 253 | fn serialize_u32(self, v: u32) -> Result { 254 | self.add_u64(v as _); 255 | Ok(()) 256 | } 257 | 258 | fn serialize_u64(self, v: u64) -> Result { 259 | self.add_u64(v as _); 260 | Ok(()) 261 | } 262 | 263 | fn serialize_u128(self, v: u128) -> Result { 264 | self.add_u64(v.try_into().map_err(|_| invalid_number())?); 265 | Ok(()) 266 | } 267 | 268 | fn serialize_f32(self, v: f32) -> Result { 269 | self.add_f64(v as _); 270 | Ok(()) 271 | } 272 | 273 | fn serialize_f64(self, v: f64) -> Result { 274 | self.add_f64(v as _); 275 | Ok(()) 276 | } 277 | 278 | fn serialize_char(self, v: char) -> Result { 279 | 
self.add_string(v.encode_utf8(&mut [0; 4])); 280 | Ok(()) 281 | } 282 | 283 | fn serialize_str(self, v: &str) -> Result { 284 | self.add_string(v); 285 | Ok(()) 286 | } 287 | 288 | fn serialize_bytes(self, v: &[u8]) -> Result { 289 | // serialize as byte array 290 | self.begin_array(); 291 | for byte in v { 292 | self.add_u64(*byte as _); 293 | } 294 | self.end_array(); 295 | Ok(()) 296 | } 297 | 298 | fn serialize_none(self) -> Result { 299 | self.add_null(); 300 | Ok(()) 301 | } 302 | 303 | fn serialize_some(self, value: &T) -> Result 304 | where 305 | T: ser::Serialize + ?Sized, 306 | { 307 | value.serialize(self) 308 | } 309 | 310 | fn serialize_unit(self) -> Result { 311 | self.add_null(); 312 | Ok(()) 313 | } 314 | 315 | fn serialize_unit_struct(self, _name: &'static str) -> Result { 316 | self.serialize_unit() 317 | } 318 | 319 | fn serialize_unit_variant( 320 | self, 321 | _name: &'static str, 322 | _variant_index: u32, 323 | variant: &'static str, 324 | ) -> Result { 325 | self.serialize_str(variant) 326 | } 327 | 328 | fn serialize_newtype_struct( 329 | self, 330 | _name: &'static str, 331 | value: &T, 332 | ) -> Result 333 | where 334 | T: ser::Serialize + ?Sized, 335 | { 336 | value.serialize(self) 337 | } 338 | 339 | fn serialize_newtype_variant( 340 | self, 341 | _name: &'static str, 342 | _variant_index: u32, 343 | variant: &'static str, 344 | value: &T, 345 | ) -> Result 346 | where 347 | T: ser::Serialize + ?Sized, 348 | { 349 | self.begin_object(); 350 | self.add_string(variant); 351 | value.serialize(&mut *self)?; 352 | self.end_object(); 353 | Ok(()) 354 | } 355 | 356 | fn serialize_seq(self, _len: Option) -> Result { 357 | self.begin_array(); 358 | Ok(self) 359 | } 360 | 361 | fn serialize_tuple(self, len: usize) -> Result { 362 | self.serialize_seq(Some(len)) 363 | } 364 | 365 | fn serialize_tuple_struct( 366 | self, 367 | _name: &'static str, 368 | len: usize, 369 | ) -> Result { 370 | self.serialize_seq(Some(len)) 371 | } 372 | 373 | fn 
serialize_tuple_variant( 374 | self, 375 | _name: &'static str, 376 | _variant_index: u32, 377 | variant: &'static str, 378 | len: usize, 379 | ) -> Result { 380 | self.begin_object(); 381 | self.add_string(variant); 382 | self.serialize_seq(Some(len)) 383 | } 384 | 385 | fn serialize_map(self, _len: Option) -> Result { 386 | self.begin_object(); 387 | Ok(self) 388 | } 389 | 390 | fn serialize_struct( 391 | self, 392 | _name: &'static str, 393 | len: usize, 394 | ) -> Result { 395 | self.serialize_map(Some(len)) 396 | } 397 | 398 | fn serialize_struct_variant( 399 | self, 400 | _name: &'static str, 401 | _variant_index: u32, 402 | variant: &'static str, 403 | len: usize, 404 | ) -> Result { 405 | self.begin_object(); 406 | self.add_string(variant); 407 | self.serialize_map(Some(len)) 408 | } 409 | 410 | fn collect_str(self, value: &T) -> Result 411 | where 412 | T: ?Sized + Display, 413 | { 414 | self.display(value); 415 | Ok(()) 416 | } 417 | } 418 | 419 | impl>> ser::SerializeTuple for &mut Builder { 420 | type Ok = (); 421 | type Error = std::fmt::Error; 422 | 423 | #[inline] 424 | fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> 425 | where 426 | T: ?Sized + ser::Serialize, 427 | { 428 | ser::SerializeSeq::serialize_element(self, value) 429 | } 430 | 431 | #[inline] 432 | fn end(self) -> Result<(), Self::Error> { 433 | ser::SerializeSeq::end(self) 434 | } 435 | } 436 | 437 | impl>> ser::SerializeTupleStruct for &mut Builder { 438 | type Ok = (); 439 | type Error = std::fmt::Error; 440 | 441 | #[inline] 442 | fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> 443 | where 444 | T: ?Sized + ser::Serialize, 445 | { 446 | ser::SerializeSeq::serialize_element(self, value) 447 | } 448 | 449 | #[inline] 450 | fn end(self) -> Result<(), Self::Error> { 451 | ser::SerializeSeq::end(self) 452 | } 453 | } 454 | 455 | impl>> ser::SerializeTupleVariant for &mut Builder { 456 | type Ok = (); 457 | type Error = std::fmt::Error; 458 | 459 | 
#[inline] 460 | fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> 461 | where 462 | T: ?Sized + ser::Serialize, 463 | { 464 | ser::SerializeSeq::serialize_element(self, value) 465 | } 466 | 467 | #[inline] 468 | fn end(self) -> Result<(), Self::Error> { 469 | self.end_array(); 470 | self.end_object(); 471 | Ok(()) 472 | } 473 | } 474 | 475 | impl>> ser::SerializeMap for &mut Builder { 476 | type Ok = (); 477 | type Error = std::fmt::Error; 478 | 479 | #[inline] 480 | fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> 481 | where 482 | T: ?Sized + ser::Serialize, 483 | { 484 | key.serialize(MapKeySerializer { ser: *self }) 485 | } 486 | 487 | #[inline] 488 | fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> 489 | where 490 | T: ?Sized + ser::Serialize, 491 | { 492 | value.serialize(&mut **self) 493 | } 494 | 495 | #[inline] 496 | fn end(self) -> Result<(), Self::Error> { 497 | self.end_object(); 498 | Ok(()) 499 | } 500 | } 501 | 502 | impl>> ser::SerializeStruct for &mut Builder { 503 | type Ok = (); 504 | type Error = std::fmt::Error; 505 | 506 | #[inline] 507 | fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> 508 | where 509 | T: ?Sized + ser::Serialize, 510 | { 511 | ser::SerializeMap::serialize_entry(self, key, value) 512 | } 513 | 514 | #[inline] 515 | fn end(self) -> Result<(), Self::Error> { 516 | ser::SerializeMap::end(self) 517 | } 518 | } 519 | 520 | impl>> ser::SerializeStructVariant for &mut Builder { 521 | type Ok = (); 522 | type Error = std::fmt::Error; 523 | 524 | #[inline] 525 | fn serialize_field(&mut self, key: &'static str, value: &T) -> Result<(), Self::Error> 526 | where 527 | T: ?Sized + ser::Serialize, 528 | { 529 | ser::SerializeStruct::serialize_field(self, key, value) 530 | } 531 | 532 | #[inline] 533 | fn end(self) -> Result<(), Self::Error> { 534 | self.end_object(); 535 | self.end_object(); 536 | Ok(()) 537 | } 538 | } 539 | 540 | impl>> 
ser::SerializeSeq for &mut Builder { 541 | type Ok = (); 542 | type Error = std::fmt::Error; 543 | 544 | #[inline] 545 | fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> 546 | where 547 | T: ?Sized + ser::Serialize, 548 | { 549 | value.serialize(&mut **self) 550 | } 551 | 552 | #[inline] 553 | fn end(self) -> Result<(), Self::Error> { 554 | self.end_array(); 555 | Ok(()) 556 | } 557 | } 558 | 559 | struct MapKeySerializer<'a, W> { 560 | ser: &'a mut Builder, 561 | } 562 | 563 | impl<'a, W> ser::Serializer for MapKeySerializer<'a, W> 564 | where 565 | W: AsMut>, 566 | { 567 | type Ok = (); 568 | type Error = std::fmt::Error; 569 | 570 | #[inline] 571 | fn serialize_str(self, value: &str) -> Result { 572 | self.ser.serialize_str(value) 573 | } 574 | 575 | #[inline] 576 | fn serialize_unit_variant( 577 | self, 578 | _name: &'static str, 579 | _variant_index: u32, 580 | variant: &'static str, 581 | ) -> Result { 582 | self.ser.serialize_str(variant) 583 | } 584 | 585 | #[inline] 586 | fn serialize_newtype_struct( 587 | self, 588 | _name: &'static str, 589 | value: &T, 590 | ) -> Result 591 | where 592 | T: ?Sized + ser::Serialize, 593 | { 594 | value.serialize(self) 595 | } 596 | 597 | type SerializeSeq = Impossible<(), Self::Error>; 598 | type SerializeTuple = Impossible<(), Self::Error>; 599 | type SerializeTupleStruct = Impossible<(), Self::Error>; 600 | type SerializeTupleVariant = Impossible<(), Self::Error>; 601 | type SerializeMap = Impossible<(), Self::Error>; 602 | type SerializeStruct = Impossible<(), Self::Error>; 603 | type SerializeStructVariant = Impossible<(), Self::Error>; 604 | 605 | fn serialize_bool(self, value: bool) -> Result { 606 | self.ser.display(value); 607 | Ok(()) 608 | } 609 | 610 | fn serialize_i8(self, value: i8) -> Result { 611 | self.ser.display(value); 612 | Ok(()) 613 | } 614 | 615 | fn serialize_i16(self, value: i16) -> Result { 616 | self.ser.display(value); 617 | Ok(()) 618 | } 619 | 620 | fn 
serialize_i32(self, value: i32) -> Result { 621 | self.ser.display(value); 622 | Ok(()) 623 | } 624 | 625 | fn serialize_i64(self, value: i64) -> Result { 626 | self.ser.display(value); 627 | Ok(()) 628 | } 629 | 630 | fn serialize_i128(self, value: i128) -> Result { 631 | self.ser.display(value); 632 | Ok(()) 633 | } 634 | 635 | fn serialize_u8(self, value: u8) -> Result { 636 | self.ser.display(value); 637 | Ok(()) 638 | } 639 | 640 | fn serialize_u16(self, value: u16) -> Result { 641 | self.ser.display(value); 642 | Ok(()) 643 | } 644 | 645 | fn serialize_u32(self, value: u32) -> Result { 646 | self.ser.display(value); 647 | Ok(()) 648 | } 649 | 650 | fn serialize_u64(self, value: u64) -> Result { 651 | self.ser.display(value); 652 | Ok(()) 653 | } 654 | 655 | fn serialize_u128(self, value: u128) -> Result { 656 | self.ser.display(value); 657 | Ok(()) 658 | } 659 | 660 | fn serialize_f32(self, value: f32) -> Result { 661 | self.ser.display(value); 662 | Ok(()) 663 | } 664 | 665 | fn serialize_f64(self, value: f64) -> Result { 666 | self.ser.display(value); 667 | Ok(()) 668 | } 669 | 670 | fn serialize_char(self, value: char) -> Result { 671 | self.ser.display(value); 672 | Ok(()) 673 | } 674 | 675 | fn serialize_bytes(self, _value: &[u8]) -> Result { 676 | Err(key_must_be_a_string()) 677 | } 678 | 679 | fn serialize_unit(self) -> Result { 680 | Err(key_must_be_a_string()) 681 | } 682 | 683 | fn serialize_unit_struct(self, _name: &'static str) -> Result { 684 | Err(key_must_be_a_string()) 685 | } 686 | 687 | fn serialize_newtype_variant( 688 | self, 689 | _name: &'static str, 690 | _variant_index: u32, 691 | _variant: &'static str, 692 | _value: &T, 693 | ) -> Result 694 | where 695 | T: ?Sized + ser::Serialize, 696 | { 697 | Err(key_must_be_a_string()) 698 | } 699 | 700 | fn serialize_none(self) -> Result { 701 | Err(key_must_be_a_string()) 702 | } 703 | 704 | fn serialize_some(self, value: &T) -> Result 705 | where 706 | T: ?Sized + ser::Serialize, 707 | { 708 
| value.serialize(self) 709 | } 710 | 711 | fn serialize_seq(self, _len: Option) -> Result { 712 | Err(key_must_be_a_string()) 713 | } 714 | 715 | fn serialize_tuple(self, _len: usize) -> Result { 716 | Err(key_must_be_a_string()) 717 | } 718 | 719 | fn serialize_tuple_struct( 720 | self, 721 | _name: &'static str, 722 | _len: usize, 723 | ) -> Result { 724 | Err(key_must_be_a_string()) 725 | } 726 | 727 | fn serialize_tuple_variant( 728 | self, 729 | _name: &'static str, 730 | _variant_index: u32, 731 | _variant: &'static str, 732 | _len: usize, 733 | ) -> Result { 734 | Err(key_must_be_a_string()) 735 | } 736 | 737 | fn serialize_map(self, _len: Option) -> Result { 738 | Err(key_must_be_a_string()) 739 | } 740 | 741 | fn serialize_struct( 742 | self, 743 | _name: &'static str, 744 | _len: usize, 745 | ) -> Result { 746 | Err(key_must_be_a_string()) 747 | } 748 | 749 | fn serialize_struct_variant( 750 | self, 751 | _name: &'static str, 752 | _variant_index: u32, 753 | _variant: &'static str, 754 | _len: usize, 755 | ) -> Result { 756 | Err(key_must_be_a_string()) 757 | } 758 | 759 | fn collect_str(self, value: &T) -> Result 760 | where 761 | T: ?Sized + Display, 762 | { 763 | self.ser.collect_str(value) 764 | } 765 | } 766 | 767 | fn key_must_be_a_string() -> std::fmt::Error { 768 | // TODO: better error message 769 | std::fmt::Error 770 | } 771 | 772 | fn invalid_number() -> std::fmt::Error { 773 | // TODO: better error message 774 | std::fmt::Error 775 | } 776 | 777 | #[cfg(test)] 778 | mod tests { 779 | use crate::Value; 780 | 781 | #[test] 782 | fn test_serde() { 783 | let json = r#" 784 | { 785 | "null": null, 786 | "false": false, 787 | "true": true, 788 | "string": "hello", 789 | "integer": 43, 790 | "u64max": 18446744073709551615, 791 | "i64min": -9223372036854775808, 792 | "float": 178.5, 793 | "array": ["hello", "world"] 794 | }"#; 795 | let value: Value = json.parse().unwrap(); 796 | assert_eq!( 797 | format!("{value}"), 798 | 
r#"{"array":["hello","world"],"false":false,"float":178.5,"i64min":-9223372036854775808,"integer":43,"null":null,"string":"hello","true":true,"u64max":18446744073709551615}"# 799 | ); 800 | assert_eq!( 801 | format!("{value:#}"), 802 | r#" 803 | { 804 | "array": [ 805 | "hello", 806 | "world" 807 | ], 808 | "false": false, 809 | "float": 178.5, 810 | "i64min": -9223372036854775808, 811 | "integer": 43, 812 | "null": null, 813 | "string": "hello", 814 | "true": true, 815 | "u64max": 18446744073709551615 816 | }"# 817 | .trim() 818 | ); 819 | } 820 | 821 | #[test] 822 | fn expect_end_of_input() { 823 | "1f2".parse::().unwrap_err(); 824 | "trues".parse::().unwrap_err(); 825 | "true, false".parse::().unwrap_err(); 826 | } 827 | 828 | use super::to_value; 829 | use serde::Serialize; 830 | use std::{collections::HashMap, fmt::Display, hash::Hash}; 831 | 832 | #[test] 833 | fn test_to_value() { 834 | /// Test that `value` serializes to `expected`. 835 | #[track_caller] 836 | fn test(value: impl Serialize, expected: &str) { 837 | let actual = to_value(&value).unwrap().to_string(); 838 | assert_eq!(actual, expected); 839 | assert_eq!(serde_json::to_value(&value).unwrap().to_string(), expected); 840 | } 841 | 842 | test((), "null"); 843 | test(true, "true"); 844 | test(42i8, "42"); 845 | test(42i16, "42"); 846 | test(42i32, "42"); 847 | test(42i64, "42"); 848 | test(42i128, "42"); 849 | test(42u8, "42"); 850 | test(42u16, "42"); 851 | test(42u32, "42"); 852 | test(42u64, "42"); 853 | test(42u128, "42"); 854 | // FIXME: actual "1.2300000190734863" 855 | // test(1.23f32, "1.23"); 856 | test(1.23f64, "1.23"); 857 | 858 | test('a', "\"a\""); 859 | test("hello", "\"hello\""); 860 | 861 | test(None as Option, "null"); 862 | test(Some(42), "42"); 863 | 864 | test([1, 2, 3], "[1,2,3]"); 865 | test(vec![1, 2, 3], "[1,2,3]"); 866 | 867 | #[derive(Serialize)] 868 | struct UnitStruct; 869 | 870 | #[derive(Serialize)] 871 | struct NewtypeStruct(i32); 872 | 873 | #[derive(Serialize)] 874 
| struct TestStruct { 875 | id: i32, 876 | name: String, 877 | } 878 | 879 | #[derive(Serialize)] 880 | enum TestEnum { 881 | // UnitVariant 882 | A, 883 | // NewTypeVariant 884 | B(i32), 885 | // TupleVariant 886 | C(i32, i32), 887 | // StructVariant 888 | D { x: i32, y: i32 }, 889 | } 890 | 891 | test(UnitStruct, "null"); 892 | test(NewtypeStruct(42), "42"); 893 | 894 | let s = TestStruct { 895 | id: 1, 896 | name: "Alice".to_string(), 897 | }; 898 | test(s, r#"{"id":1,"name":"Alice"}"#); 899 | 900 | test(TestEnum::A, r#""A""#); 901 | test(TestEnum::B(42), r#"{"B":42}"#); 902 | test(TestEnum::C(4, 2), r#"{"C":[4,2]}"#); 903 | test(TestEnum::D { x: 1, y: 2 }, r#"{"D":{"x":1,"y":2}}"#); 904 | 905 | test(vec![1, 2, 3], "[1,2,3]"); 906 | test((1, "two"), "[1,\"two\"]"); 907 | 908 | /// Test that keys are serialized as strings. 909 | #[track_caller] 910 | fn test_map_key(key: impl Serialize + Display + Eq + Hash) { 911 | let expected = format!("{{\"{key}\":\"value\"}}"); 912 | let map = [(key, "value")].into_iter().collect::>(); 913 | assert_eq!(to_value(&map).unwrap().to_string(), expected); 914 | assert_eq!(serde_json::to_value(&map).unwrap().to_string(), expected); 915 | } 916 | test_map_key("key"); 917 | test_map_key(true); 918 | test_map_key(42i8); 919 | test_map_key(42i16); 920 | test_map_key(42i32); 921 | test_map_key(42i64); 922 | // test_map_key(42i128); // not supported by serde_json 923 | test_map_key(42u8); 924 | test_map_key(42u16); 925 | test_map_key(42u32); 926 | test_map_key(42u64); 927 | // test_map_key(42u128); // not supported by serde_json 928 | } 929 | } 930 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use super::*; 16 | use bytes::BufMut; 17 | use std::{ 18 | fmt, 19 | hash::{Hash, Hasher}, 20 | str::FromStr, 21 | }; 22 | 23 | /// An owned JSON value. 24 | #[derive(Clone)] 25 | pub struct Value { 26 | pub(crate) buffer: Box<[u8]>, 27 | } 28 | 29 | impl Value { 30 | /// Returns a `null` value. 31 | pub fn null() -> Self { 32 | Self::from(()) 33 | } 34 | 35 | /// Creates a new JSON array from an iterator of values. 36 | pub fn array<'a>(iter: impl IntoIterator>) -> Self { 37 | Self::from_builder(0, |b| { 38 | b.begin_array(); 39 | for v in iter { 40 | b.add_value(v); 41 | } 42 | b.end_array(); 43 | }) 44 | } 45 | 46 | /// Creates a new JSON object from an iterator of key-value pairs. 47 | pub fn object<'a>(iter: impl IntoIterator)>) -> Self { 48 | Self::from_builder(0, |b| { 49 | b.begin_object(); 50 | for (k, v) in iter { 51 | b.add_string(k); 52 | b.add_value(v); 53 | } 54 | b.end_object(); 55 | }) 56 | } 57 | 58 | /// Deserialize an instance of `Value` from bytes of JSON text. 59 | pub fn from_text(json: &[u8]) -> serde_json::Result { 60 | use ::serde::de::DeserializeSeed; 61 | 62 | let mut builder = Builder::with_capacity(json.len()); 63 | let mut deserializer = serde_json::Deserializer::from_slice(json); 64 | builder.deserialize(&mut deserializer)?; 65 | deserializer.end()?; 66 | Ok(builder.finish()) 67 | } 68 | 69 | /// Deserialize an instance of `Value` from bytes of JSON text. 
70 | #[cfg(feature = "simd-json")] 71 | pub fn from_text_mut(json: &mut [u8]) -> simd_json::Result { 72 | use ::serde::de::DeserializeSeed; 73 | 74 | let mut builder = Builder::with_capacity(json.len()); 75 | let mut deserializer = simd_json::Deserializer::from_slice(json)?; 76 | builder.deserialize(&mut deserializer)?; 77 | Ok(builder.finish()) 78 | } 79 | 80 | /// Creates a JSON `Value` from bytes of jsonbb encoding. 81 | pub fn from_bytes(bytes: &[u8]) -> Self { 82 | Self { 83 | buffer: bytes.into(), 84 | } 85 | } 86 | 87 | /// Returns a reference to the value. 88 | pub fn as_ref(&self) -> ValueRef<'_> { 89 | ValueRef::from_bytes(&self.buffer) 90 | } 91 | 92 | /// Returns the value as bytes. 93 | pub fn as_bytes(&self) -> &[u8] { 94 | &self.buffer 95 | } 96 | 97 | /// If the value is `null`, returns `()`. Returns `None` otherwise. 98 | /// 99 | /// # Example 100 | /// 101 | /// ``` 102 | /// let value = jsonbb::Value::from(()); 103 | /// assert_eq!(value.as_null(), Some(())); 104 | /// ``` 105 | pub fn as_null(&self) -> Option<()> { 106 | self.as_ref().as_null() 107 | } 108 | 109 | /// If the value is a boolean, returns the associated bool. Returns `None` otherwise. 110 | /// 111 | /// # Example 112 | /// 113 | /// ``` 114 | /// let value = jsonbb::Value::from(true); 115 | /// assert_eq!(value.as_bool(), Some(true)); 116 | /// ``` 117 | pub fn as_bool(&self) -> Option { 118 | self.as_ref().as_bool() 119 | } 120 | 121 | /// If the value is an integer, returns the associated i64. Returns `None` otherwise. 122 | /// 123 | /// # Example 124 | /// 125 | /// ``` 126 | /// let value = jsonbb::Value::from(1i64); 127 | /// assert_eq!(value.as_i64(), Some(1)); 128 | /// ``` 129 | pub fn as_i64(&self) -> Option { 130 | self.as_ref().as_i64() 131 | } 132 | 133 | /// If the value is an integer, returns the associated u64. Returns `None` otherwise. 
134 | /// 135 | /// # Example 136 | /// 137 | /// ``` 138 | /// let value = jsonbb::Value::from(1i64); 139 | /// assert_eq!(value.as_u64(), Some(1)); 140 | /// ``` 141 | pub fn as_u64(&self) -> Option { 142 | self.as_ref().as_u64() 143 | } 144 | 145 | /// If the value is a float, returns the associated f64. Returns `None` otherwise. 146 | /// 147 | /// # Example 148 | /// 149 | /// ``` 150 | /// let value = jsonbb::Value::from(3.14_f64); 151 | /// assert_eq!(value.as_f64(), Some(3.14)); 152 | /// ``` 153 | pub fn as_f64(&self) -> Option { 154 | self.as_ref().as_f64() 155 | } 156 | 157 | /// If the value is a string, returns the associated str. Returns `None` otherwise. 158 | /// 159 | /// # Example 160 | /// 161 | /// ``` 162 | /// let value = jsonbb::Value::from("json"); 163 | /// assert_eq!(value.as_str(), Some("json")); 164 | /// ``` 165 | pub fn as_str(&self) -> Option<&str> { 166 | self.as_ref().as_str() 167 | } 168 | 169 | /// If the value is an array, returns the associated array. Returns `None` otherwise. 170 | /// 171 | /// # Example 172 | /// 173 | /// ``` 174 | /// let value: jsonbb::Value = "[]".parse().unwrap(); 175 | /// assert_eq!(value.as_array().unwrap().len(), 0); 176 | /// ``` 177 | pub fn as_array(&self) -> Option> { 178 | self.as_ref().as_array() 179 | } 180 | 181 | /// If the value is an object, returns the associated map. Returns `None` otherwise. 182 | /// 183 | /// # Example 184 | /// 185 | /// ``` 186 | /// let value: jsonbb::Value = "{}".parse().unwrap(); 187 | /// assert_eq!(value.as_object().unwrap().len(), 0); 188 | /// ``` 189 | pub fn as_object(&self) -> Option> { 190 | self.as_ref().as_object() 191 | } 192 | 193 | /// Returns true if the value is a null. Returns false otherwise. 194 | /// 195 | /// # Example 196 | /// 197 | /// ``` 198 | /// assert!(jsonbb::Value::from(()).is_null()); 199 | /// 200 | /// // The boolean `false` is not null. 
201 | /// assert!(!jsonbb::Value::from(false).is_null()); 202 | /// ``` 203 | pub fn is_null(&self) -> bool { 204 | self.as_ref().is_null() 205 | } 206 | 207 | /// Returns true if the value is a boolean. Returns false otherwise. 208 | /// 209 | /// # Example 210 | /// 211 | /// ``` 212 | /// assert!(jsonbb::Value::from(false).is_boolean()); 213 | /// 214 | /// // The string `"false"` is a string, not a boolean. 215 | /// assert!(!jsonbb::Value::from("false").is_boolean()); 216 | /// ``` 217 | pub fn is_boolean(&self) -> bool { 218 | self.as_ref().is_boolean() 219 | } 220 | 221 | /// Returns true if the value is a number. Returns false otherwise. 222 | /// 223 | /// # Example 224 | /// 225 | /// ``` 226 | /// assert!(jsonbb::Value::from(1).is_number()); 227 | /// 228 | /// // The string `"1"` is a string, not a number. 229 | /// assert!(!jsonbb::Value::from("1").is_number()); 230 | /// ``` 231 | pub fn is_number(&self) -> bool { 232 | self.as_ref().is_number() 233 | } 234 | 235 | /// Returns true if the value is an integer between zero and `u64::MAX`. 236 | /// 237 | /// # Example 238 | /// 239 | /// ``` 240 | /// assert!(jsonbb::Value::from(1i64).is_u64()); 241 | /// 242 | /// // Negative integer. 243 | /// assert!(!jsonbb::Value::from(-1i64).is_u64()); 244 | /// ``` 245 | pub fn is_u64(&self) -> bool { 246 | self.as_ref().is_u64() 247 | } 248 | 249 | /// Returns true if the value is an integer between `i64::MIN` and `i64::MAX`. 250 | /// 251 | /// # Example 252 | /// 253 | /// ``` 254 | /// assert!(jsonbb::Value::from(1u64).is_i64()); 255 | /// 256 | /// // Greater than i64::MAX. 257 | /// assert!(!jsonbb::Value::from(u64::MAX).is_i64()); 258 | /// ``` 259 | pub fn is_i64(&self) -> bool { 260 | self.as_ref().is_i64() 261 | } 262 | 263 | /// Returns true if the value is a number that can be represented by f64. 
264 | /// 265 | /// # Example 266 | /// 267 | /// ``` 268 | /// assert!(jsonbb::Value::from(0f64).is_f64()); 269 | /// 270 | /// // Integer 271 | /// assert!(!jsonbb::Value::from(1i64).is_f64()); 272 | /// ``` 273 | pub fn is_f64(&self) -> bool { 274 | self.as_ref().is_f64() 275 | } 276 | 277 | /// Returns true if the value is a string. Returns false otherwise. 278 | /// 279 | /// # Example 280 | /// 281 | /// ``` 282 | /// assert!(jsonbb::Value::from("string").is_string()); 283 | /// 284 | /// // The boolean `false` is not a string. 285 | /// assert!(!jsonbb::Value::from(false).is_string()); 286 | /// ``` 287 | pub fn is_string(&self) -> bool { 288 | self.as_ref().is_string() 289 | } 290 | 291 | /// Returns true if the value is an array. Returns false otherwise. 292 | pub fn is_array(&self) -> bool { 293 | self.as_ref().is_array() 294 | } 295 | 296 | /// Returns true if the value is an object. Returns false otherwise. 297 | pub fn is_object(&self) -> bool { 298 | self.as_ref().is_object() 299 | } 300 | 301 | /// Returns the capacity of the internal buffer, in bytes. 302 | pub fn capacity(&self) -> usize { 303 | self.buffer.len() 304 | } 305 | 306 | /// Index into a JSON array or object. 307 | /// 308 | /// A string index can be used to access a value in an object, 309 | /// and a usize index can be used to access an element of an array. 
310 | /// 311 | /// # Example 312 | /// 313 | /// ``` 314 | /// let object: jsonbb::Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 315 | /// assert_eq!(object.get("a").unwrap().to_string(), "1"); 316 | /// assert!(object.get("c").is_none()); 317 | /// assert!(object.get(0).is_none()); 318 | /// 319 | /// let array: jsonbb::Value = r#"["a", "b"]"#.parse().unwrap(); 320 | /// assert_eq!(array.get(0).unwrap().to_string(), "\"a\""); 321 | /// assert!(array.get(2).is_none()); 322 | /// assert!(array.get("a").is_none()); 323 | /// ``` 324 | pub fn get(&self, index: impl Index) -> Option> { 325 | index.index_into(self.as_ref()) 326 | } 327 | 328 | /// Looks up a value by a JSON Pointer. 329 | /// 330 | /// JSON Pointer defines a string syntax for identifying a specific value 331 | /// within a JavaScript Object Notation (JSON) document. 332 | /// 333 | /// A Pointer is a Unicode string with the reference tokens separated by `/`. 334 | /// Inside tokens `/` is replaced by `~1` and `~` is replaced by `~0`. The 335 | /// addressed value is returned and if there is no such value `None` is 336 | /// returned. 337 | /// 338 | /// For more information read [RFC6901](https://tools.ietf.org/html/rfc6901). 339 | /// 340 | /// # Examples 341 | /// 342 | /// ``` 343 | /// # use jsonbb::json; 344 | /// # 345 | /// let data = json!({ 346 | /// "x": { 347 | /// "y": ["z", "zz"] 348 | /// } 349 | /// }); 350 | /// 351 | /// assert_eq!(data.pointer("/x/y/1").unwrap(), json!("zz").as_ref()); 352 | /// assert_eq!(data.pointer("/a/b/c"), None); 353 | /// ``` 354 | pub fn pointer<'a>(&'a self, pointer: &str) -> Option> { 355 | self.as_ref().pointer(pointer) 356 | } 357 | 358 | /// Push a value into a JSON array. 359 | /// 360 | /// This function is `O(N)` where N is the number of elements in the array. 361 | /// 362 | /// # Panics 363 | /// 364 | /// Panics if the value is not an array. 
365 | /// 366 | /// # Example 367 | /// ``` 368 | /// let mut array: jsonbb::Value = "[1]".parse().unwrap(); 369 | /// array.array_push(jsonbb::Value::from(()).as_ref()); 370 | /// array.array_push(jsonbb::Value::from(2).as_ref()); 371 | /// array.array_push(jsonbb::Value::from("str").as_ref()); 372 | /// array.array_push(jsonbb::Value::array([]).as_ref()); 373 | /// array.array_push(jsonbb::Value::object([]).as_ref()); 374 | /// assert_eq!(array.to_string(), r#"[1,null,2,"str",[],{}]"#); 375 | /// ``` 376 | pub fn array_push(&mut self, value: ValueRef<'_>) { 377 | let len = self.as_array().expect("not array").len(); 378 | // The offset to insert the value. 379 | let offset = self.buffer.len() - 4 - 4 - 4 - 4 * len; 380 | let mut buffer = std::mem::take(&mut self.buffer).into_vec(); 381 | // reserve space for the value + its entry 382 | buffer.reserve_exact(value.capacity() + 4); 383 | // remove tailing (len, size, entry) 384 | buffer.truncate(buffer.len() - 12); 385 | // insert the value 386 | buffer.splice(offset..offset, value.as_slice().iter().copied()); 387 | // push the entry 388 | buffer.put_slice(value.make_entry(offset).as_bytes()); 389 | // push (len, size, entry) 390 | buffer.put_u32_ne((len + 1) as u32); 391 | buffer.put_u32_ne((buffer.len() + 4) as u32); 392 | buffer.put_slice(Entry::array(buffer.len()).as_bytes()); 393 | // store the buffer 394 | self.buffer = buffer.into(); 395 | } 396 | 397 | fn from_builder(capacity: usize, f: impl FnOnce(&mut Builder)) -> Self { 398 | let mut builder = Builder::with_capacity(capacity); 399 | f(&mut builder); 400 | builder.finish() 401 | } 402 | } 403 | 404 | impl fmt::Debug for Value { 405 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 406 | self.as_ref().fmt(f) 407 | } 408 | } 409 | 410 | /// Display a JSON value as a string. 
411 | impl fmt::Display for Value { 412 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 413 | self.as_ref().fmt(f) 414 | } 415 | } 416 | 417 | /// # Example 418 | /// 419 | /// ``` 420 | /// let a: jsonbb::Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 421 | /// let b: jsonbb::Value = r#"{"b": 2, "a": 1.0}"#.parse().unwrap(); 422 | /// assert_eq!(a, b); 423 | /// ``` 424 | impl PartialEq for Value { 425 | fn eq(&self, other: &Self) -> bool { 426 | self.as_ref().eq(&other.as_ref()) 427 | } 428 | } 429 | 430 | impl Eq for Value {} 431 | 432 | impl PartialOrd for Value { 433 | fn partial_cmp(&self, other: &Self) -> Option { 434 | Some(self.cmp(other)) 435 | } 436 | } 437 | 438 | /// Compare two JSON values. 439 | /// 440 | /// The ordering is defined as follows: 441 | /// 442 | /// 443 | /// # Example 444 | /// 445 | /// ``` 446 | /// use jsonbb::Value; 447 | /// 448 | /// // Object > Array > Boolean > Number > String > Null 449 | /// let v = ["null", r#""str""#, "-1", "0", "3.14", "false", "true", "[]", "{}"]; 450 | /// let v = v.iter().map(|s| s.parse().unwrap()).collect::>(); 451 | /// for (i, a) in v.iter().enumerate() { 452 | /// for b in v.iter().skip(i + 1) { 453 | /// assert!(a < b); 454 | /// } 455 | /// } 456 | /// 457 | /// // Array with n elements > array with n - 1 elements 458 | /// let a: Value = r#"[1, 2, 3]"#.parse().unwrap(); 459 | /// let b: Value = r#"[1, 2]"#.parse().unwrap(); 460 | /// assert!(a > b); 461 | /// 462 | /// // arrays with equal numbers of elements are compared in the order: 463 | /// // element-1, element-2 ... 
464 | /// let a: Value = r#"[1, 2]"#.parse().unwrap(); 465 | /// let b: Value = r#"[1, 3]"#.parse().unwrap(); 466 | /// assert!(a < b); 467 | /// 468 | /// // Object with n pairs > object with n - 1 pairs 469 | /// let a: Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 470 | /// let b: Value = r#"{"a": 1}"#.parse().unwrap(); 471 | /// assert!(a > b); 472 | /// 473 | /// // Objects with equal numbers of pairs are compared in the order: 474 | /// // key-1, value-1, key-2 ... 475 | /// let a: Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 476 | /// let b: Value = r#"{"a": 2, "b": 1}"#.parse().unwrap(); 477 | /// assert!(a < b); 478 | /// ``` 479 | impl Ord for Value { 480 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 481 | self.as_ref().cmp(&other.as_ref()) 482 | } 483 | } 484 | 485 | impl Hash for Value { 486 | fn hash(&self, state: &mut H) { 487 | self.as_ref().hash(state) 488 | } 489 | } 490 | 491 | impl Default for Value { 492 | fn default() -> Self { 493 | Self::null() 494 | } 495 | } 496 | 497 | impl From for Value { 498 | fn from(value: serde_json::Value) -> Self { 499 | Self::from(&value) 500 | } 501 | } 502 | 503 | impl From<&serde_json::Value> for Value { 504 | fn from(value: &serde_json::Value) -> Self { 505 | Self::from_builder(0, |b| b.add_serde_value(value)) 506 | } 507 | } 508 | 509 | impl From for Value { 510 | fn from(value: serde_json::Number) -> Self { 511 | Self::from(&value) 512 | } 513 | } 514 | 515 | impl From<&serde_json::Number> for Value { 516 | fn from(n: &serde_json::Number) -> Self { 517 | Self::from_builder(0, |b| b.add_serde_number(n)) 518 | } 519 | } 520 | 521 | impl From for serde_json::Value { 522 | fn from(value: Value) -> Self { 523 | value.as_ref().into() 524 | } 525 | } 526 | 527 | impl>> Builder { 528 | /// Adds a serde `Value` recursively to the builder and returns its ptr. 
529 | fn add_serde_value(&mut self, value: &serde_json::Value) { 530 | match value { 531 | serde_json::Value::Null => self.add_null(), 532 | serde_json::Value::Bool(b) => self.add_bool(*b), 533 | serde_json::Value::Number(n) => self.add_serde_number(n), 534 | serde_json::Value::String(s) => self.add_string(s), 535 | serde_json::Value::Array(a) => { 536 | self.begin_array(); 537 | for v in a.iter() { 538 | self.add_serde_value(v); 539 | } 540 | self.end_array(); 541 | } 542 | serde_json::Value::Object(o) => { 543 | self.begin_object(); 544 | for (k, v) in o.iter() { 545 | self.add_string(k); 546 | self.add_serde_value(v); 547 | } 548 | self.end_object() 549 | } 550 | } 551 | } 552 | 553 | /// Adds a serde `Number`. 554 | fn add_serde_number(&mut self, n: &serde_json::Number) { 555 | if let Some(i) = n.as_u64() { 556 | self.add_u64(i) 557 | } else if let Some(i) = n.as_i64() { 558 | self.add_i64(i) 559 | } else if let Some(f) = n.as_f64() { 560 | self.add_f64(f) 561 | } else { 562 | panic!("invalid number"); 563 | } 564 | } 565 | } 566 | 567 | impl FromStr for Value { 568 | type Err = serde_json::Error; 569 | 570 | fn from_str(s: &str) -> Result { 571 | Self::from_text(s.as_bytes()) 572 | } 573 | } 574 | 575 | impl From<()> for Value { 576 | fn from(_: ()) -> Self { 577 | Self::from_builder(4, |b| b.add_null()) 578 | } 579 | } 580 | 581 | impl From for Value { 582 | fn from(v: bool) -> Self { 583 | Self::from_builder(4, |b| b.add_bool(v)) 584 | } 585 | } 586 | 587 | impl From for Value { 588 | fn from(v: u8) -> Self { 589 | Self::from(v as u64) 590 | } 591 | } 592 | 593 | impl From for Value { 594 | fn from(v: u16) -> Self { 595 | Self::from(v as u64) 596 | } 597 | } 598 | 599 | impl From for Value { 600 | fn from(v: u32) -> Self { 601 | Self::from(v as u64) 602 | } 603 | } 604 | 605 | impl From for Value { 606 | fn from(v: u64) -> Self { 607 | Self::from_builder(1 + 8 + 4, |b| b.add_u64(v)) 608 | } 609 | } 610 | 611 | impl From for Value { 612 | fn from(v: usize) -> 
Self { 613 | Self::from(v as u64) 614 | } 615 | } 616 | 617 | impl From for Value { 618 | fn from(v: i8) -> Self { 619 | Self::from(v as i64) 620 | } 621 | } 622 | 623 | impl From for Value { 624 | fn from(v: i16) -> Self { 625 | Self::from(v as i64) 626 | } 627 | } 628 | 629 | impl From for Value { 630 | fn from(v: i32) -> Self { 631 | Self::from(v as i64) 632 | } 633 | } 634 | 635 | impl From for Value { 636 | fn from(v: i64) -> Self { 637 | Self::from_builder(1 + 8 + 4, |b| b.add_i64(v)) 638 | } 639 | } 640 | 641 | impl From for Value { 642 | fn from(v: isize) -> Self { 643 | Self::from(v as u64) 644 | } 645 | } 646 | 647 | impl From for Value { 648 | fn from(v: f32) -> Self { 649 | Self::from(v as f64) 650 | } 651 | } 652 | 653 | impl From for Value { 654 | fn from(v: f64) -> Self { 655 | Self::from_builder(1 + 8 + 4, |b| b.add_f64(v)) 656 | } 657 | } 658 | 659 | impl From<&str> for Value { 660 | fn from(s: &str) -> Self { 661 | Self::from_builder(s.len() + 8, |b| b.add_string(s)) 662 | } 663 | } 664 | 665 | /// Creates a `Value` from bytes of jsonbb encoding. 666 | /// 667 | /// If you want to create a `Value` from JSON text, use [`FromStr`] or [`from_text`] instead. 
668 | /// 669 | /// [`from_text`]: #method.from_text 670 | /// [`FromStr`]: #method.from_str 671 | impl From<&[u8]> for Value { 672 | fn from(s: &[u8]) -> Self { 673 | Self::from_bytes(s) 674 | } 675 | } 676 | 677 | impl From> for Value { 678 | fn from(v: ValueRef<'_>) -> Self { 679 | Self::from_builder(v.capacity() + 4, |b| b.add_value(v)) 680 | } 681 | } 682 | 683 | #[cfg(test)] 684 | mod tests { 685 | use super::*; 686 | 687 | #[test] 688 | fn from_serde() { 689 | let serde_value: serde_json::Value = r#" 690 | { 691 | "name": "John Doe", 692 | "age": 43, 693 | "phones": [ 694 | "+44 1234567", 695 | "+44 2345678" 696 | ] 697 | }"# 698 | .parse() 699 | .unwrap(); 700 | let _value = Value::from(&serde_value); 701 | } 702 | 703 | #[test] 704 | #[should_panic] 705 | fn from_nan() { 706 | _ = Value::from(f64::NAN); 707 | } 708 | 709 | #[test] 710 | #[should_panic] 711 | fn from_inf() { 712 | _ = Value::from(f64::INFINITY); 713 | } 714 | 715 | #[test] 716 | #[should_panic] 717 | fn from_neg_inf() { 718 | _ = Value::from(f64::NEG_INFINITY); 719 | } 720 | 721 | #[test] 722 | fn value_size() { 723 | assert_eq!(Value::from(0).capacity(), 1 + 4); 724 | assert_eq!(Value::from(1).capacity(), 1 + 1 + 4); 725 | assert_eq!(Value::from(128).capacity(), 1 + 2 + 4); 726 | assert_eq!(Value::from(32768).capacity(), 1 + 4 + 4); 727 | assert_eq!(Value::from(2_147_483_648_u64).capacity(), 1 + 8 + 4); 728 | assert_eq!(Value::from(i8::MIN).capacity(), 1 + 1 + 4); 729 | assert_eq!(Value::from(i16::MIN).capacity(), 1 + 2 + 4); 730 | assert_eq!(Value::from(i32::MIN).capacity(), 1 + 4 + 4); 731 | assert_eq!(Value::from(i64::MIN).capacity(), 1 + 8 + 4); 732 | assert_eq!(Value::from(0.0f32).capacity(), 1 + 8 + 4); 733 | assert_eq!(Value::from(0.0f64).capacity(), 1 + 8 + 4); 734 | } 735 | } 736 | -------------------------------------------------------------------------------- /src/value_ref.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 
RisingWave Labs 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::fmt; 16 | use std::hash::{Hash, Hasher}; 17 | 18 | use super::*; 19 | use bytes::Buf; 20 | use serde_json::Number; 21 | 22 | /// A reference to a JSON value. 23 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] 24 | pub enum ValueRef<'a> { 25 | // NOTE: Order matters! 26 | // we follow postgresql's order: 27 | // Object > Array > Boolean > Number > String > Null 28 | /// Represents a JSON null value. 29 | Null, 30 | /// Represents a JSON string. 31 | String(&'a str), 32 | /// Represents a JSON number. 33 | Number(NumberRef<'a>), 34 | /// Represents a JSON boolean. 35 | Bool(bool), 36 | /// Represents a JSON array. 37 | Array(ArrayRef<'a>), 38 | /// Represents a JSON object. 39 | Object(ObjectRef<'a>), 40 | } 41 | 42 | impl<'a> ValueRef<'a> { 43 | /// Creates a `ValueRef` from a byte slice. 44 | pub fn from_bytes(bytes: &[u8]) -> ValueRef<'_> { 45 | let entry = Entry::from(&bytes[bytes.len() - 4..]); 46 | ValueRef::from_slice(bytes, entry) 47 | } 48 | 49 | /// Returns true if the value is a null. Returns false otherwise. 50 | pub fn is_null(self) -> bool { 51 | matches!(self, Self::Null) 52 | } 53 | 54 | /// Returns true if the value is a boolean. Returns false otherwise. 55 | pub fn is_boolean(self) -> bool { 56 | matches!(self, Self::Bool(_)) 57 | } 58 | 59 | /// Returns true if the value is a number. 
Returns false otherwise. 60 | pub fn is_number(self) -> bool { 61 | matches!(self, Self::Number(_)) 62 | } 63 | 64 | /// Returns true if the value is an integer between zero and `u64::MAX`. 65 | pub fn is_u64(self) -> bool { 66 | matches!(self, Self::Number(n) if n.is_u64()) 67 | } 68 | 69 | /// Returns true if the value is an integer between `i64::MIN` and `i64::MAX`. 70 | pub fn is_i64(self) -> bool { 71 | matches!(self, Self::Number(n) if n.is_i64()) 72 | } 73 | 74 | /// Returns true if the value is a number that can be represented by f64. 75 | pub fn is_f64(self) -> bool { 76 | matches!(self, Self::Number(n) if n.is_f64()) 77 | } 78 | 79 | /// Returns true if the value is a string. Returns false otherwise. 80 | pub fn is_string(self) -> bool { 81 | matches!(self, Self::String(_)) 82 | } 83 | 84 | /// Returns true if the value is an array. Returns false otherwise. 85 | pub fn is_array(self) -> bool { 86 | matches!(self, Self::Array(_)) 87 | } 88 | 89 | /// Returns true if the value is an object. Returns false otherwise. 90 | pub fn is_object(self) -> bool { 91 | matches!(self, Self::Object(_)) 92 | } 93 | 94 | /// If the value is `null`, returns `()`. Returns `None` otherwise. 95 | pub fn as_null(self) -> Option<()> { 96 | match self { 97 | Self::Null => Some(()), 98 | _ => None, 99 | } 100 | } 101 | 102 | /// If the value is a boolean, returns the associated bool. Returns `None` otherwise. 103 | pub fn as_bool(self) -> Option { 104 | match self { 105 | Self::Bool(b) => Some(b), 106 | _ => None, 107 | } 108 | } 109 | 110 | /// If the value is a number, returns the associated number. Returns `None` otherwise. 111 | pub fn as_number(self) -> Option> { 112 | match self { 113 | Self::Number(n) => Some(n), 114 | _ => None, 115 | } 116 | } 117 | 118 | /// If the value is an integer, returns the associated u64. Returns `None` otherwise. 
119 | pub fn as_u64(self) -> Option { 120 | match self { 121 | Self::Number(n) => n.as_u64(), 122 | _ => None, 123 | } 124 | } 125 | 126 | /// If the value is an integer, returns the associated i64. Returns `None` otherwise. 127 | pub fn as_i64(self) -> Option { 128 | match self { 129 | Self::Number(n) => n.as_i64(), 130 | _ => None, 131 | } 132 | } 133 | 134 | /// If the value is a float, returns the associated f64. Returns `None` otherwise. 135 | pub fn as_f64(self) -> Option { 136 | match self { 137 | Self::Number(n) => n.as_f64(), 138 | _ => None, 139 | } 140 | } 141 | 142 | /// If the value is a string, returns the associated str. Returns `None` otherwise. 143 | pub fn as_str(self) -> Option<&'a str> { 144 | match self { 145 | Self::String(s) => Some(s), 146 | _ => None, 147 | } 148 | } 149 | 150 | /// If the value is an array, returns the associated array. Returns `None` otherwise. 151 | pub fn as_array(self) -> Option> { 152 | match self { 153 | Self::Array(a) => Some(a), 154 | _ => None, 155 | } 156 | } 157 | 158 | /// If the value is an object, returns the associated map. Returns `None` otherwise. 159 | pub fn as_object(self) -> Option> { 160 | match self { 161 | Self::Object(o) => Some(o), 162 | _ => None, 163 | } 164 | } 165 | 166 | /// Creates owned `Value` from `ValueRef`. 
167 | pub fn to_owned(self) -> Value { 168 | self.into() 169 | } 170 | 171 | pub(crate) fn from_slice(data: &'a [u8], entry: Entry) -> Self { 172 | match entry.tag() { 173 | Entry::NULL_TAG => Self::Null, 174 | Entry::FALSE_TAG => Self::Bool(false), 175 | Entry::TRUE_TAG => Self::Bool(true), 176 | Entry::NUMBER_TAG => { 177 | let ptr = entry.offset(); 178 | let data = &data[ptr..ptr + 1 + number_size(data[ptr])]; 179 | Self::Number(NumberRef { data }) 180 | } 181 | Entry::STRING_TAG => { 182 | let ptr = entry.offset(); 183 | let len = (&data[ptr..]).get_u32_ne() as usize; 184 | // SAFETY: we don't check for utf8 validity because it's expensive 185 | let payload = 186 | unsafe { std::str::from_utf8_unchecked(&data[ptr + 4..ptr + 4 + len]) }; 187 | Self::String(payload) 188 | } 189 | Entry::ARRAY_TAG => { 190 | let ptr = entry.offset(); 191 | Self::Array(ArrayRef::from_slice(data, ptr)) 192 | } 193 | Entry::OBJECT_TAG => { 194 | let ptr = entry.offset(); 195 | Self::Object(ObjectRef::from_slice(data, ptr)) 196 | } 197 | _ => panic!("invalid entry"), 198 | } 199 | } 200 | 201 | /// Returns the entire value as a slice. 202 | pub(crate) fn as_slice(self) -> &'a [u8] { 203 | match self { 204 | Self::Null => &[], 205 | Self::Bool(_) => &[], 206 | Self::Number(n) => n.data, 207 | Self::String(s) => unsafe { 208 | // SAFETY: include the 4 bytes for the length 209 | std::slice::from_raw_parts(s.as_ptr().sub(4), s.len() + 4) 210 | }, 211 | Self::Array(a) => a.as_slice(), 212 | Self::Object(o) => o.as_slice(), 213 | } 214 | } 215 | 216 | /// Makes an entry from the value. 
217 | pub(crate) fn make_entry(self, offset: usize) -> Entry { 218 | match self { 219 | Self::Null => Entry::null(), 220 | Self::Bool(b) => Entry::bool(b), 221 | Self::Number(_) => Entry::number(offset), 222 | Self::String(_) => Entry::string(offset), 223 | Self::Array(a) => Entry::array(offset + a.as_slice().len()), 224 | Self::Object(o) => Entry::object(offset + o.as_slice().len()), 225 | } 226 | } 227 | 228 | /// Returns the capacity to store this value, in bytes. 229 | pub fn capacity(self) -> usize { 230 | self.as_slice().len() 231 | } 232 | 233 | /// Index into a JSON array or object. 234 | /// A string index can be used to access a value in an object, 235 | /// and a usize index can be used to access an element of an array. 236 | pub fn get(self, index: impl Index) -> Option> { 237 | index.index_into(self) 238 | } 239 | 240 | /// Looks up a value by a JSON Pointer. 241 | pub fn pointer(self, pointer: &str) -> Option { 242 | if pointer.is_empty() { 243 | return Some(self); 244 | } 245 | if !pointer.starts_with('/') { 246 | return None; 247 | } 248 | 249 | fn parse_index(s: &str) -> Option { 250 | if s.starts_with('+') || (s.starts_with('0') && s.len() != 1) { 251 | return None; 252 | } 253 | s.parse().ok() 254 | } 255 | 256 | pointer 257 | .split('/') 258 | .skip(1) 259 | .map(|x| x.replace("~1", "/").replace("~0", "~")) 260 | .try_fold(self, |target, token| match target { 261 | Self::Object(map) => map.get(&token), 262 | Self::Array(list) => parse_index(&token).and_then(|x| list.get(x)), 263 | _ => None, 264 | }) 265 | } 266 | } 267 | 268 | impl fmt::Debug for ValueRef<'_> { 269 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 270 | match self { 271 | Self::Null => f.write_str("null"), 272 | Self::Bool(b) => b.fmt(f), 273 | Self::Number(n) => n.fmt(f), 274 | Self::String(s) => s.fmt(f), 275 | Self::Array(a) => a.fmt(f), 276 | Self::Object(o) => o.fmt(f), 277 | } 278 | } 279 | } 280 | 281 | /// Display a JSON value as a string. 
282 | impl fmt::Display for ValueRef<'_> { 283 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 284 | serialize_in_json(self, f) 285 | } 286 | } 287 | 288 | /// Build a `serde_json::Value` from a jsonbb node. 289 | impl From> for serde_json::Value { 290 | fn from(value: ValueRef<'_>) -> Self { 291 | match value { 292 | ValueRef::Null => Self::Null, 293 | ValueRef::Bool(b) => Self::Bool(b), 294 | ValueRef::Number(n) => Self::Number(n.to_number()), 295 | ValueRef::String(s) => Self::String(s.to_owned()), 296 | ValueRef::Array(a) => Self::Array(a.iter().map(Self::from).collect()), 297 | ValueRef::Object(o) => Self::Object( 298 | o.iter() 299 | .map(|(k, v)| (k.to_owned(), Self::from(v))) 300 | .collect(), 301 | ), 302 | } 303 | } 304 | } 305 | 306 | /// A reference to a JSON number. 307 | #[derive(Clone, Copy)] 308 | pub struct NumberRef<'a> { 309 | // # layout 310 | // | tag | number | 311 | // | 1 | 0/1/2/4/8 | 312 | data: &'a [u8], 313 | } 314 | 315 | impl NumberRef<'_> { 316 | /// Dereferences the number. 317 | pub fn to_number(self) -> Number { 318 | let mut data = self.data; 319 | match data.get_u8() { 320 | NUMBER_ZERO => Number::from(0), 321 | NUMBER_I8 => Number::from(data.get_i8()), 322 | NUMBER_I16 => Number::from(data.get_i16_ne()), 323 | NUMBER_I32 => Number::from(data.get_i32_ne()), 324 | NUMBER_I64 => Number::from(data.get_i64_ne()), 325 | NUMBER_U64 => Number::from(data.get_u64_ne()), 326 | NUMBER_F64 => Number::from_f64(data.get_f64_ne()).unwrap(), 327 | t => panic!("invalid number tag: {t}"), 328 | } 329 | } 330 | 331 | /// If the number is an integer, returns the associated u64. Returns `None` otherwise. 332 | pub fn as_u64(self) -> Option { 333 | self.to_number().as_u64() 334 | } 335 | 336 | /// If the number is an integer, returns the associated i64. Returns `None` otherwise. 337 | pub fn as_i64(self) -> Option { 338 | self.to_number().as_i64() 339 | } 340 | 341 | /// Represents the number as f64 if possible. Returns None otherwise. 
342 | pub fn as_f64(self) -> Option { 343 | self.to_number().as_f64() 344 | } 345 | 346 | /// Represents the number as f32 if possible. Returns None otherwise. 347 | pub(crate) fn as_f32(&self) -> Option { 348 | let mut data = self.data; 349 | Some(match data.get_u8() { 350 | NUMBER_ZERO => 0 as f32, 351 | NUMBER_I8 => data.get_i8() as f32, 352 | NUMBER_I16 => data.get_i16_ne() as f32, 353 | NUMBER_I32 => data.get_i32_ne() as f32, 354 | NUMBER_I64 => data.get_i64_ne() as f32, 355 | NUMBER_U64 => data.get_u64_ne() as f32, 356 | NUMBER_F64 => data.get_f64_ne() as f32, 357 | t => panic!("invalid number tag: {t}"), 358 | }) 359 | } 360 | 361 | /// Returns true if the number can be represented by u64. 362 | pub fn is_u64(self) -> bool { 363 | self.to_number().is_u64() 364 | } 365 | 366 | /// Returns true if the number can be represented by i64. 367 | pub fn is_i64(self) -> bool { 368 | self.to_number().is_i64() 369 | } 370 | 371 | /// Returns true if the number can be represented by f64. 372 | pub fn is_f64(self) -> bool { 373 | self.to_number().is_f64() 374 | } 375 | } 376 | 377 | impl fmt::Debug for NumberRef<'_> { 378 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 379 | self.to_number().fmt(f) 380 | } 381 | } 382 | 383 | impl fmt::Display for NumberRef<'_> { 384 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 385 | self.to_number().fmt(f) 386 | } 387 | } 388 | 389 | impl PartialEq for NumberRef<'_> { 390 | fn eq(&self, other: &Self) -> bool { 391 | let a = self.to_number(); 392 | let b = other.to_number(); 393 | match (a.as_u64(), b.as_u64()) { 394 | (Some(a), Some(b)) => return a == b, // a, b > 0 395 | (Some(_), None) if b.is_i64() => return false, // a >= 0 > b 396 | (None, Some(_)) if a.is_i64() => return false, // a < 0 <= b 397 | (None, None) => { 398 | if let (Some(a), Some(b)) = (a.as_i64(), b.as_i64()) { 399 | return a == b; // a, b < 0 400 | } 401 | } 402 | _ => {} 403 | } 404 | // either a or b is a float 405 | let a = 
a.as_f64().unwrap(); 406 | let b = b.as_f64().unwrap(); 407 | a == b 408 | } 409 | } 410 | 411 | impl Eq for NumberRef<'_> {} 412 | 413 | impl PartialOrd for NumberRef<'_> { 414 | fn partial_cmp(&self, other: &Self) -> Option { 415 | Some(self.cmp(other)) 416 | } 417 | } 418 | 419 | impl Ord for NumberRef<'_> { 420 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 421 | let a = self.to_number(); 422 | let b = other.to_number(); 423 | match (a.as_u64(), b.as_u64()) { 424 | (Some(a), Some(b)) => return a.cmp(&b), // a, b > 0 425 | (Some(_), None) if b.is_i64() => return std::cmp::Ordering::Greater, // a >= 0 > b 426 | (None, Some(_)) if a.is_i64() => return std::cmp::Ordering::Less, // a < 0 <= b 427 | (None, None) => { 428 | if let (Some(a), Some(b)) = (a.as_i64(), b.as_i64()) { 429 | return a.cmp(&b); // a, b < 0 430 | } 431 | } 432 | _ => {} 433 | } 434 | // either a or b is a float 435 | let a = a.as_f64().unwrap(); 436 | let b = b.as_f64().unwrap(); 437 | a.partial_cmp(&b).expect("NaN or Inf in JSON number") 438 | } 439 | } 440 | 441 | impl Hash for NumberRef<'_> { 442 | fn hash(&self, state: &mut H) { 443 | self.to_number().hash(state); 444 | } 445 | } 446 | 447 | /// A reference to a JSON array. 448 | #[derive(Clone, Copy)] 449 | pub struct ArrayRef<'a> { 450 | // # layout 451 | // v---------\ 452 | // | elements | [eptr] x len | len | size | 453 | // | | 4 x len | 4 | 4 | 454 | // |<----------- data (size) ------------>|^ptr 455 | data: &'a [u8], 456 | } 457 | 458 | impl<'a> ArrayRef<'a> { 459 | /// Returns the element at the given index, or `None` if the index is out of bounds. 460 | pub fn get(self, index: usize) -> Option> { 461 | let len = self.len(); 462 | if index >= len { 463 | return None; 464 | } 465 | let offset = self.data.len() - 8 - 4 * (len - index); 466 | let entry = Entry::from(&self.data[offset..offset + 4]); 467 | Some(ValueRef::from_slice(self.data, entry)) 468 | } 469 | 470 | /// Returns the number of elements in the array. 
471 | pub fn len(self) -> usize { 472 | (&self.data[self.data.len() - 8..]).get_u32_ne() as usize 473 | } 474 | 475 | /// Returns `true` if the array contains no elements. 476 | pub fn is_empty(self) -> bool { 477 | self.len() == 0 478 | } 479 | 480 | /// Returns an iterator over the array's elements. 481 | pub fn iter(self) -> impl ExactSizeIterator> { 482 | let len = self.len(); 483 | let offset = self.data.len() - 8 - 4 * len; 484 | self.data[offset..offset + 4 * len] 485 | .chunks_exact(4) 486 | .map(|slice| ValueRef::from_slice(self.data, Entry::from(slice))) 487 | } 488 | 489 | /// Returns the entire array as a slice. 490 | pub(crate) fn as_slice(self) -> &'a [u8] { 491 | self.data 492 | } 493 | 494 | /// Creates an `ArrayRef` from a slice. 495 | fn from_slice(data: &'a [u8], end: usize) -> Self { 496 | let size = (&data[end - 4..end]).get_u32_ne() as usize; 497 | Self { 498 | data: &data[end - size..end], 499 | } 500 | } 501 | } 502 | 503 | impl fmt::Debug for ArrayRef<'_> { 504 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 505 | f.debug_list().entries(self.iter()).finish() 506 | } 507 | } 508 | 509 | /// Display a JSON array as a string. 
510 | impl fmt::Display for ArrayRef<'_> { 511 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 512 | serialize_in_json(self, f) 513 | } 514 | } 515 | 516 | impl PartialEq for ArrayRef<'_> { 517 | fn eq(&self, other: &Self) -> bool { 518 | if self.len() != other.len() { 519 | return false; 520 | } 521 | self.iter().eq(other.iter()) 522 | } 523 | } 524 | 525 | impl Eq for ArrayRef<'_> {} 526 | 527 | impl PartialOrd for ArrayRef<'_> { 528 | fn partial_cmp(&self, other: &Self) -> Option { 529 | Some(self.cmp(other)) 530 | } 531 | } 532 | 533 | impl Ord for ArrayRef<'_> { 534 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 535 | // Array with n elements > array with n - 1 elements 536 | match self.len().cmp(&other.len()) { 537 | std::cmp::Ordering::Equal => self.iter().cmp(other.iter()), 538 | ord => ord, 539 | } 540 | } 541 | } 542 | 543 | impl Hash for ArrayRef<'_> { 544 | fn hash(&self, state: &mut H) { 545 | for v in self.iter() { 546 | v.hash(state); 547 | } 548 | } 549 | } 550 | 551 | /// A reference to a JSON object. 552 | #[derive(Clone, Copy)] 553 | pub struct ObjectRef<'a> { 554 | // # layout 555 | // v-v------ \-----\ 556 | // | elements | [kptr, vptr] x len | len | size | 557 | // | | 4 x 2 x len | 4 | 4 | 558 | // |<-------------- data (size) --------------->|^ptr 559 | // 560 | // entries are ordered by key and each key is unique. 561 | data: &'a [u8], 562 | } 563 | 564 | impl<'a> ObjectRef<'a> { 565 | /// Returns the value associated with the given key, or `None` if the key is not present. 
566 | /// 567 | /// # Examples 568 | /// ``` 569 | /// let json: jsonbb::Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 570 | /// let object = json.as_object().unwrap(); 571 | /// assert!(object.get("a").is_some()); 572 | /// assert!(object.get("c").is_none()); 573 | /// ``` 574 | pub fn get(self, key: &str) -> Option> { 575 | // do binary search since entries are ordered by key 576 | let entries = self.entries(); 577 | let idx = entries 578 | .binary_search_by_key(&key, |&(kentry, _)| { 579 | ValueRef::from_slice(self.data, kentry) 580 | .as_str() 581 | .expect("key must be string") 582 | }) 583 | .ok()?; 584 | let (_, ventry) = entries[idx]; 585 | Some(ValueRef::from_slice(self.data, ventry)) 586 | } 587 | 588 | /// Returns `true` if the object contains a value for the specified key. 589 | /// 590 | /// # Examples 591 | /// ``` 592 | /// let json: jsonbb::Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 593 | /// let object = json.as_object().unwrap(); 594 | /// assert_eq!(object.contains_key("a"), true); 595 | /// assert_eq!(object.contains_key("c"), false); 596 | /// ``` 597 | pub fn contains_key(self, key: &str) -> bool { 598 | // do binary search since entries are ordered by key 599 | let entries = self.entries(); 600 | entries 601 | .binary_search_by_key(&key, |&(kentry, _)| { 602 | ValueRef::from_slice(self.data, kentry) 603 | .as_str() 604 | .expect("key must be string") 605 | }) 606 | .is_ok() 607 | } 608 | 609 | /// Returns the number of elements in the object. 610 | /// 611 | /// # Examples 612 | /// ``` 613 | /// let json: jsonbb::Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 614 | /// let object = json.as_object().unwrap(); 615 | /// assert_eq!(object.len(), 2); 616 | /// ``` 617 | pub fn len(self) -> usize { 618 | (&self.data[self.data.len() - 8..]).get_u32_ne() as usize 619 | } 620 | 621 | /// Returns `true` if the object contains no elements. 
622 | /// 623 | /// # Examples 624 | /// ``` 625 | /// let json: jsonbb::Value = r#"{"a": 1, "b": 2}"#.parse().unwrap(); 626 | /// let object = json.as_object().unwrap(); 627 | /// assert_eq!(object.is_empty(), false); 628 | /// ``` 629 | pub fn is_empty(self) -> bool { 630 | self.len() == 0 631 | } 632 | 633 | /// Returns an iterator over the object's key-value pairs. 634 | /// 635 | /// # Examples 636 | /// ``` 637 | /// let json: jsonbb::Value = r#"{"b": 2, "a": 1}"#.parse().unwrap(); 638 | /// let kvs: Vec<_> = json.as_object().unwrap().iter().map(|(k, v)| (k, v.as_u64().unwrap())).collect(); 639 | /// assert_eq!(kvs, [("a", 1), ("b", 2)]); 640 | /// ``` 641 | pub fn iter(self) -> impl ExactSizeIterator)> { 642 | self.entries().iter().map(move |&(kentry, ventry)| { 643 | let k = ValueRef::from_slice(self.data, kentry); 644 | let v = ValueRef::from_slice(self.data, ventry); 645 | (k.as_str().expect("key must be string"), v) 646 | }) 647 | } 648 | 649 | /// Returns an iterator over the object's keys. 650 | /// 651 | /// # Examples 652 | /// ``` 653 | /// let json: jsonbb::Value = r#"{"b": 2, "a": 1}"#.parse().unwrap(); 654 | /// let keys: Vec<_> = json.as_object().unwrap().keys().collect(); 655 | /// assert_eq!(keys, ["a", "b"]); 656 | /// ``` 657 | pub fn keys(self) -> impl ExactSizeIterator { 658 | self.iter().map(|(k, _)| k) 659 | } 660 | 661 | /// Returns an iterator over the object's values. 662 | /// 663 | /// # Examples 664 | /// ``` 665 | /// let json: jsonbb::Value = r#"{"b": 2, "a": 1}"#.parse().unwrap(); 666 | /// let values: Vec<_> = json.as_object().unwrap().values().map(|v| v.as_u64().unwrap()).collect(); 667 | /// assert_eq!(values, [1, 2]); 668 | /// ``` 669 | pub fn values(self) -> impl ExactSizeIterator> { 670 | self.iter().map(|(_, v)| v) 671 | } 672 | 673 | /// Returns the entire object as a slice. 674 | pub(crate) fn as_slice(self) -> &'a [u8] { 675 | self.data 676 | } 677 | 678 | /// Creates an `ObjectRef` from a slice. 
679 | fn from_slice(data: &'a [u8], end: usize) -> Self { 680 | let size = (&data[end - 4..end]).get_u32_ne() as usize; 681 | Self { 682 | data: &data[end - size..end], 683 | } 684 | } 685 | 686 | /// Returns the key-value entries. 687 | fn entries(self) -> &'a [(Entry, Entry)] { 688 | let len = self.len(); 689 | let base = self.data.len() - 8 - 8 * len; 690 | let slice = &self.data[base..base + 8 * len]; 691 | unsafe { std::slice::from_raw_parts(slice.as_ptr() as _, len) } 692 | } 693 | } 694 | 695 | impl fmt::Debug for ObjectRef<'_> { 696 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 697 | f.debug_map().entries(self.iter()).finish() 698 | } 699 | } 700 | 701 | /// Display a JSON object as a string. 702 | impl fmt::Display for ObjectRef<'_> { 703 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 704 | serialize_in_json(self, f) 705 | } 706 | } 707 | 708 | impl PartialEq for ObjectRef<'_> { 709 | fn eq(&self, other: &Self) -> bool { 710 | if self.len() != other.len() { 711 | return false; 712 | } 713 | self.iter().eq(other.iter()) 714 | } 715 | } 716 | 717 | impl Eq for ObjectRef<'_> {} 718 | 719 | impl PartialOrd for ObjectRef<'_> { 720 | fn partial_cmp(&self, other: &Self) -> Option { 721 | Some(self.cmp(other)) 722 | } 723 | } 724 | 725 | impl Ord for ObjectRef<'_> { 726 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 727 | // Object with n pairs > object with n - 1 pairs 728 | match self.len().cmp(&other.len()) { 729 | std::cmp::Ordering::Equal => self.iter().cmp(other.iter()), 730 | ord => ord, 731 | } 732 | } 733 | } 734 | 735 | impl Hash for ObjectRef<'_> { 736 | fn hash(&self, state: &mut H) { 737 | for (k, v) in self.iter() { 738 | k.hash(state); 739 | v.hash(state); 740 | } 741 | } 742 | } 743 | 744 | /// Serialize a value in JSON format. 
745 | fn serialize_in_json(value: &impl ::serde::Serialize, f: &mut fmt::Formatter<'_>) -> fmt::Result { 746 | use std::io; 747 | 748 | struct WriterFormatter<'a, 'b: 'a> { 749 | inner: &'a mut fmt::Formatter<'b>, 750 | } 751 | 752 | impl<'a, 'b> io::Write for WriterFormatter<'a, 'b> { 753 | fn write(&mut self, buf: &[u8]) -> io::Result { 754 | // Safety: the serializer below only emits valid utf8 when using 755 | // the default formatter. 756 | let s = unsafe { std::str::from_utf8_unchecked(buf) }; 757 | self.inner.write_str(s).map_err(io_error)?; 758 | Ok(buf.len()) 759 | } 760 | 761 | fn flush(&mut self) -> io::Result<()> { 762 | Ok(()) 763 | } 764 | } 765 | 766 | fn io_error(_: fmt::Error) -> io::Error { 767 | // Error value does not matter because Display impl just maps it 768 | // back to fmt::Error. 769 | io::Error::new(io::ErrorKind::Other, "fmt error") 770 | } 771 | 772 | let alternate = f.alternate(); 773 | let mut wr = WriterFormatter { inner: f }; 774 | if alternate { 775 | // {:#} 776 | value 777 | .serialize(&mut serde_json::Serializer::pretty(&mut wr)) 778 | .map_err(|_| fmt::Error) 779 | } else { 780 | // {} 781 | value 782 | .serialize(&mut serde_json::Serializer::new(&mut wr)) 783 | .map_err(|_| fmt::Error) 784 | } 785 | } 786 | 787 | /// A type that can be used to index into a `ValueRef`. 788 | pub trait Index: private::Sealed { 789 | /// Return None if the key is not already in the array or object. 
790 | #[doc(hidden)] 791 | fn index_into<'v>(&self, v: ValueRef<'v>) -> Option>; 792 | } 793 | 794 | impl Index for usize { 795 | fn index_into<'v>(&self, v: ValueRef<'v>) -> Option> { 796 | match v { 797 | ValueRef::Array(a) => a.get(*self), 798 | _ => None, 799 | } 800 | } 801 | } 802 | 803 | impl Index for str { 804 | fn index_into<'v>(&self, v: ValueRef<'v>) -> Option> { 805 | match v { 806 | ValueRef::Object(o) => o.get(self), 807 | _ => None, 808 | } 809 | } 810 | } 811 | 812 | impl Index for String { 813 | fn index_into<'v>(&self, v: ValueRef<'v>) -> Option> { 814 | match v { 815 | ValueRef::Object(o) => o.get(self), 816 | _ => None, 817 | } 818 | } 819 | } 820 | 821 | impl<'a, T> Index for &'a T 822 | where 823 | T: ?Sized + Index, 824 | { 825 | fn index_into<'v>(&self, v: ValueRef<'v>) -> Option> { 826 | (**self).index_into(v) 827 | } 828 | } 829 | 830 | // Prevent users from implementing the Index trait. 831 | mod private { 832 | pub trait Sealed {} 833 | impl Sealed for usize {} 834 | impl Sealed for str {} 835 | impl Sealed for String {} 836 | impl<'a, T> Sealed for &'a T where T: ?Sized + Sealed {} 837 | } 838 | --------------------------------------------------------------------------------