├── .github └── workflows │ ├── publish.yaml │ └── rust.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── get_path.rs ├── parser.rs └── strip_nulls.rs ├── data ├── canada.json ├── citm_catalog.json └── twitter.json ├── rust-toolchain.toml ├── src ├── constants.rs ├── core │ ├── databend │ │ ├── builder.rs │ │ ├── constants.rs │ │ ├── de.rs │ │ ├── iterator.rs │ │ ├── jentry.rs │ │ ├── mod.rs │ │ ├── ser.rs │ │ └── util.rs │ ├── item.rs │ ├── mod.rs │ └── sqlite │ │ └── mod.rs ├── error.rs ├── extension.rs ├── from.rs ├── functions │ ├── array.rs │ ├── mod.rs │ ├── object.rs │ ├── operator.rs │ ├── path.rs │ └── scalar.rs ├── jsonpath │ ├── mod.rs │ ├── parser.rs │ ├── path.rs │ └── selector.rs ├── keypath.rs ├── lib.rs ├── number.rs ├── owned.rs ├── parser.rs ├── raw.rs ├── util.rs └── value.rs └── tests └── it ├── decode.rs ├── encode.rs ├── functions.rs ├── jsonpath_parser.rs ├── keypath_parser.rs ├── main.rs ├── parser.rs └── testdata ├── json_path.txt └── key_path.txt /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - '**/Cargo.toml' 9 | 10 | jobs: 11 | crates: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Setup Cargo Release 16 | run: | 17 | curl -fsSLo /tmp/cargo-release.tar.gz https://github.com/crate-ci/cargo-release/releases/download/v0.24.8/cargo-release-v0.24.8-x86_64-unknown-linux-gnu.tar.gz 18 | mkdir -p /tmp/cargo-release 19 | tar -C /tmp/cargo-release -xzf /tmp/cargo-release.tar.gz 20 | sudo mv /tmp/cargo-release/cargo-release /usr/local/bin 21 | - name: Release to crates.io 22 | env: 23 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 24 | run: | 25 | cargo release publish --execute --no-confirm 26 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: 
-------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Format 20 | run: cargo fmt --all -- --check 21 | - name: Clippy 22 | run: cargo clippy --workspace --all-targets --all-features -- -D warnings 23 | - name: Build 24 | run: cargo build --verbose 25 | - name: Run tests 26 | run: cargo test --verbose 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | # IDE and editor 13 | .vscode 14 | .idea 15 | 16 | # env files for backends 17 | .env 18 | 19 | # profiling 20 | flamegraph.svg 21 | perf.* 22 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [v0.5.1] - 2025-04-18 2 | 3 | ### Added 4 | 5 | - Chore: Bump nom 8.0.0 (#84) 6 | 7 | ## [v0.5.0] - 2025-04-15 8 | 9 | ### Added 10 | 11 | - Feat: json path support recursive wildcard member accessor `.**` syntax (#81) 12 | - Refactor: get object value by key name improve performance (#79) 13 | - Refactor: Implements serde trait for RawJsonb (#77) 14 | - Refactor JSONB functions: Improved API, Documentation, and Data Structures (#75) 15 | - Feat: add arithmetic expression support (#71) 16 | - Feat(expr): add filter 
expr `starts with` (#52) 17 | 18 | ## [v0.4.4] - 2024-11-16 19 | 20 | ### Fixed 21 | 22 | - Fix: panic when facing corrupted jsonb (#67) 23 | 24 | ### Added 25 | 26 | - Bump fast-float2 v0.2.3 (#69) 27 | - Feat: add a function to parse jsonb only (#66) 28 | - Feat: support `object_delete` and `object_pick` function (#65) 29 | - Feat: support `object_insert` function (#64) 30 | - Feat: Support json array functions (#62) 31 | - Feat: add lazy value (#61) 32 | 33 | ## [v0.4.3] - 2024-09-30 34 | 35 | ### Fixed 36 | 37 | - Fix: Fix compare object value with different length panic (#59) 38 | 39 | ## [v0.4.2] - 2024-09-19 40 | 41 | ### Added 42 | 43 | - Feat: make `preserve_order` a default feature (#56) 44 | 45 | ## [v0.4.1] - 2024-07-18 46 | 47 | ### Fixed 48 | 49 | - Fix: Fix jsonpath selector unwrap panic. (#53) 50 | 51 | ## [v0.4.0] - 2024-05-17 52 | 53 | ### Fixed 54 | 55 | - Fix: Fix get by keypath with null value. (#47) 56 | - Fix: Handle invalid jsonb value to avoid panic in functions. (#46) 57 | - Fix: Fix builder & concat container jentry len. (#43) 58 | 59 | ### Added 60 | 61 | - Feat: Support convert jsonb value to `serde_json` value. (#49) 62 | - Feat: Add `exists` filter expression. (#48) 63 | - Feat: Add `delete_by_keypath`. (#45) 64 | - Feat: Add `delete_by_index` & `delete_by_name`. (#44) 65 | - Feat: Add `concat` & improve `strip_nulls`. (#42) 66 | - Feat: Add jsonpath predicate support. (#41) 67 | - Feat: Add `contains` api. (#40) 68 | - Feat: Add `exists_any_keys` & `exists_all_keys`. (#38) 69 | - Feat: Support parse key paths. (#37) 70 | - Feat: Add `get_by_keypath`. (#36) 71 | 72 | ## [v0.3.0] - 2023-10-13 73 | 74 | ### Added 75 | 76 | - Docs: Add more jsonb encoding format descriptions. (#34) 77 | - Feat: Support `object_each` api. (#33) 78 | - Feat: Support `path_exists` api. (#32) 79 | - Feat: Support `type_of` api. (#31) 80 | - Feat: Support `strip_nulls` api. (#30) 81 | - Perf: Add benches for parser and `get_path`. 
(#29) 82 | - Chore: Add check fmt and clippy. (#27) 83 | - Feat: Support `to_pretty_string` api. (#26) 84 | - Feat: Support `traverse_check_string` function. (#25) 85 | - Feat: Improve json path selector using less memory. (#24) 86 | 87 | ## [v0.2.3] - 2023-07-10 88 | 89 | ### Fixed 90 | 91 | - Fix: fix parse json path name with escaped characters. (#21) 92 | - Fix: Fix some special characters display errors. (#18) 93 | - Fix: Support parsing Unicode characters enclosed in brackets. (#17) 94 | - Fix: json `to_string` function adds backslash for escaped characters. (#16) 95 | - Fix: fix parse UTF-8 characters. (#15) 96 | 97 | ### Added 98 | 99 | - chore: implement From trait with owned JsonValue for Value. (#22) 100 | - Feat: Add function `convert_to_comparable`, `rand_value`. (#20) 101 | - Create publish.yaml. (#19) 102 | 103 | ## [v0.2.2] - 2023-05-06 104 | 105 | ### Fixed 106 | 107 | - Fix: Allow parse escaped white space. (#14) 108 | 109 | ## [v0.2.1] - 2023-05-05 110 | 111 | ### Fixed 112 | 113 | - Fix: Allow parse invalid Unicode. (#13) 114 | 115 | ## [v0.2.0] - 2023-04-21 116 | 117 | ### Added 118 | 119 | - Feat: Support `JSON path` selector. (#8) 120 | - Feat: Support parse `JSON path` syntax. (#7) 121 | 122 | ## [v0.1.1] - 2023-03-03 123 | 124 | - Rename project name to jsonb. 125 | - Add Readme description. (#4) 126 | - Use stable Rust. (#3) 127 | 128 | ## v0.1.0 - 2023-03-03 129 | 130 | - Implement a `JSON` parser. 131 | - Implement `JSONB` encodes and decodes. 132 | - Implemented a number of `JSONB` functions. 
133 | 134 | [v0.5.0]: https://github.com/databendlabs/jsonb/compare/v0.4.4...v0.5.0 135 | [v0.4.4]: https://github.com/databendlabs/jsonb/compare/v0.4.3...v0.4.4 136 | [v0.4.3]: https://github.com/databendlabs/jsonb/compare/v0.4.2...v0.4.3 137 | [v0.4.2]: https://github.com/databendlabs/jsonb/compare/v0.4.1...v0.4.2 138 | [v0.4.1]: https://github.com/databendlabs/jsonb/compare/v0.4.0...v0.4.1 139 | [v0.4.0]: https://github.com/databendlabs/jsonb/compare/v0.3.0...v0.4.0 140 | [v0.3.0]: https://github.com/databendlabs/jsonb/compare/v0.2.3...v0.3.0 141 | [v0.2.3]: https://github.com/databendlabs/jsonb/compare/v0.2.2...v0.2.3 142 | [v0.2.2]: https://github.com/databendlabs/jsonb/compare/v0.2.1...v0.2.2 143 | [v0.2.1]: https://github.com/databendlabs/jsonb/compare/v0.2.0...v0.2.1 144 | [v0.2.0]: https://github.com/databendlabs/jsonb/compare/v0.1.1...v0.2.0 145 | [v0.1.1]: https://github.com/databendlabs/jsonb/compare/v0.1.0...v0.1.1 146 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Datafuse Labs 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [package] 16 | authors = ["Databend Authors "] 17 | categories = ["encoding"] 18 | description = "JSONB implement in Rust." 
19 | edition = "2021" 20 | homepage = "https://github.com/databendlabs/jsonb" 21 | keywords = ["json", "jsonb", "jsonpath"] 22 | license = "Apache-2.0" 23 | name = "jsonb" 24 | repository = "https://github.com/databendlabs/jsonb" 25 | version = "0.5.1" 26 | rust-version = "1.80" 27 | 28 | [dependencies] 29 | byteorder = "1.5.0" 30 | ethnum = "1.5.1" 31 | fast-float2 = "0.2.3" 32 | itoa = "1.0" 33 | jiff = "0.2.10" 34 | nom = "8.0.0" 35 | num-traits = "0.2.19" 36 | ordered-float = { version = "5.0", default-features = false } 37 | rand = { version = "0.9.0", features = ["small_rng"] } 38 | ryu = "1.0" 39 | serde = "1.0" 40 | serde_json = { version = "1.0", default-features = false, features = ["std"] } 41 | 42 | [dev-dependencies] 43 | goldenfile = "1.8" 44 | serde_json = "1.0" 45 | json-deserializer = "0.4.4" 46 | simd-json = "0.15.0" 47 | mockalloc = "0.1.2" 48 | criterion = "0.5.1" 49 | 50 | [features] 51 | default = ["databend", "serde_json/preserve_order"] 52 | databend = [] 53 | sqlite = [] 54 | 55 | [[bench]] 56 | name = "parser" 57 | harness = false 58 | 59 | [[bench]] 60 | name = "get_path" 61 | harness = false 62 | 63 | [[bench]] 64 | name = "strip_nulls" 65 | harness = false 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jsonb   [![Build Status]][actions] [![Latest Version]][crates.io] [![Crate Downloads]][crates.io] 2 | 3 | [build status]: https://img.shields.io/github/actions/workflow/status/datafuselabs/jsonb/rust.yml?branch=main 4 | [actions]: https://github.com/datafuselabs/jsonb/actions?query=branch%3Amain 5 | [latest version]: https://img.shields.io/crates/v/jsonb.svg 6 | [crates.io]: https://crates.io/crates/jsonb 7 | [crate downloads]: https://img.shields.io/crates/d/jsonb.svg 8 | 9 | 10 | `jsonb` is a binary format `JSON` representation inspired by [PostgreSQL](https://www.postgresql.org/docs/current/datatype-json.html) and [CockroachDB](https://www.cockroachlabs.com/docs/stable/jsonb). It provides a fast, lightweight and easy-to-use API for working with `JSON` data. 11 | 12 | ## Features 13 | 14 | - Good compatibility: `jsonb` fully supports the `JSON` standard and can be used to store complex data structures. 
15 | - Fast performance: `jsonb` is designed for high performance, allowing you to work with large `JSON` data sets with ease. 16 | - Easy to use: `jsonb` provides a number of built-in functions to support various operations, and also supports the `JSONPath` syntax for selecting and extracting subset elements. 17 | - Safe and secure: `jsonb` is written in Rust, which provides memory and thread safety guarantees, making it a safe choice for handling sensitive data. 18 | 19 | ## Encoding format 20 | 21 | The `jsonb` encoding format is a tree-like structure. Each node contains a container header, a number of JEntry headers, and nested encoding values. 22 | 23 | - 32-bit container header. 3 bits identify the type of value, including `scalar`, `object` and `array`, and 29 bits identify the number of JEntries in the `array` or `object`. The root node of the `jsonb` value is always a container header. 24 | - `scalar` container header: `0x20000000` 25 | - `object` container header: `0x40000000` 26 | - `array` container header: `0x80000000` 27 | - 32-bit JEntry header. 1 bit identifies whether the JEntry stores a length or an offset, 3 bits identify the type of value, including `null`, `string`, `number`, `false`, `true` and `container`, and the remaining 28 bits identify the length or offset of the encoding value. 28 | - `null` JEntry header: `0x00000000` 29 | - `string` JEntry header: `0x10000000` 30 | - `number` JEntry header: `0x20000000` 31 | - `false` JEntry header: `0x30000000` 32 | - `true` JEntry header: `0x40000000` 33 | - `container` JEntry header `0x50000000` 34 | - Encoding value. Different types of JEntry header have different encoding values. 35 | - `null`, `true`, `false`: no encoding value, identified by the JEntry header. 36 | - `string`: a normal UTF-8 string. 37 | - `number`: an encoded number to represent uint64s, int64s and float64s. 38 | - `container`: a nested `json` value with a recursive structure. 
39 | 40 | #### An encoding example 41 | 42 | ```text 43 | // JSON value 44 | [false, 10, {"k":"v"}] 45 | 46 | // JSONB encoding 47 | 0x80000003 array container header (3 JEntries) 48 | 0x30000000 false JEntry header (no encoding value) 49 | 0x20000002 number JEntry header (encoding value length 2) 50 | 0x5000000e container JEntry header (encoding value length 14) 51 | 0x500a number encoding value (10) 52 | 0x40000001 object container header (1 JEntry) 53 | 0x10000001 string key JEntry header (encoding value length 1) 54 | 0x10000001 string value JEntry header (encoding value length 1) 55 | 0x6b string encoding value ("k") 56 | 0x76 string encoding value ("v") 57 | ``` 58 | 59 | ## Jsonb value 60 | 61 | The `jsonb` value is an enumeration that represents all kinds of `JSON` values and serves as an intermediate for converting other data types to the `jsonb` binary format value. 62 | 63 | ```rust 64 | // jsonb value 65 | #[derive(Clone, PartialEq, Eq)] 66 | pub enum Value<'a> { 67 | Null, 68 | Bool(bool), 69 | String(Cow<'a, str>), 70 | Number(Number), 71 | Array(Vec<Value<'a>>), 72 | Object(Object<'a>), 73 | } 74 | ``` 75 | 76 | ## Built-in functions 77 | 78 | `jsonb` implements a number of commonly used built-in functions. Since most functions only focus on a subset of the total value, they can use the container headers and JEntry headers to efficiently skip over intermediate parts of the `jsonb` value. This avoids time-consuming deserialisation operations and provides very high performance. For more information, see https://docs.rs/jsonb/latest/jsonb/#functions 79 | 80 | ## SQL/JSONPath 81 | 82 | [SQL/JSONPath](https://www.iso.org/standard/67367.html) is a query language used to select and extract a subset of elements from a `jsonb` value. 
83 | 84 | #### Operators 85 | 86 | The following operators have been implemented: 87 | 88 | | Operator | Description | Examples | 89 | |--------------------------|--------------------------------------------------------------|--------------------| 90 | | `$` | The root element | `$` | 91 | | `@` | The current element in the filter expression | `$.event?(@ == 1)` | 92 | | `.*` | Selecting all elements in an Object | `$.*` | 93 | | `.<name>` | Selecting the element that matches the name in an Object | `$.event` | 94 | | `:<name>` | Alias of `.<name>` | `$:event` | 95 | | `["<name>"]` | Alias of `.<name>` | `$["event"]` | 96 | | `[*]` | Selecting all elements in an Array | `$[*]` | 97 | | `[<index1>, ..]` | Selecting 0-based `n-th` elements in an Array | `$[1, 2]` | 98 | | `[last - <number>, ..]` | Selecting `n-th` element before the last element in an Array | `$[0, last - 1]` | 99 | | `[<index1> to <index2>, ..]` | Selecting all elements of a range in an Array | `$[1 to last - 2]` | 100 | | `?(<expr>)` | Selecting all elements that matched the filter expression | `$?(@.price < 10)` | 101 | 102 | ## Examples 103 | 104 | ```rust 105 | fn main() { 106 | let json = r#" 107 | { 108 | "name":"Fred", 109 | "phones":[ 110 | { 111 | "type":"home", 112 | "number":3720453 113 | }, 114 | { 115 | "type": "work", 116 | "number":5062051 117 | } 118 | ] 119 | }"#; 120 | 121 | let path = r#"$.phones[*]?(@.number == 3720453)"#; 122 | 123 | // parse JSON string to jsonb value 124 | let value = jsonb::parse_value(json.as_bytes()).unwrap(); 125 | // encode jsonb value to jsonb binary value 126 | let jsonb = value.to_vec(); 127 | // parse JSONPath string 128 | let json_path = jsonb::jsonpath::parse_json_path(path.as_bytes()).unwrap(); 129 | // select subset value from jsonb binary value 130 | let mut sub_jsonb = Vec::new(); 131 | let mut sub_offsets = Vec::new(); 132 | jsonb::get_by_path(&jsonb, json_path, &mut sub_jsonb, &mut sub_offsets); 133 | 134 | // value={"number":3720453,"type":"home"} 135 | println!("value={}", jsonb::to_string(&sub_jsonb)); 136 | } 
137 | ``` 138 | 139 | ## Contributing 140 | 141 | `jsonb` is an open source project and all kinds of contributions are welcome! You can help with ideas, code or documentation. 142 | 143 | ## License 144 | 145 | Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) 146 | -------------------------------------------------------------------------------- /benches/get_path.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::fs; 16 | use std::io::Read; 17 | 18 | use criterion::{criterion_group, criterion_main, Criterion}; 19 | 20 | fn jsonb_get(data: &[u8], paths: &[&str], expected: &str) { 21 | let paths = paths 22 | .iter() 23 | .map(|p| jsonb::jsonpath::Path::DotField(std::borrow::Cow::Borrowed(p))) 24 | .collect::>(); 25 | let json_path = jsonb::jsonpath::JsonPath { paths }; 26 | 27 | let raw_jsonb = jsonb::RawJsonb::new(data); 28 | let result_jsonb = raw_jsonb.select_value_by_path(&json_path).unwrap(); 29 | assert!(result_jsonb.is_some()); 30 | let result_jsonb = result_jsonb.unwrap(); 31 | let result_raw_jsonb = result_jsonb.as_raw(); 32 | 33 | let s = result_raw_jsonb.as_str().unwrap().unwrap(); 34 | assert_eq!(s, expected); 35 | } 36 | 37 | fn serde_json_get(data: &[u8], paths: &Vec<&str>, expected: &str) { 38 | let mut v: serde_json::Value = serde_json::from_slice(data).unwrap(); 39 | for path in paths { 40 | v = v.get(path).unwrap().clone(); 41 | } 42 | let s = v.as_str().unwrap(); 43 | assert_eq!(s, expected); 44 | } 45 | 46 | fn read(file: &str) -> Vec { 47 | let mut f = fs::File::open(file).unwrap(); 48 | let mut data = vec![]; 49 | f.read_to_end(&mut data).unwrap(); 50 | data 51 | } 52 | 53 | struct TestSuite<'a> { 54 | file: &'a str, 55 | paths: Vec<&'a str>, 56 | expected: &'a str, 57 | } 58 | 59 | fn add_benchmark(c: &mut Criterion) { 60 | let test_suites = vec![ 61 | TestSuite { 62 | file: "canada", 63 | paths: vec!["type"], 64 | expected: "FeatureCollection", 65 | }, 66 | TestSuite { 67 | file: "citm_catalog", 68 | paths: vec!["areaNames", "205705994"], 69 | expected: "1er balcon central", 70 | }, 71 | TestSuite { 72 | file: "citm_catalog", 73 | paths: vec!["topicNames", "324846100"], 74 | expected: "Formations musicales", 75 | }, 76 | TestSuite { 77 | file: "twitter", 78 | paths: vec!["search_metadata", "max_id_str"], 79 | expected: "505874924095815681", 80 | }, 81 | ]; 82 | 83 | for test_suite in test_suites { 84 | let bytes = 
read(&format!("./data/{}.json", test_suite.file)); 85 | 86 | let val = jsonb::parse_value(&bytes).unwrap(); 87 | let jsonb_bytes = val.to_vec(); 88 | 89 | c.bench_function( 90 | &format!( 91 | "jsonb get {}->{}", 92 | test_suite.file, 93 | test_suite.paths.join("->") 94 | ), 95 | |b| b.iter(|| jsonb_get(&jsonb_bytes, &test_suite.paths, test_suite.expected)), 96 | ); 97 | 98 | c.bench_function( 99 | &format!( 100 | "serde_json get {}->{}", 101 | test_suite.file, 102 | test_suite.paths.join("->") 103 | ), 104 | |b| b.iter(|| serde_json_get(&bytes, &test_suite.paths, test_suite.expected)), 105 | ); 106 | } 107 | } 108 | 109 | criterion_group!(benches, add_benchmark); 110 | criterion_main!(benches); 111 | -------------------------------------------------------------------------------- /benches/parser.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::fs; 16 | use std::io::Read; 17 | 18 | use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; 19 | 20 | fn parse_jsonb(data: &[u8]) { 21 | let _v: jsonb::Value = jsonb::parse_value(data).unwrap(); 22 | } 23 | 24 | fn parse_serde_json(data: &[u8]) { 25 | let _v: serde_json::Value = serde_json::from_slice(data).unwrap(); 26 | } 27 | 28 | fn parse_json_deserializer(data: &[u8]) { 29 | let _v: json_deserializer::Value = json_deserializer::parse(data).unwrap(); 30 | } 31 | 32 | fn parse_simd_json(data: &mut [u8]) { 33 | let _v = simd_json::to_borrowed_value(data).unwrap(); 34 | } 35 | 36 | fn read(file: &str) -> Vec { 37 | let mut f = fs::File::open(file).unwrap(); 38 | let mut data = vec![]; 39 | f.read_to_end(&mut data).unwrap(); 40 | data 41 | } 42 | 43 | fn add_benchmark(c: &mut Criterion) { 44 | let paths = fs::read_dir("./data/").unwrap(); 45 | for path in paths { 46 | let file = format!("{}", path.unwrap().path().display()); 47 | let bytes = read(&file); 48 | 49 | c.bench_function(&format!("jsonb parse {}", file), |b| { 50 | b.iter(|| parse_jsonb(&bytes)) 51 | }); 52 | 53 | c.bench_function(&format!("serde_json parse {}", file), |b| { 54 | b.iter(|| parse_serde_json(&bytes)) 55 | }); 56 | 57 | c.bench_function(&format!("json_deserializer parse {}", file), |b| { 58 | b.iter(|| parse_json_deserializer(&bytes)) 59 | }); 60 | 61 | let bytes = bytes.clone(); 62 | c.bench_function(&format!("simd_json parse {}", file), move |b| { 63 | b.iter_batched( 64 | || bytes.clone(), 65 | |mut data| parse_simd_json(&mut data), 66 | BatchSize::SmallInput, 67 | ) 68 | }); 69 | } 70 | } 71 | 72 | criterion_group!(benches, add_benchmark); 73 | criterion_main!(benches); 74 | -------------------------------------------------------------------------------- /benches/strip_nulls.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Datafuse Labs. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::{fs, io::Read}; 16 | 17 | use criterion::{criterion_group, criterion_main, Criterion}; 18 | use jsonb::{from_slice, Value}; 19 | 20 | fn read(file: &str) -> Vec { 21 | let mut f = fs::File::open(file).unwrap(); 22 | let mut data = vec![]; 23 | f.read_to_end(&mut data).unwrap(); 24 | data 25 | } 26 | 27 | fn strip_nulls_deser(data: &[u8]) { 28 | let mut buf = Vec::new(); 29 | let mut json = from_slice(data).unwrap(); 30 | strip_value_nulls(&mut json); 31 | json.write_to_vec(&mut buf); 32 | assert!(!buf.is_empty()); 33 | } 34 | 35 | fn strip_value_nulls(val: &mut Value<'_>) { 36 | match val { 37 | Value::Array(arr) => { 38 | for v in arr { 39 | strip_value_nulls(v); 40 | } 41 | } 42 | Value::Object(ref mut obj) => { 43 | for (_, v) in obj.iter_mut() { 44 | strip_value_nulls(v); 45 | } 46 | obj.retain(|_, v| !matches!(v, Value::Null)); 47 | } 48 | _ => {} 49 | } 50 | } 51 | 52 | fn strip_nulls_fast(data: &[u8]) { 53 | let raw_jsonb = jsonb::RawJsonb::new(data); 54 | let result_jsonb = raw_jsonb.strip_nulls().unwrap(); 55 | assert!(!result_jsonb.is_empty()); 56 | } 57 | 58 | fn add_benchmark(c: &mut Criterion) { 59 | let paths = fs::read_dir("./data/").unwrap(); 60 | for path in paths { 61 | let file = format!("{}", path.unwrap().path().display()); 62 | let bytes = read(&file); 63 | let json = from_slice(&bytes).unwrap().to_vec(); 64 | 65 | 
c.bench_function(&format!("strip_nulls_deser[{}]", file), |b| { 66 | b.iter(|| strip_nulls_deser(&json)); 67 | }); 68 | 69 | c.bench_function(&format!("strip_nulls_fast[{}]", file), |b| { 70 | b.iter(|| strip_nulls_fast(&json)); 71 | }); 72 | } 73 | } 74 | 75 | criterion_group!(benches, add_benchmark); 76 | criterion_main!(benches); 77 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "stable" 3 | components = ["rustfmt", "clippy"] 4 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 

// JSON text constants
// NOTE(review): presumably the number of hex digits in a `\uXXXX` escape —
// confirm at the use site.
pub(crate) const UNICODE_LEN: usize = 4;

// JSON text escape characters constants
// Each constant holds the character itself; the trailing comment shows the
// two-character escape sequence that stands for it in JSON text.
pub(crate) const BS: char = '\x5C'; // \\ Backslash
pub(crate) const QU: char = '\x22'; // \" Double quotation mark
pub(crate) const SD: char = '\x2F'; // \/ Slash or divide
pub(crate) const BB: char = '\x08'; // \b Backspace
pub(crate) const FF: char = '\x0C'; // \f Formfeed Page Break
pub(crate) const NN: char = '\x0A'; // \n Newline
pub(crate) const RR: char = '\x0D'; // \r Carriage Return
pub(crate) const TT: char = '\x09'; // \t Horizontal Tab

// JSONB value compare level
// One numeric rank per kind of value: Null has the highest rank (8) and
// Extension the lowest (1).
// NOTE(review): presumably used to order values of different kinds against
// each other — confirm at the comparison code.
pub(crate) const NULL_LEVEL: u8 = 8;
pub(crate) const ARRAY_LEVEL: u8 = 7;
pub(crate) const OBJECT_LEVEL: u8 = 6;
pub(crate) const STRING_LEVEL: u8 = 5;
pub(crate) const NUMBER_LEVEL: u8 = 4;
pub(crate) const TRUE_LEVEL: u8 = 3;
pub(crate) const FALSE_LEVEL: u8 = 2;
pub(crate) const EXTENSION_LEVEL: u8 = 1;

// Lower-case type names, one per kind of value.
// NOTE(review): presumably the strings returned by a "type of" function —
// confirm at the use site.
pub(crate) const TYPE_STRING: &str = "string";
pub(crate) const TYPE_NULL: &str = "null";
pub(crate) const TYPE_BOOLEAN: &str = "boolean";
pub(crate) const TYPE_NUMBER: &str = "number";
pub(crate) const TYPE_ARRAY: &str = "array";
pub(crate) const TYPE_OBJECT: &str = "object";
pub(crate) const TYPE_DECIMAL: &str = "decimal";
pub(crate) const TYPE_BINARY: &str = "binary";
pub(crate) const TYPE_DATE: &str = "date";
pub(crate) const TYPE_TIMESTAMP: &str = "timestamp";
pub(crate) const TYPE_TIMESTAMP_TZ: &str = "timestamp_tz";
pub(crate) const TYPE_INTERVAL: &str = "interval";
-------------------------------------------------------------------------------- /src/core/databend/builder.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use core::ops::Range; 16 | use std::collections::BTreeMap; 17 | 18 | use byteorder::BigEndian; 19 | use byteorder::WriteBytesExt; 20 | 21 | use super::constants::*; 22 | use super::jentry::JEntry; 23 | use crate::core::JsonbItem; 24 | use crate::error::Error; 25 | use crate::error::Result; 26 | use crate::OwnedJsonb; 27 | use crate::RawJsonb; 28 | 29 | pub(crate) struct ArrayBuilder<'a> { 30 | items: Vec>, 31 | } 32 | 33 | impl<'a> ArrayBuilder<'a> { 34 | pub(crate) fn new() -> Self { 35 | Self { items: Vec::new() } 36 | } 37 | 38 | pub(crate) fn with_capacity(capacity: usize) -> Self { 39 | Self { 40 | items: Vec::with_capacity(capacity), 41 | } 42 | } 43 | 44 | pub(crate) fn push_jsonb_item(&mut self, item: JsonbItem<'a>) { 45 | self.items.push(item); 46 | } 47 | 48 | pub(crate) fn push_raw_jsonb(&mut self, raw: RawJsonb<'a>) { 49 | let item = JsonbItem::Raw(raw); 50 | self.items.push(item); 51 | } 52 | 53 | pub(crate) fn push_owned_jsonb(&mut self, owned: OwnedJsonb) { 54 | let item = JsonbItem::Owned(owned); 55 | self.push_jsonb_item(item) 56 | } 57 | 58 | pub(crate) fn build(self) -> Result { 59 | let mut buf = Vec::new(); 60 | let header = ARRAY_CONTAINER_TAG | self.items.len() as u32; 61 | buf.write_u32::(header)?; 62 | 63 | let mut jentry_index = reserve_jentries(&mut buf, self.items.len() * 4); 64 | for item in self.items.into_iter() { 65 | 
append_jsonb_item(&mut buf, &mut jentry_index, item)?; 66 | } 67 | Ok(OwnedJsonb::new(buf)) 68 | } 69 | } 70 | 71 | pub(crate) struct ArrayDistinctBuilder<'a> { 72 | items: Vec>, 73 | item_map: BTreeMap, usize>, 74 | } 75 | 76 | impl<'a> ArrayDistinctBuilder<'a> { 77 | pub(crate) fn new(capacity: usize) -> Self { 78 | Self { 79 | items: Vec::with_capacity(capacity), 80 | item_map: BTreeMap::new(), 81 | } 82 | } 83 | 84 | pub(crate) fn push_jsonb_item(&mut self, item: JsonbItem<'a>) { 85 | if let Some(cnt) = self.item_map.get_mut(&item) { 86 | *cnt += 1; 87 | } else { 88 | self.item_map.insert(item.clone(), 1); 89 | self.items.push(item); 90 | } 91 | } 92 | 93 | pub(crate) fn push_raw_jsonb(&mut self, raw: RawJsonb<'a>) { 94 | let item = JsonbItem::Raw(raw); 95 | self.push_jsonb_item(item); 96 | } 97 | 98 | pub(crate) fn pop_jsonb_item(&mut self, item: JsonbItem<'a>) -> Option<()> { 99 | if let Some(cnt) = self.item_map.get_mut(&item) { 100 | if *cnt > 0 { 101 | *cnt -= 1; 102 | return Some(()); 103 | } 104 | } 105 | None 106 | } 107 | 108 | pub(crate) fn pop_raw_jsonb(&mut self, raw: RawJsonb<'a>) -> Option<()> { 109 | let item = JsonbItem::Raw(raw); 110 | self.pop_jsonb_item(item) 111 | } 112 | 113 | pub(crate) fn build(self) -> Result { 114 | let mut buf = Vec::new(); 115 | let header = ARRAY_CONTAINER_TAG | self.items.len() as u32; 116 | buf.write_u32::(header)?; 117 | 118 | let mut jentry_index = reserve_jentries(&mut buf, self.items.len() * 4); 119 | for item in self.items.into_iter() { 120 | append_jsonb_item(&mut buf, &mut jentry_index, item)?; 121 | } 122 | Ok(OwnedJsonb::new(buf)) 123 | } 124 | } 125 | 126 | pub(crate) struct ObjectBuilder<'a> { 127 | entries: BTreeMap<&'a str, JsonbItem<'a>>, 128 | } 129 | 130 | impl<'a> ObjectBuilder<'a> { 131 | pub(crate) fn new() -> Self { 132 | Self { 133 | entries: BTreeMap::new(), 134 | } 135 | } 136 | 137 | pub(crate) fn push_jsonb_item(&mut self, key: &'a str, val_item: JsonbItem<'a>) -> Result<()> { 138 | if 
self.entries.contains_key(key) { 139 | return Err(Error::ObjectDuplicateKey); 140 | } 141 | self.entries.insert(key, val_item); 142 | Ok(()) 143 | } 144 | 145 | pub(crate) fn push_raw_jsonb(&mut self, key: &'a str, raw: RawJsonb<'a>) -> Result<()> { 146 | let item = JsonbItem::Raw(raw); 147 | self.push_jsonb_item(key, item) 148 | } 149 | 150 | pub(crate) fn push_owned_jsonb(&mut self, key: &'a str, owned: OwnedJsonb) -> Result<()> { 151 | let item = JsonbItem::Owned(owned); 152 | self.push_jsonb_item(key, item) 153 | } 154 | 155 | pub(crate) fn contains_key(&self, key: &'a str) -> bool { 156 | self.entries.contains_key(key) 157 | } 158 | 159 | pub(crate) fn build(self) -> Result { 160 | let mut buf = Vec::new(); 161 | let header = OBJECT_CONTAINER_TAG | self.entries.len() as u32; 162 | buf.write_u32::(header)?; 163 | 164 | let mut jentry_index = reserve_jentries(&mut buf, self.entries.len() * 8); 165 | for (key, _) in self.entries.iter() { 166 | let key_len = key.len(); 167 | buf.extend_from_slice(key.as_bytes()); 168 | let jentry = JEntry::make_string_jentry(key_len); 169 | replace_jentry(&mut buf, jentry, &mut jentry_index) 170 | } 171 | for (_, item) in self.entries.into_iter() { 172 | append_jsonb_item(&mut buf, &mut jentry_index, item)?; 173 | } 174 | Ok(OwnedJsonb::new(buf)) 175 | } 176 | } 177 | 178 | fn append_jsonb_item(buf: &mut Vec, jentry_index: &mut usize, item: JsonbItem) -> Result<()> { 179 | match item { 180 | JsonbItem::Null => { 181 | let jentry = JEntry::make_null_jentry(); 182 | replace_jentry(buf, jentry, jentry_index); 183 | } 184 | JsonbItem::Boolean(v) => { 185 | let jentry = if v { 186 | JEntry::make_true_jentry() 187 | } else { 188 | JEntry::make_false_jentry() 189 | }; 190 | replace_jentry(buf, jentry, jentry_index); 191 | } 192 | JsonbItem::Number(data) => { 193 | let jentry = JEntry::make_number_jentry(data.len()); 194 | replace_jentry(buf, jentry, jentry_index); 195 | buf.extend_from_slice(data); 196 | } 197 | JsonbItem::String(data) 
=> { 198 | let jentry = JEntry::make_string_jentry(data.len()); 199 | replace_jentry(buf, jentry, jentry_index); 200 | buf.extend_from_slice(data); 201 | } 202 | JsonbItem::Extension(data) => { 203 | let jentry = JEntry::make_extension_jentry(data.len()); 204 | replace_jentry(buf, jentry, jentry_index); 205 | buf.extend_from_slice(data); 206 | } 207 | JsonbItem::Raw(raw_jsonb) => { 208 | append_raw_jsonb_data(buf, jentry_index, raw_jsonb)?; 209 | } 210 | JsonbItem::Owned(owned_jsonb) => { 211 | let raw_jsonb = owned_jsonb.as_raw(); 212 | append_raw_jsonb_data(buf, jentry_index, raw_jsonb)?; 213 | } 214 | } 215 | Ok(()) 216 | } 217 | 218 | fn append_raw_jsonb_data( 219 | buf: &mut Vec, 220 | jentry_index: &mut usize, 221 | raw_jsonb: RawJsonb, 222 | ) -> Result<()> { 223 | let (header_type, _) = raw_jsonb.read_header(0)?; 224 | if header_type == SCALAR_CONTAINER_TAG { 225 | let scalar_jentry = raw_jsonb.read_jentry(4)?; 226 | let range = Range { 227 | start: 8, 228 | end: raw_jsonb.len(), 229 | }; 230 | let data = raw_jsonb.slice(range)?; 231 | replace_jentry(buf, scalar_jentry, jentry_index); 232 | buf.extend_from_slice(data); 233 | } else { 234 | let jentry = JEntry::make_container_jentry(raw_jsonb.len()); 235 | replace_jentry(buf, jentry, jentry_index); 236 | buf.extend_from_slice(raw_jsonb.data); 237 | } 238 | Ok(()) 239 | } 240 | 241 | fn reserve_jentries(buf: &mut Vec, len: usize) -> usize { 242 | let old_len = buf.len(); 243 | let new_len = old_len + len; 244 | buf.resize(new_len, 0); 245 | old_len 246 | } 247 | 248 | fn replace_jentry(buf: &mut [u8], jentry: JEntry, jentry_index: &mut usize) { 249 | let jentry_bytes = jentry.encoded().to_be_bytes(); 250 | for (i, b) in jentry_bytes.iter().enumerate() { 251 | buf[*jentry_index + i] = *b; 252 | } 253 | *jentry_index += 4; 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | use std::collections::BTreeMap; 259 | 260 | use super::ArrayBuilder; 261 | use super::ObjectBuilder; 262 | use crate::to_owned_jsonb; 
263 | use crate::Value; 264 | 265 | #[test] 266 | fn test_build_with_inner_array() { 267 | let from_builder = { 268 | let mut builder = ObjectBuilder::new(); 269 | let mut inner_array_builder = ArrayBuilder::with_capacity(1); 270 | 271 | let val = to_owned_jsonb(&false).unwrap(); 272 | inner_array_builder.push_owned_jsonb(val); 273 | let array = inner_array_builder.build().unwrap(); 274 | 275 | builder.push_owned_jsonb("arr", array).unwrap(); 276 | let object = builder.build().unwrap(); 277 | object.to_vec() 278 | }; 279 | let mut from_encoder = Vec::new(); 280 | { 281 | let value = init_object(vec![("arr", Value::Array(vec![Value::Bool(false)]))]); 282 | value.write_to_vec(&mut from_encoder); 283 | } 284 | assert_eq!(from_builder, from_encoder); 285 | } 286 | 287 | #[test] 288 | fn test_build_with_inner_object() { 289 | let from_builder = { 290 | let mut builder = ObjectBuilder::new(); 291 | let mut inner_obj_builder = ObjectBuilder::new(); 292 | 293 | let val = to_owned_jsonb(&true).unwrap(); 294 | inner_obj_builder.push_owned_jsonb("field", val).unwrap(); 295 | let inner_obj = inner_obj_builder.build().unwrap(); 296 | 297 | builder.push_owned_jsonb("obj", inner_obj).unwrap(); 298 | let object = builder.build().unwrap(); 299 | object.to_vec() 300 | }; 301 | let mut from_encoder = Vec::new(); 302 | { 303 | let value = init_object(vec![( 304 | "obj", 305 | init_object(vec![("field", Value::Bool(true))]), 306 | )]); 307 | value.write_to_vec(&mut from_encoder); 308 | } 309 | assert_eq!(from_builder, from_encoder); 310 | } 311 | 312 | fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> { 313 | let mut map = BTreeMap::new(); 314 | for (key, val) in entries { 315 | map.insert(key.to_string(), val); 316 | } 317 | Value::Object(map) 318 | } 319 | } 320 | -------------------------------------------------------------------------------- /src/core/databend/constants.rs: -------------------------------------------------------------------------------- 1 | // 
Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.

// JSONB header constants
// Container kind tags; each one occupies a single bit among the top three
// bits of the 32-bit header word.
pub(super) const ARRAY_CONTAINER_TAG: u32 = 0x80000000;
pub(super) const OBJECT_CONTAINER_TAG: u32 = 0x40000000;
pub(super) const SCALAR_CONTAINER_TAG: u32 = 0x20000000;

// The top 3 bits of the header select the container kind; the low 29 bits
// carry a length.
pub(super) const CONTAINER_HEADER_TYPE_MASK: u32 = 0xE0000000;
pub(super) const CONTAINER_HEADER_LEN_MASK: u32 = 0x1FFFFFFF;

// JSONB JEntry constants
// Value kind tags; they live in the bits selected by `JENTRY_TYPE_MASK`.
pub(super) const NULL_TAG: u32 = 0x00000000;
pub(super) const STRING_TAG: u32 = 0x10000000;
pub(super) const NUMBER_TAG: u32 = 0x20000000;
pub(super) const FALSE_TAG: u32 = 0x30000000;
pub(super) const TRUE_TAG: u32 = 0x40000000;
pub(super) const CONTAINER_TAG: u32 = 0x50000000;
pub(super) const EXTENSION_TAG: u32 = 0x60000000;

// JSONB number constants
// One-byte tags that differ only in the high nibble.
// NOTE(review): presumably the leading byte of an encoded number — confirm
// in the number encoder/decoder.
pub(super) const NUMBER_ZERO: u8 = 0x00;
pub(super) const NUMBER_NAN: u8 = 0x10;
pub(super) const NUMBER_INF: u8 = 0x20;
pub(super) const NUMBER_NEG_INF: u8 = 0x30;
pub(super) const NUMBER_INT: u8 = 0x40;
pub(super) const NUMBER_UINT: u8 = 0x50;
pub(super) const NUMBER_FLOAT: u8 = 0x60;
pub(super) const NUMBER_DECIMAL: u8 = 0x70;

// JSONB extension constants
// One-byte tags that differ only in the high nibble.
// NOTE(review): presumably the leading byte of an encoded extension value —
// confirm in the extension encoder/decoder.
pub(super) const EXTENSION_BINARY: u8 = 0x00;
pub(super) const EXTENSION_DATE: u8 = 0x10;
pub(super) const EXTENSION_TIMESTAMP: u8 = 0x20;
pub(super) const EXTENSION_TIMESTAMP_TZ: u8 = 0x30;
pub(super) const EXTENSION_INTERVAL: u8 = 0x40;

// @todo support offset mode
// Top bit of a JEntry; reserved for the offset mode above and currently unused.
#[allow(dead_code)]
pub(super) const JENTRY_IS_OFF_FLAG: u32 = 0x80000000;
// Bits 28..=30 of a JEntry hold the value kind tag.
pub(super) const JENTRY_TYPE_MASK: u32 = 0x70000000;
// The low 28 bits of a JEntry hold the payload length.
pub(super) const JENTRY_OFF_LEN_MASK: u32 = 0x0FFFFFFF;
-------------------------------------------------------------------------------- /src/core/databend/iterator.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | use std::collections::VecDeque; 16 | use std::ops::Range; 17 | 18 | use super::constants::*; 19 | use super::jentry::JEntry; 20 | use crate::core::databend::util::jentry_to_jsonb_item; 21 | use crate::core::JsonbItem; 22 | use crate::error::Result; 23 | use crate::RawJsonb; 24 | 25 | pub(crate) struct ArrayIterator<'a> { 26 | raw_jsonb: RawJsonb<'a>, 27 | jentry_offset: usize, 28 | item_offset: usize, 29 | length: usize, 30 | index: usize, 31 | } 32 | 33 | impl<'a> ArrayIterator<'a> { 34 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 35 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 36 | if header_type == ARRAY_CONTAINER_TAG { 37 | let jentry_offset = 4; 38 | let item_offset = 4 + 4 * header_len as usize; 39 | Ok(Some(Self { 40 | raw_jsonb, 41 | jentry_offset, 42 | item_offset, 43 | length: header_len as usize, 44 | index: 0, 45 | })) 46 | } else { 47 | Ok(None) 48 | } 49 | } 50 | 51 | pub(crate) fn len(&self) -> usize { 52 | self.length 53 | } 54 | } 55 | 56 | impl<'a> Iterator for ArrayIterator<'a> { 57 | type Item = Result>; 58 | 59 | fn next(&mut self) -> Option { 60 | if self.index >= self.length { 61 | return None; 62 | } 63 | let jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 64 | Ok(jentry) => jentry, 65 | Err(err) => return Some(Err(err)), 66 | }; 67 | 68 | let item_length = jentry.length as usize; 69 | let item_range = Range { 70 | start: self.item_offset, 71 | end: self.item_offset + item_length, 72 | }; 73 | let data = match self.raw_jsonb.slice(item_range) { 74 | Ok(data) => data, 75 | Err(err) => return Some(Err(err)), 76 | }; 77 | let item = jentry_to_jsonb_item(jentry, data); 78 | 79 | self.index += 1; 80 | self.jentry_offset += 4; 81 | self.item_offset += item_length; 82 | 83 | Some(Ok(item)) 84 | } 85 | } 86 | 87 | pub(crate) struct ObjectKeyIterator<'a> { 88 | raw_jsonb: RawJsonb<'a>, 89 | jentry_offset: usize, 90 | item_offset: usize, 91 | length: usize, 92 | index: usize, 93 | } 94 | 95 | 
impl<'a> ObjectKeyIterator<'a> { 96 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 97 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 98 | if header_type == OBJECT_CONTAINER_TAG { 99 | let jentry_offset = 4; 100 | let item_offset = 4 + 8 * header_len as usize; 101 | Ok(Some(Self { 102 | raw_jsonb, 103 | jentry_offset, 104 | item_offset, 105 | length: header_len as usize, 106 | index: 0, 107 | })) 108 | } else { 109 | Ok(None) 110 | } 111 | } 112 | 113 | pub(crate) fn len(&self) -> usize { 114 | self.length 115 | } 116 | } 117 | 118 | impl<'a> Iterator for ObjectKeyIterator<'a> { 119 | type Item = Result>; 120 | 121 | fn next(&mut self) -> Option { 122 | if self.index >= self.length { 123 | return None; 124 | } 125 | let jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 126 | Ok(jentry) => jentry, 127 | Err(err) => return Some(Err(err)), 128 | }; 129 | 130 | let key_length = jentry.length as usize; 131 | let key_range = Range { 132 | start: self.item_offset, 133 | end: self.item_offset + key_length, 134 | }; 135 | let data = match self.raw_jsonb.slice(key_range) { 136 | Ok(data) => data, 137 | Err(err) => return Some(Err(err)), 138 | }; 139 | let key_item = jentry_to_jsonb_item(jentry, data); 140 | 141 | self.index += 1; 142 | self.jentry_offset += 4; 143 | self.item_offset += key_length; 144 | 145 | Some(Ok(key_item)) 146 | } 147 | } 148 | 149 | pub(crate) struct ObjectValueIterator<'a> { 150 | raw_jsonb: RawJsonb<'a>, 151 | jentry_offset: usize, 152 | item_offset: usize, 153 | length: usize, 154 | index: usize, 155 | } 156 | 157 | impl<'a> ObjectValueIterator<'a> { 158 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 159 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 160 | if header_type == OBJECT_CONTAINER_TAG { 161 | let mut jentry_offset = 4; 162 | let mut item_offset = 4 + 8 * header_len as usize; 163 | for _ in 0..header_len { 164 | let key_jentry = raw_jsonb.read_jentry(jentry_offset)?; 165 | 
jentry_offset += 4; 166 | item_offset += key_jentry.length as usize; 167 | } 168 | 169 | Ok(Some(Self { 170 | raw_jsonb, 171 | jentry_offset, 172 | item_offset, 173 | length: header_len as usize, 174 | index: 0, 175 | })) 176 | } else { 177 | Ok(None) 178 | } 179 | } 180 | 181 | #[allow(dead_code)] 182 | pub(crate) fn len(&self) -> usize { 183 | self.length 184 | } 185 | } 186 | 187 | impl<'a> Iterator for ObjectValueIterator<'a> { 188 | type Item = Result>; 189 | 190 | fn next(&mut self) -> Option { 191 | if self.index >= self.length { 192 | return None; 193 | } 194 | let jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 195 | Ok(jentry) => jentry, 196 | Err(err) => return Some(Err(err)), 197 | }; 198 | 199 | let val_length = jentry.length as usize; 200 | let val_range = Range { 201 | start: self.item_offset, 202 | end: self.item_offset + val_length, 203 | }; 204 | let data = match self.raw_jsonb.slice(val_range) { 205 | Ok(data) => data, 206 | Err(err) => return Some(Err(err)), 207 | }; 208 | let val_item = jentry_to_jsonb_item(jentry, data); 209 | 210 | self.index += 1; 211 | self.jentry_offset += 4; 212 | self.item_offset += val_length; 213 | 214 | Some(Ok(val_item)) 215 | } 216 | } 217 | 218 | pub(crate) struct ObjectIterator<'a> { 219 | raw_jsonb: RawJsonb<'a>, 220 | key_jentries: VecDeque, 221 | jentry_offset: usize, 222 | key_offset: usize, 223 | val_offset: usize, 224 | length: usize, 225 | } 226 | 227 | impl<'a> ObjectIterator<'a> { 228 | pub(crate) fn new(raw_jsonb: RawJsonb<'a>) -> Result> { 229 | let (header_type, header_len) = raw_jsonb.read_header(0)?; 230 | if header_type == OBJECT_CONTAINER_TAG { 231 | let mut jentry_offset = 4; 232 | let mut key_jentries = VecDeque::with_capacity(header_len as usize); 233 | for _ in 0..header_len { 234 | let key_jentry = raw_jsonb.read_jentry(jentry_offset)?; 235 | jentry_offset += 4; 236 | key_jentries.push_back(key_jentry); 237 | } 238 | let key_length: usize = key_jentries.iter().map(|j| j.length 
as usize).sum(); 239 | let key_offset = 4 + 8 * header_len as usize; 240 | let val_offset = key_offset + key_length; 241 | 242 | Ok(Some(Self { 243 | raw_jsonb, 244 | key_jentries, 245 | jentry_offset, 246 | key_offset, 247 | val_offset, 248 | length: header_len as usize, 249 | })) 250 | } else { 251 | Ok(None) 252 | } 253 | } 254 | 255 | pub(crate) fn len(&self) -> usize { 256 | self.length 257 | } 258 | } 259 | 260 | impl<'a> Iterator for ObjectIterator<'a> { 261 | type Item = Result<(&'a str, JsonbItem<'a>)>; 262 | 263 | fn next(&mut self) -> Option { 264 | match self.key_jentries.pop_front() { 265 | Some(key_jentry) => { 266 | let val_jentry = match self.raw_jsonb.read_jentry(self.jentry_offset) { 267 | Ok(jentry) => jentry, 268 | Err(err) => return Some(Err(err)), 269 | }; 270 | let key_length = key_jentry.length as usize; 271 | let val_length = val_jentry.length as usize; 272 | 273 | let key_range = Range { 274 | start: self.key_offset, 275 | end: self.key_offset + key_length, 276 | }; 277 | let key_data = match self.raw_jsonb.slice(key_range) { 278 | Ok(data) => data, 279 | Err(err) => return Some(Err(err)), 280 | }; 281 | let key = unsafe { std::str::from_utf8_unchecked(key_data) }; 282 | 283 | let val_range = Range { 284 | start: self.val_offset, 285 | end: self.val_offset + val_length, 286 | }; 287 | let val_data = match self.raw_jsonb.slice(val_range) { 288 | Ok(data) => data, 289 | Err(err) => return Some(Err(err)), 290 | }; 291 | let val_item = jentry_to_jsonb_item(val_jentry, val_data); 292 | 293 | self.jentry_offset += 4; 294 | self.key_offset += key_length; 295 | self.val_offset += val_length; 296 | 297 | Some(Ok((key, val_item))) 298 | } 299 | None => None, 300 | } 301 | } 302 | } 303 | -------------------------------------------------------------------------------- /src/core/databend/jentry.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use super::constants::*; 16 | 17 | #[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord)] 18 | pub(super) struct JEntry { 19 | pub(super) type_code: u32, 20 | pub(super) length: u32, 21 | } 22 | 23 | impl JEntry { 24 | pub(super) fn decode_jentry(encoded: u32) -> JEntry { 25 | let type_code = encoded & JENTRY_TYPE_MASK; 26 | let length = encoded & JENTRY_OFF_LEN_MASK; 27 | JEntry { type_code, length } 28 | } 29 | 30 | pub(super) fn make_null_jentry() -> JEntry { 31 | JEntry { 32 | type_code: NULL_TAG, 33 | length: 0, 34 | } 35 | } 36 | 37 | pub(super) fn make_true_jentry() -> JEntry { 38 | JEntry { 39 | type_code: TRUE_TAG, 40 | length: 0, 41 | } 42 | } 43 | 44 | pub(super) fn make_false_jentry() -> JEntry { 45 | JEntry { 46 | type_code: FALSE_TAG, 47 | length: 0, 48 | } 49 | } 50 | 51 | pub(super) fn make_string_jentry(length: usize) -> JEntry { 52 | JEntry { 53 | type_code: STRING_TAG, 54 | length: length as u32, 55 | } 56 | } 57 | 58 | pub(super) fn make_number_jentry(length: usize) -> JEntry { 59 | JEntry { 60 | type_code: NUMBER_TAG, 61 | length: length as u32, 62 | } 63 | } 64 | 65 | pub(super) fn make_container_jentry(length: usize) -> JEntry { 66 | JEntry { 67 | type_code: CONTAINER_TAG, 68 | length: length as u32, 69 | } 70 | } 71 | 72 | pub(super) fn make_extension_jentry(length: usize) -> JEntry { 73 | JEntry { 74 | type_code: EXTENSION_TAG, 75 | 
length: length as u32, 76 | } 77 | } 78 | 79 | pub(super) fn encoded(&self) -> u32 { 80 | self.type_code | self.length 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/core/databend/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | mod builder; 16 | mod constants; 17 | mod de; 18 | mod iterator; 19 | mod jentry; 20 | mod ser; 21 | mod util; 22 | 23 | pub(crate) use builder::*; 24 | pub use de::*; 25 | pub(crate) use iterator::*; 26 | pub use ser::*; 27 | -------------------------------------------------------------------------------- /src/core/item.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::borrow::Cow; 16 | use std::cmp::Ordering; 17 | 18 | use crate::error::*; 19 | use crate::ExtensionValue; 20 | use crate::Number; 21 | use crate::OwnedJsonb; 22 | use crate::RawJsonb; 23 | 24 | /// The value type of JSONB data. 25 | #[derive(Debug, Clone, Copy)] 26 | pub(crate) enum JsonbItemType { 27 | /// The Null JSONB type. 28 | Null, 29 | /// The Boolean JSONB type. 30 | Boolean, 31 | /// The Number JSONB type. 32 | Number, 33 | /// The String JSONB type. 34 | String, 35 | /// The Extension JSONB type. 36 | Extension, 37 | /// The Array JSONB type with the length of items. 38 | Array(usize), 39 | /// The Object JSONB type with the length of key and value pairs. 40 | Object(usize), 41 | } 42 | 43 | impl Eq for JsonbItemType {} 44 | 45 | impl PartialEq for JsonbItemType { 46 | fn eq(&self, other: &Self) -> bool { 47 | self.partial_cmp(other) == Some(Ordering::Equal) 48 | } 49 | } 50 | 51 | impl PartialOrd for JsonbItemType { 52 | fn partial_cmp(&self, other: &Self) -> Option { 53 | match (self, other) { 54 | (JsonbItemType::Null, JsonbItemType::Null) => Some(Ordering::Equal), 55 | (JsonbItemType::Null, _) => Some(Ordering::Greater), 56 | (_, JsonbItemType::Null) => Some(Ordering::Less), 57 | 58 | (JsonbItemType::Array(_), JsonbItemType::Array(_)) => None, 59 | (JsonbItemType::Array(_), _) => Some(Ordering::Greater), 60 | (_, JsonbItemType::Array(_)) => Some(Ordering::Less), 61 | 62 | (JsonbItemType::Object(_), JsonbItemType::Object(_)) => None, 63 | (JsonbItemType::Object(_), _) => Some(Ordering::Greater), 64 | (_, JsonbItemType::Object(_)) => Some(Ordering::Less), 65 | 66 | (JsonbItemType::String, JsonbItemType::String) => None, 67 | (JsonbItemType::String, _) => Some(Ordering::Greater), 68 | (_, JsonbItemType::String) => Some(Ordering::Less), 69 | 70 | (JsonbItemType::Number, JsonbItemType::Number) => None, 71 | 
(JsonbItemType::Number, _) => Some(Ordering::Greater), 72 | (_, JsonbItemType::Number) => Some(Ordering::Less), 73 | 74 | (JsonbItemType::Boolean, JsonbItemType::Boolean) => None, 75 | (JsonbItemType::Boolean, _) => Some(Ordering::Greater), 76 | (_, JsonbItemType::Boolean) => Some(Ordering::Less), 77 | 78 | (JsonbItemType::Extension, JsonbItemType::Extension) => None, 79 | } 80 | } 81 | } 82 | 83 | /// `JsonbItem` is an internal enum used primarily within `ArrayIterator` and 84 | /// `ObjectIterator` to represent temporary values during iteration. It is also 85 | /// utilized by `ArrayBuilder` and `ObjectBuilder` to store intermediate variables 86 | /// during the construction of JSONB objects and arrays. 87 | /// 88 | /// This enum encapsulates different types of JSONB values, allowing iterators and 89 | /// builders to handle various data types uniformly. It supports null values, 90 | /// booleans, numbers (represented as byte slices), strings (represented as byte slices), 91 | /// raw JSONB data (`RawJsonb`), and owned JSONB data (`OwnedJsonb`). 92 | #[derive(Debug, Clone)] 93 | pub(crate) enum JsonbItem<'a> { 94 | /// Represents a JSONB null value. 95 | Null, 96 | /// Represents a JSONB boolean value. 97 | Boolean(bool), 98 | /// Represents a JSONB number, stored as a byte slice. 99 | Number(&'a [u8]), 100 | /// Represents a JSONB string, stored as a byte slice. 101 | String(&'a [u8]), 102 | /// Represents a JSONB extension values, stored as a byte slice. 103 | Extension(&'a [u8]), 104 | /// Represents raw JSONB data, using a borrowed slice. 105 | Raw(RawJsonb<'a>), 106 | /// Represents owned JSONB data. 
107 | Owned(OwnedJsonb), 108 | } 109 | 110 | impl<'a> JsonbItem<'a> { 111 | pub(crate) fn jsonb_item_type(&self) -> Result { 112 | match self { 113 | JsonbItem::Null => Ok(JsonbItemType::Null), 114 | JsonbItem::Boolean(_) => Ok(JsonbItemType::Boolean), 115 | JsonbItem::Number(_) => Ok(JsonbItemType::Number), 116 | JsonbItem::String(_) => Ok(JsonbItemType::String), 117 | JsonbItem::Extension(_) => Ok(JsonbItemType::Extension), 118 | JsonbItem::Raw(raw) => raw.jsonb_item_type(), 119 | JsonbItem::Owned(owned) => owned.as_raw().jsonb_item_type(), 120 | } 121 | } 122 | 123 | pub(crate) fn as_raw_jsonb(&self) -> Option> { 124 | match self { 125 | JsonbItem::Raw(raw_jsonb) => Some(*raw_jsonb), 126 | _ => None, 127 | } 128 | } 129 | 130 | pub(crate) fn as_null(&self) -> Option<()> { 131 | match self { 132 | JsonbItem::Null => Some(()), 133 | _ => None, 134 | } 135 | } 136 | 137 | pub(crate) fn as_str(&self) -> Option<&'a str> { 138 | match self { 139 | JsonbItem::String(data) => { 140 | let s = unsafe { std::str::from_utf8_unchecked(data) }; 141 | Some(s) 142 | } 143 | _ => None, 144 | } 145 | } 146 | } 147 | 148 | impl Eq for JsonbItem<'_> {} 149 | 150 | impl PartialEq for JsonbItem<'_> { 151 | fn eq(&self, other: &Self) -> bool { 152 | self.partial_cmp(other) == Some(Ordering::Equal) 153 | } 154 | } 155 | 156 | #[allow(clippy::non_canonical_partial_ord_impl)] 157 | impl PartialOrd for JsonbItem<'_> { 158 | fn partial_cmp(&self, other: &Self) -> Option { 159 | let self_type = self.jsonb_item_type().ok()?; 160 | let other_type = other.jsonb_item_type().ok()?; 161 | 162 | // First use JSONB type to determine the order, 163 | // different types must have different orders. 
164 | if let Some(ord) = self_type.partial_cmp(&other_type) { 165 | return Some(ord); 166 | } 167 | 168 | let self_item = if let JsonbItem::Owned(owned) = self { 169 | &JsonbItem::Raw(owned.as_raw()) 170 | } else { 171 | self 172 | }; 173 | let other_item = if let JsonbItem::Owned(owned) = other { 174 | &JsonbItem::Raw(owned.as_raw()) 175 | } else { 176 | other 177 | }; 178 | 179 | match (self_item, other_item) { 180 | (JsonbItem::Raw(self_raw), JsonbItem::Raw(other_raw)) => { 181 | self_raw.partial_cmp(other_raw) 182 | } 183 | // compare null, raw jsonb must not null 184 | (JsonbItem::Raw(_), JsonbItem::Null) => Some(Ordering::Less), 185 | (JsonbItem::Null, JsonbItem::Raw(_)) => Some(Ordering::Greater), 186 | // compare extension 187 | (JsonbItem::Extension(self_data), JsonbItem::Extension(other_data)) => { 188 | let self_val = ExtensionValue::decode(self_data).ok()?; 189 | let other_val = ExtensionValue::decode(other_data).ok()?; 190 | self_val.partial_cmp(&other_val) 191 | } 192 | (JsonbItem::Raw(self_raw), JsonbItem::Extension(other_data)) => { 193 | let self_val = self_raw.as_extension_value(); 194 | let other_val = ExtensionValue::decode(other_data).ok()?; 195 | if let Ok(Some(self_val)) = self_val { 196 | self_val.partial_cmp(&other_val) 197 | } else { 198 | None 199 | } 200 | } 201 | (JsonbItem::Extension(self_data), JsonbItem::Raw(other_raw)) => { 202 | let self_val = ExtensionValue::decode(self_data).ok()?; 203 | let other_val = other_raw.as_extension_value(); 204 | if let Ok(Some(other_val)) = other_val { 205 | self_val.partial_cmp(&other_val) 206 | } else { 207 | None 208 | } 209 | } 210 | // compare boolean 211 | (JsonbItem::Boolean(self_val), JsonbItem::Boolean(other_val)) => { 212 | self_val.partial_cmp(other_val) 213 | } 214 | (JsonbItem::Raw(self_raw), JsonbItem::Boolean(other_val)) => { 215 | let self_val = self_raw.as_bool(); 216 | if let Ok(Some(self_val)) = self_val { 217 | self_val.partial_cmp(other_val) 218 | } else { 219 | None 220 | } 221 | 
} 222 | (JsonbItem::Boolean(self_val), JsonbItem::Raw(other_raw)) => { 223 | let other_val = other_raw.as_bool(); 224 | if let Ok(Some(other_val)) = other_val { 225 | self_val.partial_cmp(&other_val) 226 | } else { 227 | None 228 | } 229 | } 230 | // compare number 231 | (JsonbItem::Number(self_data), JsonbItem::Number(other_data)) => { 232 | let self_num = Number::decode(self_data).ok()?; 233 | let other_num = Number::decode(other_data).ok()?; 234 | self_num.partial_cmp(&other_num) 235 | } 236 | (JsonbItem::Raw(self_raw), JsonbItem::Number(other_data)) => { 237 | let self_num = self_raw.as_number(); 238 | let other_num = Number::decode(other_data).ok()?; 239 | if let Ok(Some(self_num)) = self_num { 240 | self_num.partial_cmp(&other_num) 241 | } else { 242 | None 243 | } 244 | } 245 | (JsonbItem::Number(self_data), JsonbItem::Raw(other_raw)) => { 246 | let self_num = Number::decode(self_data).ok()?; 247 | let other_num = other_raw.as_number(); 248 | if let Ok(Some(other_num)) = other_num { 249 | self_num.partial_cmp(&other_num) 250 | } else { 251 | None 252 | } 253 | } 254 | // compare string 255 | (JsonbItem::String(self_data), JsonbItem::String(other_data)) => { 256 | let self_str = unsafe { std::str::from_utf8_unchecked(self_data) }; 257 | let other_str = unsafe { std::str::from_utf8_unchecked(other_data) }; 258 | self_str.partial_cmp(other_str) 259 | } 260 | (JsonbItem::Raw(self_raw), JsonbItem::String(other_data)) => { 261 | let self_str = self_raw.as_str(); 262 | let other_str = Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(other_data) }); 263 | if let Ok(Some(self_str)) = self_str { 264 | self_str.partial_cmp(&other_str) 265 | } else { 266 | None 267 | } 268 | } 269 | (JsonbItem::String(self_data), JsonbItem::Raw(other_raw)) => { 270 | let self_str = Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(self_data) }); 271 | let other_str = other_raw.as_str(); 272 | if let Ok(Some(other_str)) = other_str { 273 | self_str.partial_cmp(&other_str) 274 | } 
else { 275 | None 276 | } 277 | } 278 | (_, _) => None, 279 | } 280 | } 281 | } 282 | 283 | impl Ord for JsonbItem<'_> { 284 | fn cmp(&self, other: &Self) -> Ordering { 285 | match self.partial_cmp(other) { 286 | Some(ordering) => ordering, 287 | None => Ordering::Equal, 288 | } 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /src/core/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #[cfg(feature = "databend")] 16 | mod databend; 17 | #[cfg(feature = "sqlite")] 18 | mod sqlite; 19 | 20 | mod item; 21 | 22 | #[cfg(feature = "databend")] 23 | pub use databend::*; 24 | pub(crate) use item::*; 25 | #[cfg(feature = "sqlite")] 26 | #[allow(unused_imports)] 27 | pub use sqlite::*; 28 | -------------------------------------------------------------------------------- /src/core/sqlite/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use core::fmt::Display; 16 | 17 | use serde::de; 18 | use serde::ser; 19 | 20 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 21 | pub enum ParseErrorCode { 22 | InvalidEOF, 23 | InvalidNumberValue, 24 | InvalidStringValue, 25 | ExpectedSomeIdent, 26 | ExpectedSomeValue, 27 | ExpectedColon, 28 | ExpectedArrayCommaOrEnd, 29 | ExpectedObjectCommaOrEnd, 30 | UnexpectedTrailingCharacters, 31 | KeyMustBeAString, 32 | ControlCharacterWhileParsingString, 33 | InvalidEscaped(u8), 34 | InvalidHex(u8), 35 | InvalidLoneLeadingSurrogateInHexEscape(u16), 36 | InvalidSurrogateInHexEscape(u16), 37 | UnexpectedEndOfHexEscape, 38 | } 39 | 40 | pub type Result = std::result::Result; 41 | 42 | impl Display for ParseErrorCode { 43 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 44 | match *self { 45 | ParseErrorCode::InvalidEOF => f.write_str("EOF while parsing a value"), 46 | ParseErrorCode::InvalidNumberValue => f.write_str("invalid number"), 47 | ParseErrorCode::InvalidStringValue => f.write_str("invalid string"), 48 | ParseErrorCode::ExpectedSomeIdent => f.write_str("expected ident"), 49 | ParseErrorCode::ExpectedSomeValue => f.write_str("expected value"), 50 | ParseErrorCode::ExpectedColon => f.write_str("expected `:`"), 51 | ParseErrorCode::ExpectedArrayCommaOrEnd => f.write_str("expected `,` or `]`"), 52 | ParseErrorCode::ExpectedObjectCommaOrEnd => f.write_str("expected `,` or `}`"), 53 | ParseErrorCode::UnexpectedTrailingCharacters => f.write_str("trailing characters"), 54 | ParseErrorCode::KeyMustBeAString => f.write_str("key must be a string"), 55 | ParseErrorCode::ControlCharacterWhileParsingString => { 56 | f.write_str("control character (\\u0000-\\u001F) found while parsing a string") 57 | } 58 | ParseErrorCode::InvalidEscaped(n) => { 59 | write!(f, "invalid escaped '{:X}'", n) 60 | } 61 | ParseErrorCode::InvalidHex(n) => { 62 | write!(f, "invalid hex '{:X}'", n) 63 | } 64 | ParseErrorCode::InvalidLoneLeadingSurrogateInHexEscape(n) => { 65 | 
write!(f, "lone leading surrogate in hex escape '{:X}'", n) 66 | } 67 | ParseErrorCode::InvalidSurrogateInHexEscape(n) => { 68 | write!(f, "invalid surrogate in hex escape '{:X}'", n) 69 | } 70 | ParseErrorCode::UnexpectedEndOfHexEscape => f.write_str("unexpected end of hex escape"), 71 | } 72 | } 73 | } 74 | 75 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 76 | #[non_exhaustive] 77 | pub enum Error { 78 | InvalidUtf8, 79 | InvalidEOF, 80 | InvalidToken, 81 | InvalidCast, 82 | 83 | InvalidJson, 84 | InvalidJsonb, 85 | InvalidJsonbHeader, 86 | InvalidJsonbJEntry, 87 | InvalidJsonbNumber, 88 | InvalidJsonbExtension, 89 | 90 | InvalidJsonPath, 91 | InvalidJsonPathPredicate, 92 | InvalidKeyPath, 93 | 94 | InvalidJsonType, 95 | InvalidObject, 96 | ObjectDuplicateKey, 97 | UnexpectedType, 98 | 99 | Message(String), 100 | Syntax(ParseErrorCode, usize), 101 | } 102 | 103 | impl ser::Error for Error { 104 | fn custom(msg: T) -> Self { 105 | Error::Message(msg.to_string()) 106 | } 107 | } 108 | 109 | impl de::Error for Error { 110 | fn custom(msg: T) -> Self { 111 | Error::Message(msg.to_string()) 112 | } 113 | } 114 | 115 | impl Display for Error { 116 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 117 | match self { 118 | Error::Message(m) => write!(f, "{}", m), 119 | Error::Syntax(code, pos) => write!(f, "{}, pos {}", code, pos), 120 | _ => write!(f, "{:?}", self), 121 | } 122 | } 123 | } 124 | 125 | impl std::error::Error for Error { 126 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 127 | None 128 | } 129 | } 130 | 131 | impl From for Error { 132 | fn from(_error: std::io::Error) -> Self { 133 | Error::InvalidUtf8 134 | } 135 | } 136 | 137 | impl From for Error { 138 | fn from(_error: std::str::Utf8Error) -> Self { 139 | Error::InvalidUtf8 140 | } 141 | } 142 | 143 | impl From>> for Error { 144 | fn from(_error: nom::Err>) -> Self { 145 | Error::InvalidJsonb 146 | } 147 | } 148 | 
-------------------------------------------------------------------------------- /src/extension.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::cmp::Ordering; 16 | use std::fmt::Debug; 17 | use std::fmt::Display; 18 | use std::fmt::Formatter; 19 | 20 | use jiff::civil::date; 21 | use jiff::fmt::strtime; 22 | use jiff::tz::Offset; 23 | use jiff::SignedDuration; 24 | 25 | const MICROS_PER_SEC: i64 = 1_000_000; 26 | const MICROS_PER_MINUTE: i64 = 60 * MICROS_PER_SEC; 27 | const MICROS_PER_HOUR: i64 = 60 * MICROS_PER_MINUTE; 28 | const MONTHS_PER_YEAR: i32 = 12; 29 | 30 | const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.6f"; 31 | 32 | #[derive(Debug, Clone)] 33 | pub enum ExtensionValue<'a> { 34 | Binary(&'a [u8]), 35 | Date(Date), 36 | Timestamp(Timestamp), 37 | TimestampTz(TimestampTz), 38 | Interval(Interval), 39 | } 40 | 41 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 42 | pub struct Date { 43 | pub value: i32, 44 | } 45 | 46 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 47 | pub struct Timestamp { 48 | pub value: i64, 49 | } 50 | 51 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 52 | pub struct TimestampTz { 53 | pub offset: i8, 54 | pub value: i64, 55 | } 56 | 57 | #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)] 58 | pub struct 
Interval { 59 | pub months: i32, 60 | pub days: i32, 61 | pub micros: i64, 62 | } 63 | 64 | impl Display for Date { 65 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 66 | let dur = SignedDuration::from_hours(self.value as i64 * 24); 67 | let date = date(1970, 1, 1).checked_add(dur).unwrap(); 68 | write!(f, "{}", date) 69 | } 70 | } 71 | 72 | impl Display for Timestamp { 73 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 74 | let micros = self.value; 75 | let (mut secs, mut nanos) = (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); 76 | if nanos < 0 { 77 | secs -= 1; 78 | nanos += 1_000_000_000; 79 | } 80 | 81 | if secs > 253402207200 { 82 | secs = 253402207200; 83 | nanos = 0; 84 | } else if secs < -377705023201 { 85 | secs = -377705023201; 86 | nanos = 0; 87 | } 88 | let ts = jiff::Timestamp::new(secs, nanos as i32).unwrap(); 89 | 90 | write!(f, "{}", strtime::format(TIMESTAMP_FORMAT, ts).unwrap()) 91 | } 92 | } 93 | 94 | impl Display for TimestampTz { 95 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 96 | let micros = self.value; 97 | let (mut secs, mut nanos) = (micros / MICROS_PER_SEC, (micros % MICROS_PER_SEC) * 1_000); 98 | if nanos < 0 { 99 | secs -= 1; 100 | nanos += 1_000_000_000; 101 | } 102 | 103 | if secs > 253402207200 { 104 | secs = 253402207200; 105 | nanos = 0; 106 | } else if secs < -377705023201 { 107 | secs = -377705023201; 108 | nanos = 0; 109 | } 110 | let ts = jiff::Timestamp::new(secs, nanos as i32).unwrap(); 111 | let tz = Offset::constant(self.offset).to_time_zone(); 112 | let zoned = ts.to_zoned(tz); 113 | 114 | write!(f, "{}", strtime::format(TIMESTAMP_FORMAT, &zoned).unwrap()) 115 | } 116 | } 117 | 118 | impl Display for Interval { 119 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 120 | let mut date_parts = vec![]; 121 | let years = self.months / MONTHS_PER_YEAR; 122 | let months = self.months % MONTHS_PER_YEAR; 123 | match years.cmp(&1) { 124 | Ordering::Equal => { 125 | 
date_parts.push((years, "year")); 126 | } 127 | Ordering::Greater => { 128 | date_parts.push((years, "years")); 129 | } 130 | _ => {} 131 | } 132 | match months.cmp(&1) { 133 | Ordering::Equal => { 134 | date_parts.push((months, "month")); 135 | } 136 | Ordering::Greater => { 137 | date_parts.push((months, "months")); 138 | } 139 | _ => {} 140 | } 141 | match self.days.cmp(&1) { 142 | Ordering::Equal => { 143 | date_parts.push((self.days, "day")); 144 | } 145 | Ordering::Greater => { 146 | date_parts.push((self.days, "days")); 147 | } 148 | _ => {} 149 | } 150 | if !date_parts.is_empty() { 151 | for (i, (val, name)) in date_parts.into_iter().enumerate() { 152 | if i > 0 { 153 | write!(f, " ")?; 154 | } 155 | write!(f, "{} {}", val, name)?; 156 | } 157 | if self.micros != 0 { 158 | write!(f, " ")?; 159 | } 160 | } 161 | 162 | if self.micros != 0 { 163 | let mut micros = self.micros; 164 | if micros < 0 { 165 | write!(f, "-")?; 166 | micros = -micros; 167 | } 168 | let hour = micros / MICROS_PER_HOUR; 169 | micros -= hour * MICROS_PER_HOUR; 170 | let min = micros / MICROS_PER_MINUTE; 171 | micros -= min * MICROS_PER_MINUTE; 172 | let sec = micros / MICROS_PER_SEC; 173 | micros -= sec * MICROS_PER_SEC; 174 | 175 | if hour < 100 { 176 | write!(f, "{:02}:{:02}:{:02}", hour, min, sec)?; 177 | } else { 178 | write!(f, "{}:{:02}:{:02}", hour, min, sec)?; 179 | } 180 | if micros != 0 { 181 | write!(f, ".{:06}", micros)?; 182 | } 183 | } else if self.months == 0 && self.days == 0 { 184 | write!(f, "00:00:00")?; 185 | } 186 | Ok(()) 187 | } 188 | } 189 | 190 | impl Display for ExtensionValue<'_> { 191 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 192 | match self { 193 | ExtensionValue::Binary(v) => { 194 | for c in *v { 195 | write!(f, "{c:02X}")?; 196 | } 197 | Ok(()) 198 | } 199 | ExtensionValue::Date(v) => write!(f, "{}", v), 200 | ExtensionValue::Timestamp(v) => write!(f, "{}", v), 201 | ExtensionValue::TimestampTz(v) => write!(f, "{}", v), 202 | 
ExtensionValue::Interval(v) => write!(f, "{}", v), 203 | } 204 | } 205 | } 206 | 207 | impl Eq for ExtensionValue<'_> {} 208 | 209 | impl PartialEq for ExtensionValue<'_> { 210 | fn eq(&self, other: &Self) -> bool { 211 | self.partial_cmp(other) == Some(Ordering::Equal) 212 | } 213 | } 214 | 215 | #[allow(clippy::non_canonical_partial_ord_impl)] 216 | impl PartialOrd for ExtensionValue<'_> { 217 | fn partial_cmp(&self, other: &Self) -> Option { 218 | let self_level = match self { 219 | ExtensionValue::Binary(_) => 0, 220 | ExtensionValue::Date(_) => 1, 221 | ExtensionValue::Timestamp(_) => 2, 222 | ExtensionValue::TimestampTz(_) => 3, 223 | ExtensionValue::Interval(_) => 4, 224 | }; 225 | let other_level = match other { 226 | ExtensionValue::Binary(_) => 0, 227 | ExtensionValue::Date(_) => 1, 228 | ExtensionValue::Timestamp(_) => 2, 229 | ExtensionValue::TimestampTz(_) => 3, 230 | ExtensionValue::Interval(_) => 4, 231 | }; 232 | let res = self_level.cmp(&other_level); 233 | if matches!(res, Ordering::Greater | Ordering::Less) { 234 | return Some(res); 235 | } 236 | 237 | match (self, other) { 238 | (ExtensionValue::Binary(self_data), ExtensionValue::Binary(other_data)) => { 239 | Some(self_data.cmp(other_data)) 240 | } 241 | (ExtensionValue::Date(self_data), ExtensionValue::Date(other_data)) => { 242 | Some(self_data.cmp(other_data)) 243 | } 244 | (ExtensionValue::Timestamp(self_data), ExtensionValue::Timestamp(other_data)) => { 245 | Some(self_data.cmp(other_data)) 246 | } 247 | (ExtensionValue::TimestampTz(self_data), ExtensionValue::TimestampTz(other_data)) => { 248 | Some(self_data.cmp(other_data)) 249 | } 250 | (ExtensionValue::Interval(self_data), ExtensionValue::Interval(other_data)) => { 251 | Some(self_data.cmp(other_data)) 252 | } 253 | (_, _) => None, 254 | } 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /src/from.rs: -------------------------------------------------------------------------------- 
1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use core::iter::FromIterator; 16 | use std::borrow::Cow; 17 | 18 | use ordered_float::OrderedFloat; 19 | use serde_json::Map as JsonMap; 20 | use serde_json::Number as JsonNumber; 21 | use serde_json::Value as JsonValue; 22 | 23 | use super::number::Number; 24 | use super::value::Object; 25 | use super::value::Value; 26 | 27 | macro_rules! from_signed_integer { 28 | ($($ty:ident)*) => { 29 | $( 30 | impl<'a> From<$ty> for Value<'a> { 31 | fn from(n: $ty) -> Self { 32 | Value::Number(Number::Int64(n as i64)) 33 | } 34 | } 35 | )* 36 | }; 37 | } 38 | 39 | macro_rules! from_unsigned_integer { 40 | ($($ty:ident)*) => { 41 | $( 42 | impl<'a> From<$ty> for Value<'a> { 43 | fn from(n: $ty) -> Self { 44 | Value::Number(Number::UInt64(n as u64)) 45 | } 46 | } 47 | )* 48 | }; 49 | } 50 | 51 | macro_rules! from_float { 52 | ($($ty:ident)*) => { 53 | $( 54 | impl<'a> From<$ty> for Value<'a> { 55 | fn from(n: $ty) -> Self { 56 | Value::Number(Number::Float64(n as f64)) 57 | } 58 | } 59 | )* 60 | }; 61 | } 62 | 63 | from_signed_integer! { 64 | i8 i16 i32 i64 isize 65 | } 66 | 67 | from_unsigned_integer! { 68 | u8 u16 u32 u64 usize 69 | } 70 | 71 | from_float! 
{ 72 | f32 f64 73 | } 74 | 75 | impl From> for Value<'_> { 76 | fn from(f: OrderedFloat) -> Self { 77 | Value::Number(Number::Float64(f.0 as f64)) 78 | } 79 | } 80 | 81 | impl From> for Value<'_> { 82 | fn from(f: OrderedFloat) -> Self { 83 | Value::Number(Number::Float64(f.0)) 84 | } 85 | } 86 | 87 | impl From for Value<'_> { 88 | fn from(f: bool) -> Self { 89 | Value::Bool(f) 90 | } 91 | } 92 | 93 | impl From for Value<'_> { 94 | fn from(f: String) -> Self { 95 | Value::String(f.into()) 96 | } 97 | } 98 | 99 | impl<'a> From<&'a str> for Value<'a> { 100 | fn from(f: &'a str) -> Self { 101 | Value::String(Cow::from(f)) 102 | } 103 | } 104 | 105 | impl<'a> From> for Value<'a> { 106 | fn from(f: Cow<'a, str>) -> Self { 107 | Value::String(f) 108 | } 109 | } 110 | 111 | impl<'a> From> for Value<'a> { 112 | fn from(o: Object<'a>) -> Self { 113 | Value::Object(o) 114 | } 115 | } 116 | 117 | impl<'a, T: Into>> From> for Value<'a> { 118 | fn from(f: Vec) -> Self { 119 | Value::Array(f.into_iter().map(Into::into).collect()) 120 | } 121 | } 122 | 123 | impl<'a, T: Clone + Into>> From<&'a [T]> for Value<'a> { 124 | fn from(f: &'a [T]) -> Self { 125 | Value::Array(f.iter().cloned().map(Into::into).collect()) 126 | } 127 | } 128 | 129 | impl<'a, T: Into>> FromIterator for Value<'a> { 130 | fn from_iter>(iter: I) -> Self { 131 | Value::Array(iter.into_iter().map(Into::into).collect()) 132 | } 133 | } 134 | 135 | impl<'a, K: Into, V: Into>> FromIterator<(K, V)> for Value<'a> { 136 | fn from_iter>(iter: I) -> Self { 137 | Value::Object( 138 | iter.into_iter() 139 | .map(|(k, v)| (k.into(), v.into())) 140 | .collect(), 141 | ) 142 | } 143 | } 144 | 145 | impl From<()> for Value<'_> { 146 | fn from((): ()) -> Self { 147 | Value::Null 148 | } 149 | } 150 | 151 | impl From<&JsonValue> for Value<'_> { 152 | fn from(value: &JsonValue) -> Self { 153 | match value { 154 | JsonValue::Null => Value::Null, 155 | JsonValue::Bool(v) => Value::Bool(*v), 156 | JsonValue::Number(v) => { 157 | if 
v.is_u64() { 158 | Value::Number(Number::UInt64(v.as_u64().unwrap())) 159 | } else if v.is_i64() { 160 | Value::Number(Number::Int64(v.as_i64().unwrap())) 161 | } else { 162 | Value::Number(Number::Float64(v.as_f64().unwrap())) 163 | } 164 | } 165 | JsonValue::String(v) => Value::String(v.clone().into()), 166 | JsonValue::Array(arr) => { 167 | let mut vals: Vec = Vec::with_capacity(arr.len()); 168 | for val in arr { 169 | vals.push(val.into()); 170 | } 171 | Value::Array(vals) 172 | } 173 | JsonValue::Object(obj) => { 174 | let mut map = Object::new(); 175 | for (k, v) in obj.iter() { 176 | let val: Value = v.into(); 177 | map.insert(k.to_string(), val); 178 | } 179 | Value::Object(map) 180 | } 181 | } 182 | } 183 | } 184 | 185 | impl From for Value<'_> { 186 | fn from(value: JsonValue) -> Self { 187 | (&value).into() 188 | } 189 | } 190 | 191 | impl<'a> From> for JsonValue { 192 | fn from(value: Value<'a>) -> Self { 193 | match value { 194 | Value::Null => JsonValue::Null, 195 | Value::Bool(v) => JsonValue::Bool(v), 196 | Value::Number(v) => match v { 197 | Number::Int64(v) => JsonValue::Number(v.into()), 198 | Number::UInt64(v) => JsonValue::Number(v.into()), 199 | Number::Float64(v) => JsonValue::Number(JsonNumber::from_f64(v).unwrap()), 200 | Number::Decimal128(v) => { 201 | JsonValue::Number(JsonNumber::from_f64(v.to_float64()).unwrap()) 202 | } 203 | Number::Decimal256(v) => { 204 | JsonValue::Number(JsonNumber::from_f64(v.to_float64()).unwrap()) 205 | } 206 | }, 207 | Value::String(v) => JsonValue::String(v.to_string()), 208 | Value::Binary(v) => { 209 | let mut s = String::new(); 210 | for c in v { 211 | s.push_str(&format!("{c:02X}")); 212 | } 213 | JsonValue::String(s) 214 | } 215 | Value::Date(v) => { 216 | let s = format!("{}", v); 217 | JsonValue::String(s) 218 | } 219 | Value::Timestamp(v) => { 220 | let s = format!("{}", v); 221 | JsonValue::String(s) 222 | } 223 | Value::TimestampTz(v) => { 224 | let s = format!("{}", v); 225 | JsonValue::String(s) 
226 | } 227 | Value::Interval(v) => { 228 | let s = format!("{}", v); 229 | JsonValue::String(s) 230 | } 231 | Value::Array(arr) => { 232 | let mut vals: Vec = Vec::with_capacity(arr.len()); 233 | for val in arr { 234 | vals.push(val.into()); 235 | } 236 | JsonValue::Array(vals) 237 | } 238 | Value::Object(obj) => { 239 | let mut map = JsonMap::new(); 240 | for (k, v) in obj.iter() { 241 | let val: JsonValue = v.clone().into(); 242 | map.insert(k.to_string(), val); 243 | } 244 | JsonValue::Object(map) 245 | } 246 | } 247 | } 248 | } 249 | -------------------------------------------------------------------------------- /src/functions/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | mod array; 16 | mod object; 17 | mod operator; 18 | mod path; 19 | mod scalar; 20 | -------------------------------------------------------------------------------- /src/functions/object.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // This file contains functions that specifically operate on JSONB object values. 16 | 17 | use std::collections::BTreeSet; 18 | 19 | use crate::core::ArrayBuilder; 20 | use crate::core::JsonbItem; 21 | use crate::core::ObjectBuilder; 22 | use crate::core::ObjectIterator; 23 | use crate::core::ObjectKeyIterator; 24 | use crate::error::*; 25 | use crate::OwnedJsonb; 26 | use crate::RawJsonb; 27 | 28 | impl RawJsonb<'_> { 29 | /// Returns an `OwnedJsonb` array containing the keys of the JSONB object. 30 | /// 31 | /// If the JSONB value is an object, this function returns a new `OwnedJsonb` array containing the keys of the object as string values. 32 | /// The order of the keys in the returned array is the same as their order in the original object. 33 | /// If the JSONB value is not an object (e.g., it's an array or a scalar), this function returns `None`. 34 | /// 35 | /// # Arguments 36 | /// 37 | /// * `self` - The JSONB value. 38 | /// 39 | /// # Returns 40 | /// 41 | /// * `Ok(Some(OwnedJsonb))` - An `OwnedJsonb` representing the array of keys if the input is an object. 42 | /// * `Ok(None)` - If the input is not an object. 43 | /// * `Err(Error)` - If the input JSONB data is invalid. 
44 | /// 45 | /// # Examples 46 | /// 47 | /// ```rust 48 | /// use jsonb::OwnedJsonb; 49 | /// 50 | /// // Object keys 51 | /// let obj_jsonb = r#"{"a": 1, "b": 2, "c": 3}"#.parse::<OwnedJsonb>().unwrap(); 52 | /// let raw_jsonb = obj_jsonb.as_raw(); 53 | /// let keys_result = raw_jsonb.object_keys(); 54 | /// assert!(keys_result.is_ok()); 55 | /// 56 | /// let keys_jsonb = keys_result.unwrap(); 57 | /// assert_eq!( 58 | /// keys_jsonb.as_ref().map(|k| k.to_string()), 59 | /// Some(r#"["a","b","c"]"#.to_string()) 60 | /// ); 61 | /// 62 | /// // Array - returns None 63 | /// let arr_jsonb = "[1, 2, 3]".parse::<OwnedJsonb>().unwrap(); 64 | /// let raw_jsonb = arr_jsonb.as_raw(); 65 | /// let keys_result = raw_jsonb.object_keys(); 66 | /// assert!(keys_result.is_ok()); 67 | /// assert!(keys_result.unwrap().is_none()); 68 | /// 69 | /// // Scalar - returns None 70 | /// let scalar_jsonb = "1".parse::<OwnedJsonb>().unwrap(); 71 | /// let raw_jsonb = scalar_jsonb.as_raw(); 72 | /// let keys_result = raw_jsonb.object_keys(); 73 | /// assert!(keys_result.is_ok()); 74 | /// assert!(keys_result.unwrap().is_none()); 75 | /// ``` 76 | pub fn object_keys(&self) -> Result<Option<OwnedJsonb>> { 77 | let object_key_iter_opt = ObjectKeyIterator::new(*self)?; 78 | match object_key_iter_opt { 79 | Some(mut object_key_iter) => { 80 | let mut builder = ArrayBuilder::with_capacity(object_key_iter.len()); 81 | for key_result in &mut object_key_iter { 82 | let key_item = key_result?; 83 | builder.push_jsonb_item(key_item); 84 | } 85 | Ok(Some(builder.build()?)) 86 | } 87 | None => Ok(None), 88 | } 89 | } 90 | 91 | /// Iterates over the key-value pairs of a JSONB object. 92 | /// 93 | /// If the JSONB value is an object, this function returns a vector of tuples, where each tuple contains 94 | /// the key (as a `String`) and the value (as an `OwnedJsonb`) of a key-value pair. 95 | /// The order of the key-value pairs in the returned vector is the same as their order in the original object.
96 | /// If the JSONB value is not an object (e.g., it's an array or a scalar), this function returns `None`. 97 | /// 98 | /// # Arguments 99 | /// 100 | /// * `self` - The JSONB value. 101 | /// 102 | /// # Returns 103 | /// 104 | /// * `Ok(Some(Vec<(String, OwnedJsonb)>))` - A vector of tuples representing the key-value pairs if the input is an object. 105 | /// * `Ok(None)` - If the input is not an object. 106 | /// * `Err(Error)` - If the input JSONB data is invalid. 107 | /// 108 | /// # Examples 109 | /// 110 | /// ```rust 111 | /// use jsonb::OwnedJsonb; 112 | /// 113 | /// // Object iteration 114 | /// let obj_jsonb = r#"{"a": 1, "b": "hello", "c": [1, 2]}"#.parse::<OwnedJsonb>().unwrap(); 115 | /// let raw_jsonb = obj_jsonb.as_raw(); 116 | /// let items_result = raw_jsonb.object_each(); 117 | /// assert!(items_result.is_ok()); 118 | /// 119 | /// let items = items_result.unwrap().unwrap(); 120 | /// assert_eq!(items.len(), 3); 121 | /// 122 | /// assert_eq!(items[0].0, "a"); 123 | /// assert_eq!(items[0].1.to_string(), "1"); 124 | /// assert_eq!(items[1].0, "b"); 125 | /// assert_eq!(items[1].1.to_string(), r#""hello""#); 126 | /// assert_eq!(items[2].0, "c"); 127 | /// assert_eq!(items[2].1.to_string(), r#"[1,2]"#); 128 | /// 129 | /// // Array - returns None 130 | /// let arr_jsonb = "[1, 2, 3]".parse::<OwnedJsonb>().unwrap(); 131 | /// let raw_jsonb = arr_jsonb.as_raw(); 132 | /// let items_result = raw_jsonb.object_each(); 133 | /// assert!(items_result.is_ok()); 134 | /// assert!(items_result.unwrap().is_none()); 135 | /// 136 | /// // Scalar - returns None 137 | /// let scalar_jsonb = "1".parse::<OwnedJsonb>().unwrap(); 138 | /// let raw_jsonb = scalar_jsonb.as_raw(); 139 | /// let items_result = raw_jsonb.object_each(); 140 | /// assert!(items_result.is_ok()); 141 | /// assert!(items_result.unwrap().is_none()); 142 | /// ``` 143 | pub fn object_each(&self) -> Result<Option<Vec<(String, OwnedJsonb)>>> { 144 | let object_iter_opt = ObjectIterator::new(*self)?; 145 | match object_iter_opt { 146 | Some(mut object_iter)
=> { 147 | let mut items = Vec::with_capacity(object_iter.len()); 148 | for result in &mut object_iter { 149 | let (key, val_item) = result?; 150 | let owned_jsonb_val = OwnedJsonb::from_item(val_item)?; 151 | items.push((key.to_string(), owned_jsonb_val)); 152 | } 153 | Ok(Some(items)) 154 | } 155 | None => Ok(None), 156 | } 157 | } 158 | 159 | /// Inserts or updates a key-value pair in a JSONB object. 160 | /// 161 | /// This function inserts a new key-value pair into a JSONB object or updates an existing key-value pair if the key already exists. 162 | /// The behavior is controlled by the `update_flag`: 163 | /// * `update_flag = true`: If the key already exists, its value is updated with `new_val`. If the key does not exist, it is inserted. 164 | /// * `update_flag = false`: If the key already exists, an error (`Error::ObjectDuplicateKey`) is returned. If the key does not exist, it is inserted. 165 | /// 166 | /// The input JSONB value must be an object; otherwise, an error (`Error::InvalidObject`) is returned. 167 | /// 168 | /// # Arguments 169 | /// 170 | /// * `self` - The JSONB object. 171 | /// * `new_key` - The key to insert or update. 172 | /// * `new_val` - The new JSONB value. 173 | /// * `update_flag` - A boolean indicating whether to update an existing key (true) or fail if a duplicate key is found (false). 174 | /// 175 | /// # Returns 176 | /// 177 | /// * `Ok(OwnedJsonb)` - The modified JSONB object. 178 | /// * `Err(Error)` - If the input is not a JSONB object, if `update_flag` is false and the key already exists, or if the JSONB data is invalid.
179 | /// 180 | /// # Examples 181 | /// 182 | /// ```rust 183 | /// use jsonb::OwnedJsonb; 184 | /// 185 | /// // Inserting a new key-value pair 186 | /// let obj_jsonb = r#"{"a": 1}"#.parse::<OwnedJsonb>().unwrap(); 187 | /// let raw_jsonb = obj_jsonb.as_raw(); 188 | /// let new_jsonb = "2".parse::<OwnedJsonb>().unwrap(); 189 | /// let new_raw_jsonb = new_jsonb.as_raw(); 190 | /// let inserted = raw_jsonb.object_insert("b", &new_raw_jsonb, false).unwrap(); 191 | /// assert_eq!(inserted.to_string(), r#"{"a":1,"b":2}"#); 192 | /// 193 | /// // Updating an existing key-value pair 194 | /// let new_jsonb = r#"3"#.parse::<OwnedJsonb>().unwrap(); 195 | /// let new_raw_jsonb = new_jsonb.as_raw(); 196 | /// let updated = inserted 197 | /// .as_raw() 198 | /// .object_insert("b", &new_raw_jsonb, true) 199 | /// .unwrap(); 200 | /// assert_eq!(updated.to_string(), r#"{"a":1,"b":3}"#); 201 | /// 202 | /// // Attempting to insert a duplicate key without update 203 | /// let result = raw_jsonb.object_insert("a", &new_raw_jsonb, false); 204 | /// assert!(result.is_err()); // Returns an error because key "a" already exists 205 | /// 206 | /// // Invalid JSONB input 207 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); 208 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 209 | /// let new_raw_jsonb = new_jsonb.as_raw(); 210 | /// let result = invalid_raw_jsonb.object_insert("a", &new_raw_jsonb, false); 211 | /// assert!(result.is_err()); // Returns an error due to invalid JSONB data 212 | /// 213 | /// // Inserting into a non-object 214 | /// let arr_jsonb = "[1,2,3]".parse::<OwnedJsonb>().unwrap(); 215 | /// let arr_raw_jsonb = arr_jsonb.as_raw(); 216 | /// let new_raw_jsonb = new_jsonb.as_raw(); 217 | /// let result = arr_raw_jsonb.object_insert("a", &new_raw_jsonb, false); 218 | /// assert!(result.is_err()); // Returns an error because input is not a JSONB object 219 | /// ``` 220 | pub fn object_insert( 221 | &self, 222 | new_key: &str, 223 | new_val: &RawJsonb, 224 | update_flag: bool, 225 | ) ->
Result<OwnedJsonb> { 226 | let mut builder = ObjectBuilder::new(); 227 | let object_iter_opt = ObjectIterator::new(*self)?; 228 | match object_iter_opt { 229 | Some(mut object_iter) => { 230 | for result in &mut object_iter { 231 | let (key, val_item) = result?; 232 | if new_key.eq(key) { 233 | if !update_flag { 234 | return Err(Error::ObjectDuplicateKey); 235 | } 236 | } else { 237 | builder.push_jsonb_item(key, val_item)?; 238 | } 239 | } 240 | let new_val_item = JsonbItem::from_raw_jsonb(*new_val)?; 241 | builder.push_jsonb_item(new_key, new_val_item)?; 242 | } 243 | None => { 244 | return Err(Error::InvalidObject); 245 | } 246 | } 247 | builder.build() 248 | } 249 | 250 | /// Deletes key-value pairs from a JSONB object based on a set of keys. 251 | /// 252 | /// This function removes key-value pairs from a JSONB object where the keys are present in the provided `keys` set. The key comparison is case-sensitive. 253 | /// 254 | /// If the input JSONB value is not an object, an error (`Error::InvalidObject`) is returned. 255 | /// 256 | /// # Arguments 257 | /// 258 | /// * `self` - The JSONB object. 259 | /// * `keys` - A set of keys to delete. 260 | /// 261 | /// # Returns 262 | /// 263 | /// * `Ok(OwnedJsonb)` - A new JSONB object with the specified keys removed. 264 | /// * `Err(Error)` - If the input JSONB value is not an object, or if the JSONB data is invalid.
265 | /// 266 | /// # Examples 267 | /// 268 | /// ```rust 269 | /// use std::collections::BTreeSet; 270 | /// 271 | /// use jsonb::OwnedJsonb; 272 | /// 273 | /// let obj_jsonb = r#"{"a": 1, "b": "hello", "c": 3}"#.parse::<OwnedJsonb>().unwrap(); 274 | /// let raw_jsonb = obj_jsonb.as_raw(); 275 | /// 276 | /// // Delete keys "a" and "c" 277 | /// let keys_to_delete: BTreeSet<&str> = ["a", "c"].into_iter().collect(); 278 | /// let deleted = raw_jsonb.object_delete(&keys_to_delete).unwrap(); 279 | /// assert_eq!(deleted.to_string(), r#"{"b":"hello"}"#); 280 | /// 281 | /// // Delete a non-existent key 282 | /// let keys_to_delete: BTreeSet<&str> = ["x"].into_iter().collect(); 283 | /// let deleted = raw_jsonb.object_delete(&keys_to_delete).unwrap(); 284 | /// assert_eq!(deleted.to_string(), r#"{"a":1,"b":"hello","c":3}"#); // Original object returned 285 | /// 286 | /// // Attempting to delete from a non-object 287 | /// let arr_jsonb = "[1, 2, 3]".parse::<OwnedJsonb>().unwrap(); 288 | /// let result = arr_jsonb.as_raw().object_delete(&keys_to_delete); 289 | /// assert!(result.is_err()); // Returns an error 290 | /// 291 | /// // Invalid JSONB data 292 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); 293 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 294 | /// let result = invalid_raw_jsonb.object_delete(&keys_to_delete); 295 | /// assert!(result.is_err()); // Returns an error 296 | /// ``` 297 | pub fn object_delete(&self, keys: &BTreeSet<&str>) -> Result<OwnedJsonb> { 298 | let object_iter_opt = ObjectIterator::new(*self)?; 299 | match object_iter_opt { 300 | Some(mut object_iter) => { 301 | let mut builder = ObjectBuilder::new(); 302 | for result in &mut object_iter { 303 | let (key, val_item) = result?; 304 | if keys.contains(key) { 305 | continue; 306 | } 307 | builder.push_jsonb_item(key, val_item)?; 308 | } 309 | builder.build() 310 | } 311 | None => Err(Error::InvalidObject), 312 | } 313 | } 314 | 315 | /// Creates a new JSONB object containing only the specified keys from
the original object. 316 | /// 317 | /// This function selects a subset of key-value pairs from a JSONB object based on the provided `keys` set. 318 | /// Only key-value pairs where the key is present in the `keys` set are included in the resulting object. The key comparison is case-sensitive. 319 | /// 320 | /// If the input JSONB value is not an object, an error (`Error::InvalidObject`) is returned. 321 | /// 322 | /// # Arguments 323 | /// 324 | /// * `self` - The JSONB object. 325 | /// * `keys` - A set of keys to select. 326 | /// 327 | /// # Returns 328 | /// 329 | /// * `Ok(OwnedJsonb)` - A new JSONB object containing only the key-value pairs specified by the `keys` set. 330 | /// * `Err(Error)` - If the input JSONB value is not an object, or if the JSONB data is invalid. 331 | /// 332 | /// # Examples 333 | /// 334 | /// ```rust 335 | /// use std::collections::BTreeSet; 336 | /// 337 | /// use jsonb::OwnedJsonb; 338 | /// 339 | /// let obj_jsonb = r#"{"a": 1, "b": "hello", "c": 3}"#.parse::<OwnedJsonb>().unwrap(); 340 | /// let raw_jsonb = obj_jsonb.as_raw(); 341 | /// 342 | /// // Pick keys "a" and "c" 343 | /// let keys_to_pick: BTreeSet<&str> = ["a", "c"].into_iter().collect(); 344 | /// let picked = raw_jsonb.object_pick(&keys_to_pick).unwrap(); 345 | /// assert_eq!(picked.to_string(), r#"{"a":1,"c":3}"#); 346 | /// 347 | /// // Pick a non-existent key 348 | /// let keys_to_pick: BTreeSet<&str> = ["x"].into_iter().collect(); 349 | /// let picked = raw_jsonb.object_pick(&keys_to_pick).unwrap(); 350 | /// assert_eq!(picked.to_string(), "{}"); // Empty object returned 351 | /// 352 | /// // Attempting to pick from a non-object 353 | /// let arr_jsonb = "[1, 2, 3]".parse::<OwnedJsonb>().unwrap(); 354 | /// let result = arr_jsonb.as_raw().object_pick(&keys_to_pick); 355 | /// assert!(result.is_err()); // Returns an error 356 | /// 357 | /// // Invalid JSONB data 358 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); 359 | /// let invalid_raw_jsonb =
invalid_jsonb.as_raw(); 360 | /// let result = invalid_raw_jsonb.object_pick(&keys_to_pick); 361 | /// assert!(result.is_err()); // Returns an error 362 | /// ``` 363 | pub fn object_pick(&self, keys: &BTreeSet<&str>) -> Result<OwnedJsonb> { 364 | let object_iter_opt = ObjectIterator::new(*self)?; 365 | match object_iter_opt { 366 | Some(mut object_iter) => { 367 | let mut builder = ObjectBuilder::new(); 368 | for result in &mut object_iter { 369 | let (key, val_item) = result?; 370 | if !keys.contains(key) { 371 | continue; 372 | } 373 | builder.push_jsonb_item(key, val_item)?; 374 | } 375 | builder.build() 376 | } 377 | None => Err(Error::InvalidObject), 378 | } 379 | } 380 | } 381 | -------------------------------------------------------------------------------- /src/jsonpath/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | mod parser; 16 | mod path; 17 | mod selector; 18 | 19 | pub use parser::parse_json_path; 20 | pub(crate) use parser::raw_string; 21 | pub(crate) use parser::string; 22 | pub use path::*; 23 | pub use selector::Selector; 24 | -------------------------------------------------------------------------------- /src/keypath.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs.
2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::borrow::Cow; 16 | use std::fmt::Display; 17 | use std::fmt::Formatter; 18 | 19 | use nom::branch::alt; 20 | use nom::character::complete::char; 21 | use nom::character::complete::i32; 22 | use nom::character::complete::multispace0; 23 | use nom::combinator::map; 24 | use nom::multi::separated_list1; 25 | use nom::sequence::delimited; 26 | use nom::sequence::preceded; 27 | use nom::sequence::terminated; 28 | use nom::IResult; 29 | use nom::Parser; 30 | 31 | use crate::jsonpath::raw_string; 32 | use crate::jsonpath::string; 33 | use crate::Error; 34 | 35 | /// Represents a set of key path chains. 36 | /// Compatible with the PostgreSQL syntax for extracting JSON sub-object paths. 37 | #[derive(Debug, Clone, PartialEq)] 38 | pub struct KeyPaths<'a> { 39 | pub paths: Vec<KeyPath<'a>>, 40 | } 41 | 42 | /// Represents a valid key path. 43 | #[derive(Debug, Clone, PartialEq)] 44 | pub enum KeyPath<'a> { 45 | /// represents the index of an Array, allows negative indexing. 46 | Index(i32), 47 | /// represents the quoted field name of an Object. 48 | QuotedName(Cow<'a, str>), 49 | /// represents the field name of an Object.
50 | Name(Cow<'a, str>), 51 | } 52 | 53 | impl Display for KeyPaths<'_> { 54 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 55 | write!(f, "{{")?; 56 | for (i, path) in self.paths.iter().enumerate() { 57 | if i > 0 { 58 | write!(f, ",")?; 59 | } 60 | write!(f, "{path}")?; 61 | } 62 | write!(f, "}}")?; 63 | Ok(()) 64 | } 65 | } 66 | 67 | impl Display for KeyPath<'_> { 68 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 69 | match self { 70 | KeyPath::Index(idx) => { 71 | write!(f, "{idx}")?; 72 | } 73 | KeyPath::QuotedName(name) => { 74 | write!(f, "\"{name}\"")?; 75 | } 76 | KeyPath::Name(name) => { 77 | write!(f, "{name}")?; 78 | } 79 | } 80 | Ok(()) 81 | } 82 | } 83 | 84 | /// Parses the input bytes into key paths. 85 | pub fn parse_key_paths(input: &[u8]) -> Result<KeyPaths<'_>, Error> { 86 | match key_paths(input) { 87 | Ok((rest, paths)) => { 88 | if !rest.is_empty() { 89 | return Err(Error::InvalidKeyPath); 90 | } 91 | let key_paths = KeyPaths { paths }; 92 | Ok(key_paths) 93 | } 94 | Err(nom::Err::Error(_) | nom::Err::Failure(_)) => Err(Error::InvalidKeyPath), 95 | Err(nom::Err::Incomplete(_)) => unreachable!(), 96 | } 97 | } 98 | 99 | fn key_path(input: &[u8]) -> IResult<&[u8], KeyPath<'_>> { 100 | alt(( 101 | map(i32, KeyPath::Index), 102 | map(string, KeyPath::QuotedName), 103 | map(raw_string, KeyPath::Name), 104 | )) 105 | .parse(input) 106 | } 107 | 108 | fn key_paths(input: &[u8]) -> IResult<&[u8], Vec<KeyPath<'_>>> { 109 | alt(( 110 | delimited( 111 | preceded(multispace0, char('{')), 112 | separated_list1(char(','), delimited(multispace0, key_path, multispace0)), 113 | terminated(char('}'), multispace0), 114 | ), 115 | map( 116 | delimited( 117 | preceded(multispace0, char('{')), 118 | multispace0, 119 | terminated(char('}'), multispace0), 120 | ), 121 | |_| vec![], 122 | ), 123 | )) 124 | .parse(input) 125 | } 126 | -------------------------------------------------------------------------------- /src/lib.rs:
-------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | //! `jsonb` is a binary format `JSON` representation inspired by [PostgreSQL](https://www.postgresql.org/docs/current/datatype-json.html) and [CockroachDB](https://www.cockroachlabs.com/docs/stable/jsonb). It provides a fast, lightweight and easy-to-use API for working with `JSON` data. 16 | //! 17 | //! ## Features 18 | //! 19 | //! - Good compatibility: `jsonb` fully supports the `JSON` standard and can be used to store complex data structures. 20 | //! - Fast performance: `jsonb` is designed for high performance, allowing you to work with large `JSON` data sets with ease. 21 | //! - Easy to use: `jsonb` provides a number of built-in functions to support various operations, and also supports the `JSONPath` syntax for selecting and extracting subset elements. 22 | //! - Safe and secure: `jsonb` is written in Rust, which provides memory and thread safety guarantees, making it a safe choice for handling sensitive data. 23 | //! 24 | //! ## Encoding format 25 | //! 26 | //! The `jsonb` encoding format is a tree-like structure. Each node contains a container header, a number of JEntry headers, and nested encoding values. 27 | //! 28 | //! - 32-bit container header. 
3 bits identify the type of value, including `scalar`, `object` and `array`, and 29 bits identify the number of JEntries in the `array` or `object`. The root node of the `jsonb` value is always a container header. 29 | //! - `scalar` container header: `0x20000000` 30 | //! - `object` container header: `0x40000000` 31 | //! - `array` container header: `0x80000000` 32 | //! - 32-bit JEntry header. 1 bit identifies whether the JEntry stores a length or an offset, 3 bits identify the type of value, including `null`, `string`, `number`, `false`, `true` and `container`, and the remaining 28 bits identify the length or offset of the encoding value. 33 | //! - `null` JEntry header: `0x00000000` 34 | //! - `string` JEntry header: `0x10000000` 35 | //! - `number` JEntry header: `0x20000000` 36 | //! - `false` JEntry header: `0x30000000` 37 | //! - `true` JEntry header: `0x40000000` 38 | //! - `container` JEntry header `0x50000000` 39 | //! - Encoding value. Different types of JEntry header have different encoding values. 40 | //! - `null`, `true`, `false`: no encoding value, identified by the JEntry header. 41 | //! - `string`: a normal UTF-8 string. 42 | //! - `number`: an encoded number to represent uint64s, int64s and float64s. 43 | //! - `container`: a nested `json` value with a recursive structure. 44 | //! 45 | //! #### An encoding example 46 | //! 47 | //! ```text 48 | //! // JSON value 49 | //! [false, 10, {"k":"v"}] 50 | //! 51 | //! // JSONB encoding 52 | //! 0x80000003 array container header (3 JEntries) 53 | //! 0x30000000 false JEntry header (no encoding value) 54 | //! 0x20000002 number JEntry header (encoding value length 2) 55 | //! 0x5000000e container JEntry header (encoding value length 14) 56 | //! 0x500a number encoding value (10) 57 | //! 0x40000001 object container header (1 JEntry) 58 | //! 0x10000001 string key JEntry header (encoding value length 1) 59 | //! 0x10000001 string value JEntry header (encoding value length 1) 60 | //! 
0x6b string encoding value ("k") 61 | //! 0x76 string encoding value ("v") 62 | //! ``` 63 | 64 | #![allow(clippy::uninlined_format_args)] 65 | 66 | mod constants; 67 | pub mod core; 68 | mod error; 69 | mod extension; 70 | mod from; 71 | mod functions; 72 | pub mod jsonpath; 73 | pub mod keypath; 74 | mod number; 75 | mod owned; 76 | mod parser; 77 | mod raw; 78 | mod util; 79 | mod value; 80 | 81 | pub use error::Error; 82 | pub use extension::*; 83 | #[allow(unused_imports)] 84 | pub use from::*; 85 | pub use number::Decimal128; 86 | pub use number::Decimal256; 87 | pub use number::Number; 88 | pub use owned::to_owned_jsonb; 89 | pub use owned::OwnedJsonb; 90 | pub use parser::from_slice; 91 | pub use parser::parse_value; 92 | pub use raw::from_raw_jsonb; 93 | pub use raw::RawJsonb; 94 | pub use value::*; 95 | -------------------------------------------------------------------------------- /src/owned.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::cmp::Ordering; 16 | use std::fmt::Display; 17 | use std::str::FromStr; 18 | 19 | use crate::core::ArrayBuilder; 20 | use crate::core::ObjectBuilder; 21 | use crate::core::Serializer; 22 | use crate::error::Error; 23 | use crate::error::Result; 24 | use crate::parse_value; 25 | use crate::RawJsonb; 26 | 27 | /// Represents a JSONB data that owns its underlying data. 28 | /// 29 | /// This struct provides ownership over the binary JSONB representation. 30 | /// `OwnedJsonb` is primarily used to create JSONB data from other data types (such as JSON String). 31 | /// However, for most operations, it's necessary to convert an `OwnedJsonb` to a `RawJsonb` using the `as_raw()` method 32 | /// to avoid unnecessary copying and to take advantage of the performance benefits of the read-only access of the `RawJsonb`. 33 | #[derive(Debug, Clone)] 34 | pub struct OwnedJsonb { 35 | /// The underlying `Vec<u8>` containing the binary JSONB data. 36 | pub(crate) data: Vec<u8>, 37 | } 38 | 39 | impl OwnedJsonb { 40 | /// Creates a new OwnedJsonb from a `Vec<u8>`. 41 | /// 42 | /// # Arguments 43 | /// 44 | /// * `data` - The `Vec<u8>` containing the JSONB data. 45 | /// 46 | /// # Returns 47 | /// 48 | /// A new `OwnedJsonb` instance. 49 | pub fn new(data: Vec<u8>) -> OwnedJsonb { 50 | Self { data } 51 | } 52 | 53 | /// Creates a `RawJsonb` view of the owned data. 54 | /// This is useful for passing the data to functions that expect a `RawJsonb`. 55 | /// This does *not* transfer ownership. 56 | /// 57 | /// # Returns 58 | /// 59 | /// A `RawJsonb` instance referencing the owned data. 60 | pub fn as_raw(&self) -> RawJsonb<'_> { 61 | RawJsonb::new(self.data.as_slice()) 62 | } 63 | 64 | /// Consumes the OwnedJsonb and returns the underlying `Vec<u8>`. 65 | /// 66 | /// # Returns 67 | /// 68 | /// The underlying `Vec<u8>` containing the JSONB data. 69 | pub fn to_vec(self) -> Vec<u8> { 70 | self.data 71 | } 72 | 73 | /// Checks if the JSONB data is empty.
74 | /// 75 | /// # Returns 76 | /// 77 | /// `true` if the data is empty, `false` otherwise. 78 | pub fn is_empty(&self) -> bool { 79 | self.len() == 0 80 | } 81 | 82 | /// Returns the length of the JSONB data in bytes. 83 | /// 84 | /// # Returns 85 | /// 86 | /// The length of the data in bytes. 87 | pub fn len(&self) -> usize { 88 | self.data.len() 89 | } 90 | 91 | /// Builds a JSONB array from a collection of RawJsonb values. 92 | /// 93 | /// This function constructs a new JSONB array from an iterator of `RawJsonb` values. 94 | /// The resulting `OwnedJsonb` represents the binary encoding of the array. 95 | /// 96 | /// # Arguments 97 | /// 98 | /// * `raw_jsonbs` - An iterator of `RawJsonb` values representing the elements of the array. 99 | /// 100 | /// # Returns 101 | /// 102 | /// * `Ok(OwnedJsonb)` - The newly created JSONB array. 103 | /// * `Err(Error)` - If any of the input `RawJsonb` values are invalid or if an error occurs during array construction. 104 | /// 105 | /// # Examples 106 | /// 107 | /// ```rust 108 | /// use jsonb::OwnedJsonb; 109 | /// use jsonb::RawJsonb; 110 | /// 111 | /// // Create some RawJsonb values 112 | /// let owned_num = "1".parse::<OwnedJsonb>().unwrap(); 113 | /// let owned_str = r#""hello""#.parse::<OwnedJsonb>().unwrap(); 114 | /// let owned_arr = "[1,2,3]".parse::<OwnedJsonb>().unwrap(); 115 | /// 116 | /// // Build the array 117 | /// let raw_jsonbs = vec![owned_num.as_raw(), owned_str.as_raw(), owned_arr.as_raw()]; 118 | /// let array_result = OwnedJsonb::build_array(raw_jsonbs.into_iter()); 119 | /// assert!(array_result.is_ok()); 120 | /// let array = array_result.unwrap(); 121 | /// 122 | /// // Convert to string for easy verification 123 | /// assert_eq!(array.to_string(), "[1,\"hello\",[1,2,3]]"); 124 | /// 125 | /// // Example with an empty iterator 126 | /// let empty_array = 127 | /// OwnedJsonb::build_array(<[RawJsonb<'_>; 0] as IntoIterator>::into_iter([])).unwrap(); 128 | /// assert_eq!(empty_array.to_string(), "[]"); 129 | /// 130 | /// //
Example with invalid input (this will cause an error) 131 | /// let invalid_data = OwnedJsonb::new(vec![1, 2, 3, 4]); 132 | /// let result = OwnedJsonb::build_array([invalid_data.as_raw()].into_iter()); 133 | /// assert!(result.is_err()); 134 | /// ``` 135 | pub fn build_array<'a>( 136 | raw_jsonbs: impl IntoIterator<Item = RawJsonb<'a>>, 137 | ) -> Result<OwnedJsonb> { 138 | let mut builder = ArrayBuilder::new(); 139 | for raw_jsonb in raw_jsonbs.into_iter() { 140 | builder.push_raw_jsonb(raw_jsonb); 141 | } 142 | builder.build() 143 | } 144 | 145 | /// Builds a JSONB object from a collection of key-value pairs. 146 | /// 147 | /// This function constructs a new JSONB object from an iterator of key-value pairs. The keys are strings, and the values are `RawJsonb` values. 148 | /// The resulting `OwnedJsonb` represents the binary encoding of the object. 149 | /// 150 | /// # Arguments 151 | /// 152 | /// * `items` - An iterator of `(K, RawJsonb<'a>)` tuples, where `K` is a type that can be converted into a string slice (`AsRef<str>`) representing the key, 153 | /// and the second element is a `RawJsonb` representing the value. 154 | /// 155 | /// # Returns 156 | /// 157 | /// * `Ok(OwnedJsonb)` - The newly created JSONB object. 158 | /// * `Err(Error)` - If any of the input `RawJsonb` values are invalid, if contain duplicate keys, or if an error occurs during object construction.
159 | /// 160 | /// # Examples 161 | /// 162 | /// ```rust 163 | /// use jsonb::OwnedJsonb; 164 | /// use jsonb::RawJsonb; 165 | /// 166 | /// // Create some RawJsonb values 167 | /// let owned_num = "1".parse::<OwnedJsonb>().unwrap(); 168 | /// let owned_str = r#""hello""#.parse::<OwnedJsonb>().unwrap(); 169 | /// let owned_arr = "[1,2,3]".parse::<OwnedJsonb>().unwrap(); 170 | /// 171 | /// // Build the object 172 | /// let raw_jsonbs = vec![ 173 | /// ("a", owned_num.as_raw()), 174 | /// ("b", owned_str.as_raw()), 175 | /// ("c", owned_arr.as_raw()), 176 | /// ]; 177 | /// let object_result = OwnedJsonb::build_object(raw_jsonbs.into_iter()); 178 | /// assert!(object_result.is_ok()); 179 | /// let object = object_result.unwrap(); 180 | /// 181 | /// // Convert to string for easy verification 182 | /// assert_eq!(object.to_string(), r#"{"a":1,"b":"hello","c":[1,2,3]}"#); 183 | /// 184 | /// // Example with an empty iterator 185 | /// let empty_object = 186 | /// OwnedJsonb::build_object(<[(&str, RawJsonb<'_>); 0] as IntoIterator>::into_iter([])) 187 | /// .unwrap(); 188 | /// assert_eq!(empty_object.to_string(), "{}"); 189 | /// 190 | /// // Example with invalid value 191 | /// let invalid_data = OwnedJsonb::new(vec![1, 2, 3, 4]); 192 | /// let result = OwnedJsonb::build_object([("a", invalid_data.as_raw())].into_iter()); 193 | /// assert!(result.is_err()); 194 | /// ``` 195 | pub fn build_object<'a, K: AsRef<str>>( 196 | items: impl IntoIterator<Item = (K, RawJsonb<'a>)>, 197 | ) -> Result<OwnedJsonb> { 198 | let mut kvs = Vec::new(); 199 | for (key, val) in items.into_iter() { 200 | kvs.push((key, val)); 201 | } 202 | let mut builder = ObjectBuilder::new(); 203 | for (key, val) in kvs.iter() { 204 | builder.push_raw_jsonb(key.as_ref(), *val)?; 205 | } 206 | builder.build() 207 | } 208 | } 209 | 210 | /// Creates an `OwnedJsonb` from a borrowed byte slice. The byte slice is copied into a new `Vec<u8>`.
211 | impl From<&[u8]> for OwnedJsonb { 212 | fn from(data: &[u8]) -> Self { 213 | Self { 214 | data: data.to_vec(), 215 | } 216 | } 217 | } 218 | 219 | /// Creates an `OwnedJsonb` from a `Vec<u8>`. This is a simple ownership transfer. 220 | impl From<Vec<u8>> for OwnedJsonb { 221 | fn from(data: Vec<u8>) -> Self { 222 | Self { data } 223 | } 224 | } 225 | 226 | /// Parses a string into an `OwnedJsonb`. 227 | /// The string is parsed into a JSON value, then encoded into the binary JSONB format. 228 | impl FromStr for OwnedJsonb { 229 | type Err = Error; 230 | 231 | fn from_str(s: &str) -> std::result::Result<Self, Self::Err> { 232 | let value = parse_value(s.as_bytes())?; 233 | let mut data = Vec::new(); 234 | value.write_to_vec(&mut data); 235 | Ok(Self { data }) 236 | } 237 | } 238 | 239 | /// Allows accessing the underlying byte slice as a reference. 240 | /// This enables easy integration with functions that expect a `&[u8]`. 241 | impl AsRef<[u8]> for OwnedJsonb { 242 | fn as_ref(&self) -> &[u8] { 243 | self.data.as_ref() 244 | } 245 | } 246 | 247 | /// Implements the Display trait, allowing OwnedJsonb to be formatted as a string using the `{}` format specifier. 248 | impl Display for OwnedJsonb { 249 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 250 | let raw_jsonb = self.as_raw(); 251 | write!(f, "{}", raw_jsonb.to_string()) 252 | } 253 | } 254 | 255 | impl Eq for OwnedJsonb {} 256 | 257 | impl PartialEq for OwnedJsonb { 258 | fn eq(&self, other: &Self) -> bool { 259 | self.partial_cmp(other) == Some(Ordering::Equal) 260 | } 261 | } 262 | 263 | /// Implements `PartialOrd` for `OwnedJsonb`, allowing comparison of two `OwnedJsonb` values. 264 | /// 265 | /// The comparison logic handles different JSONB types (scalar, array, object) and considers null values. 266 | /// The ordering is defined as follows: 267 | /// 268 | /// 1. Null is considered greater than any other type. 269 | /// 2. Scalars are compared based on their type and value (String > Number > Boolean).
270 | /// 3. Arrays are compared element by element. 271 | /// 4. Objects are compared based on their keys and values. 272 | /// 5. Arrays are greater than objects and scalars. 273 | /// 6. Objects are greater than scalars. 274 | /// 7. If the types are incompatible, None is returned. 275 | #[allow(clippy::non_canonical_partial_ord_impl)] 276 | impl PartialOrd for OwnedJsonb { 277 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { 278 | let self_raw = self.as_raw(); 279 | let other_raw = other.as_raw(); 280 | self_raw.partial_cmp(&other_raw) 281 | } 282 | } 283 | 284 | /// Implements `Ord` for `OwnedJsonb`, allowing comparison of two `OwnedJsonb` values using the total ordering. 285 | /// This implementation leverages the `PartialOrd` implementation, returning `Ordering::Equal` for incomparable values. 286 | impl Ord for OwnedJsonb { 287 | fn cmp(&self, other: &Self) -> Ordering { 288 | let self_raw = self.as_raw(); 289 | let other_raw = other.as_raw(); 290 | match self_raw.partial_cmp(&other_raw) { 291 | Some(ordering) => ordering, 292 | None => Ordering::Equal, 293 | } 294 | } 295 | } 296 | 297 | /// Serializes a Rust data structure into an `OwnedJsonb` using Serde. 298 | /// 299 | /// This function takes a Rust type `T` that implements the `Serialize` trait and 300 | /// serializes it into an `OwnedJsonb`, which is a struct containing a `Vec<u8>` 301 | /// representing the JSONB data. It uses a custom `Serializer` to handle the 302 | /// serialization process. 303 | /// 304 | /// # Arguments 305 | /// 306 | /// * `value`: A reference to the value of type `T` to be serialized. 307 | /// 308 | /// # Type Parameters 309 | /// 310 | /// * `T`: The Rust type to serialize. This type must implement the `serde::ser::Serialize` trait. 311 | /// 312 | /// # Returns 313 | /// 314 | /// * `Ok(OwnedJsonb)`: If the serialization is successful, returns an `OwnedJsonb` 315 | /// containing the serialized JSONB data. 316 | /// * `Err(e)`: If any Serde serialization error occurs.
317 | /// 318 | /// # Examples 319 | /// 320 | /// ``` 321 | /// use jsonb::to_owned_jsonb; 322 | /// use jsonb::OwnedJsonb; 323 | /// use serde::Serialize; 324 | /// 325 | /// #[derive(Serialize, Debug)] 326 | /// struct Person { 327 | /// name: String, 328 | /// age: u32, 329 | /// } 330 | /// 331 | /// let person = Person { 332 | /// name: "Bob".to_string(), 333 | /// age: 42, 334 | /// }; 335 | /// 336 | /// let owned_jsonb: OwnedJsonb = to_owned_jsonb(&person).unwrap(); 337 | /// 338 | /// println!("JSONB data: {}", owned_jsonb); 339 | /// ``` 340 | pub fn to_owned_jsonb(value: &T) -> Result 341 | where 342 | T: serde::ser::Serialize, 343 | { 344 | let mut serializer = Serializer::default(); 345 | value.serialize(&mut serializer)?; 346 | Ok(serializer.to_owned_jsonb()) 347 | } 348 | -------------------------------------------------------------------------------- /src/parser.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::borrow::Cow; 16 | 17 | use super::constants::*; 18 | use super::error::Error; 19 | use super::error::ParseErrorCode; 20 | use super::error::Result; 21 | use super::number::Number; 22 | use super::util::parse_string; 23 | use super::value::Object; 24 | use super::value::Value; 25 | use crate::core::Decoder; 26 | 27 | /// The binary `JSONB` contains three parts, `Header`, `JEntry` and `RawData`. 28 | /// This structure can be nested. Each group of structures starts with a `Header`. 29 | /// The upper-level `Value` will store the `Header` length or offset of 30 | /// the lower-level `Value`. 31 | /// 32 | /// `Header` stores the type of the `Value`, include `Array`, `Object` and `Scalar`, 33 | /// `Scalar` has only one `Value`, and a corresponding `JEntry`. 34 | /// `Array` and `Object` are nested type, they have multiple lower-level `Values`. 35 | /// So the `Header` also stores the number of lower-level `Values`. 36 | /// 37 | /// `JEntry` stores the types of `Scalar Value`, including `Null`, `True`, `False`, 38 | /// `Number`, `String` and `Container`. They have three different decode methods. 39 | /// 1. `Null`, `True` and `False` can be obtained by `JEntry`, no extra work required. 40 | /// 2. `Number` and `String` has related `RawData`, `JEntry` store the length 41 | /// or offset of this data, the `Value` can be read out and then decoded. 42 | /// 3. `Container` is actually a nested `Array` or `Object` with the same structure, 43 | /// `JEntry` store the length or offset of the lower-level `Header`, 44 | /// from where the same decode process can begin. 45 | /// 46 | /// `RawData` is the encoded `Value`. 47 | /// `Number` is a variable-length `Decimal`, store both int and float value. 48 | /// `String` is the original string, can be borrowed directly without extra decode. 49 | /// `Array` and `Object` is a lower-level encoded `JSONB` value. 50 | /// The upper-level doesn't care about the specific content. 
51 | /// Decode can be executed recursively. 52 | /// 53 | /// Decode `JSONB` Value from binary bytes. 54 | pub fn from_slice(buf: &[u8]) -> Result> { 55 | let mut decoder = Decoder::new(buf); 56 | match decoder.decode() { 57 | Ok(value) => Ok(value), 58 | // for compatible with the first version of `JSON` text, parse it again 59 | Err(_) => parse_value(buf), 60 | } 61 | } 62 | 63 | // Parse JSON text to JSONB Value. 64 | // Inspired by `https://github.com/jorgecarleitao/json-deserializer` 65 | // Thanks Jorge Leitao. 66 | pub fn parse_value(buf: &[u8]) -> Result> { 67 | let mut parser = Parser::new(buf); 68 | parser.parse() 69 | } 70 | 71 | struct Parser<'a> { 72 | buf: &'a [u8], 73 | idx: usize, 74 | } 75 | 76 | impl<'a> Parser<'a> { 77 | fn new(buf: &'a [u8]) -> Parser<'a> { 78 | Self { buf, idx: 0 } 79 | } 80 | 81 | fn parse(&mut self) -> Result> { 82 | let val = self.parse_json_value()?; 83 | self.skip_unused(); 84 | if self.idx < self.buf.len() { 85 | self.step(); 86 | return Err(self.error(ParseErrorCode::UnexpectedTrailingCharacters)); 87 | } 88 | Ok(val) 89 | } 90 | 91 | fn parse_json_value(&mut self) -> Result> { 92 | self.skip_unused(); 93 | let c = self.next()?; 94 | match c { 95 | b'n' => self.parse_json_null(), 96 | b't' => self.parse_json_true(), 97 | b'f' => self.parse_json_false(), 98 | b'0'..=b'9' | b'-' => self.parse_json_number(), 99 | b'"' => self.parse_json_string(), 100 | b'[' => self.parse_json_array(), 101 | b'{' => self.parse_json_object(), 102 | _ => { 103 | self.step(); 104 | Err(self.error(ParseErrorCode::ExpectedSomeValue)) 105 | } 106 | } 107 | } 108 | 109 | fn next(&mut self) -> Result<&u8> { 110 | match self.buf.get(self.idx) { 111 | Some(c) => Ok(c), 112 | None => Err(self.error(ParseErrorCode::InvalidEOF)), 113 | } 114 | } 115 | 116 | fn must_is(&mut self, c: u8) -> Result<()> { 117 | match self.buf.get(self.idx) { 118 | Some(v) => { 119 | self.step(); 120 | if v == &c { 121 | Ok(()) 122 | } else { 123 | 
Err(self.error(ParseErrorCode::ExpectedSomeIdent)) 124 | } 125 | } 126 | None => Err(self.error(ParseErrorCode::InvalidEOF)), 127 | } 128 | } 129 | 130 | fn check_next(&mut self, c: u8) -> bool { 131 | if self.idx < self.buf.len() { 132 | let v = self.buf.get(self.idx).unwrap(); 133 | if v == &c { 134 | return true; 135 | } 136 | } 137 | false 138 | } 139 | 140 | fn check_next_either(&mut self, c1: u8, c2: u8) -> bool { 141 | if self.idx < self.buf.len() { 142 | let v = self.buf.get(self.idx).unwrap(); 143 | if v == &c1 || v == &c2 { 144 | return true; 145 | } 146 | } 147 | false 148 | } 149 | 150 | fn check_digit(&mut self) -> bool { 151 | if self.idx < self.buf.len() { 152 | let v = self.buf.get(self.idx).unwrap(); 153 | if v.is_ascii_digit() { 154 | return true; 155 | } 156 | } 157 | false 158 | } 159 | 160 | fn step_digits(&mut self) -> Result { 161 | if self.idx == self.buf.len() { 162 | return Err(self.error(ParseErrorCode::InvalidEOF)); 163 | } 164 | let mut len = 0; 165 | while self.idx < self.buf.len() { 166 | let c = self.buf.get(self.idx).unwrap(); 167 | if !c.is_ascii_digit() { 168 | break; 169 | } 170 | len += 1; 171 | self.step(); 172 | } 173 | Ok(len) 174 | } 175 | 176 | #[inline] 177 | fn step(&mut self) { 178 | self.idx += 1; 179 | } 180 | 181 | #[inline] 182 | fn step_by(&mut self, n: usize) { 183 | self.idx += n; 184 | } 185 | 186 | fn error(&self, code: ParseErrorCode) -> Error { 187 | let pos = self.idx; 188 | Error::Syntax(code, pos) 189 | } 190 | 191 | #[inline] 192 | fn skip_unused(&mut self) { 193 | while self.idx < self.buf.len() { 194 | let c = self.buf.get(self.idx).unwrap(); 195 | if c.is_ascii_whitespace() { 196 | self.step(); 197 | continue; 198 | } 199 | // Allow parse escaped white space 200 | if *c == b'\\' { 201 | if self.idx + 1 < self.buf.len() 202 | && matches!(self.buf[self.idx + 1], b'n' | b'r' | b't') 203 | { 204 | self.step_by(2); 205 | continue; 206 | } 207 | if self.idx + 3 < self.buf.len() 208 | && self.buf[self.idx + 1] 
== b'x' 209 | && self.buf[self.idx + 2] == b'0' 210 | && self.buf[self.idx + 3] == b'C' 211 | { 212 | self.step_by(4); 213 | continue; 214 | } 215 | } 216 | break; 217 | } 218 | } 219 | 220 | fn parse_json_null(&mut self) -> Result> { 221 | let data = [b'n', b'u', b'l', b'l']; 222 | for v in data.into_iter() { 223 | self.must_is(v)?; 224 | } 225 | Ok(Value::Null) 226 | } 227 | 228 | fn parse_json_true(&mut self) -> Result> { 229 | let data = [b't', b'r', b'u', b'e']; 230 | for v in data.into_iter() { 231 | self.must_is(v)?; 232 | } 233 | Ok(Value::Bool(true)) 234 | } 235 | 236 | fn parse_json_false(&mut self) -> Result> { 237 | let data = [b'f', b'a', b'l', b's', b'e']; 238 | for v in data.into_iter() { 239 | self.must_is(v)?; 240 | } 241 | Ok(Value::Bool(false)) 242 | } 243 | 244 | fn parse_json_number(&mut self) -> Result> { 245 | let start_idx = self.idx; 246 | 247 | let mut has_fraction = false; 248 | let mut has_exponent = false; 249 | let mut negative: bool = false; 250 | 251 | if self.check_next(b'-') { 252 | negative = true; 253 | self.step(); 254 | } 255 | if self.check_next(b'0') { 256 | self.step(); 257 | if self.check_digit() { 258 | self.step(); 259 | return Err(self.error(ParseErrorCode::InvalidNumberValue)); 260 | } 261 | } else { 262 | let len = self.step_digits()?; 263 | if len == 0 { 264 | self.step(); 265 | return Err(self.error(ParseErrorCode::InvalidNumberValue)); 266 | } 267 | } 268 | if self.check_next(b'.') { 269 | has_fraction = true; 270 | self.step(); 271 | let len = self.step_digits()?; 272 | if len == 0 { 273 | self.step(); 274 | return Err(self.error(ParseErrorCode::InvalidNumberValue)); 275 | } 276 | } 277 | if self.check_next_either(b'E', b'e') { 278 | has_exponent = true; 279 | self.step(); 280 | if self.check_next_either(b'+', b'-') { 281 | self.step(); 282 | } 283 | let len = self.step_digits()?; 284 | if len == 0 { 285 | self.step(); 286 | return Err(self.error(ParseErrorCode::InvalidNumberValue)); 287 | } 288 | } 289 | let s = 
unsafe { std::str::from_utf8_unchecked(&self.buf[start_idx..self.idx]) }; 290 | 291 | if !has_fraction && !has_exponent { 292 | if !negative { 293 | if let Ok(v) = s.parse::() { 294 | return Ok(Value::Number(Number::UInt64(v))); 295 | } 296 | } else if let Ok(v) = s.parse::() { 297 | return Ok(Value::Number(Number::Int64(v))); 298 | } 299 | } 300 | 301 | match fast_float2::parse(s) { 302 | Ok(v) => Ok(Value::Number(Number::Float64(v))), 303 | Err(_) => Err(self.error(ParseErrorCode::InvalidNumberValue)), 304 | } 305 | } 306 | 307 | fn parse_json_string(&mut self) -> Result> { 308 | self.must_is(b'"')?; 309 | 310 | let start_idx = self.idx; 311 | let mut escapes = 0; 312 | loop { 313 | let c = self.next()?; 314 | match c { 315 | b'\\' => { 316 | self.step(); 317 | escapes += 1; 318 | let next_c = self.next()?; 319 | if *next_c == b'u' { 320 | self.step(); 321 | let next_c = self.next()?; 322 | if *next_c == b'{' { 323 | self.step_by(UNICODE_LEN + 2); 324 | } else { 325 | self.step_by(UNICODE_LEN); 326 | } 327 | } else { 328 | self.step(); 329 | } 330 | continue; 331 | } 332 | b'"' => { 333 | self.step(); 334 | break; 335 | } 336 | _ => {} 337 | } 338 | self.step(); 339 | } 340 | 341 | let data = &self.buf[start_idx..self.idx - 1]; 342 | let val = if escapes > 0 { 343 | let len = self.idx - 1 - start_idx - escapes; 344 | let mut idx = start_idx + 1; 345 | let s = parse_string(data, len, &mut idx)?; 346 | Cow::Owned(s) 347 | } else { 348 | std::str::from_utf8(data) 349 | .map(Cow::Borrowed) 350 | .map_err(|_| self.error(ParseErrorCode::InvalidStringValue))? 
351 | }; 352 | Ok(Value::String(val)) 353 | } 354 | 355 | fn parse_json_array(&mut self) -> Result> { 356 | self.must_is(b'[')?; 357 | 358 | let mut first = true; 359 | let mut values = Vec::new(); 360 | loop { 361 | self.skip_unused(); 362 | let c = self.next()?; 363 | if *c == b']' { 364 | self.step(); 365 | break; 366 | } 367 | if !first { 368 | if *c != b',' { 369 | return Err(self.error(ParseErrorCode::ExpectedArrayCommaOrEnd)); 370 | } 371 | self.step(); 372 | } 373 | first = false; 374 | let value = self.parse_json_value()?; 375 | values.push(value); 376 | } 377 | Ok(Value::Array(values)) 378 | } 379 | 380 | fn parse_json_object(&mut self) -> Result> { 381 | self.must_is(b'{')?; 382 | 383 | let mut first = true; 384 | let mut obj = Object::new(); 385 | loop { 386 | self.skip_unused(); 387 | let c = self.next()?; 388 | if *c == b'}' { 389 | self.step(); 390 | break; 391 | } 392 | if !first { 393 | if *c != b',' { 394 | return Err(self.error(ParseErrorCode::ExpectedObjectCommaOrEnd)); 395 | } 396 | self.step(); 397 | } 398 | first = false; 399 | let key = self.parse_json_value()?; 400 | if !key.is_string() { 401 | return Err(self.error(ParseErrorCode::KeyMustBeAString)); 402 | } 403 | self.skip_unused(); 404 | let c = self.next()?; 405 | if *c != b':' { 406 | return Err(self.error(ParseErrorCode::ExpectedColon)); 407 | } 408 | self.step(); 409 | let value = self.parse_json_value()?; 410 | 411 | let k = key.as_str().unwrap(); 412 | obj.insert(k.to_string(), value); 413 | } 414 | Ok(Value::Object(obj)) 415 | } 416 | } 417 | -------------------------------------------------------------------------------- /src/raw.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::cmp::Ordering; 16 | 17 | use serde::Serialize; 18 | 19 | use crate::core::ArrayIterator; 20 | use crate::core::Deserializer; 21 | use crate::core::JsonbItemType; 22 | use crate::core::ObjectIterator; 23 | use crate::error::*; 24 | use crate::OwnedJsonb; 25 | 26 | /// Represents JSONB data wrapped around a raw, immutable slice of bytes. 27 | /// 28 | /// It does not own the underlying data, allowing various operations to be performed on the JSONB data *without copying*. 29 | /// This is critical for performance when dealing with large JSONB values. 30 | /// `RawJsonb` provides various methods to inspect and manipulate the JSONB data efficiently. 31 | #[derive(Debug, Clone, Copy)] 32 | pub struct RawJsonb<'a> { 33 | /// The underlying byte slice representing the JSONB data. 34 | pub(crate) data: &'a [u8], 35 | } 36 | 37 | impl<'a> RawJsonb<'a> { 38 | /// Creates a new RawJsonb from a byte slice. 39 | /// 40 | /// # Arguments 41 | /// 42 | /// * `data` - The byte slice containing the JSONB data. 43 | /// 44 | /// # Returns 45 | /// 46 | /// A new `RawJsonb` instance. 47 | pub fn new(data: &'a [u8]) -> Self { 48 | Self { data } 49 | } 50 | 51 | /// Checks if the JSONB data is empty. 52 | /// 53 | /// # Returns 54 | /// 55 | /// `true` if the data is empty, `false` otherwise. 56 | pub fn is_empty(&self) -> bool { 57 | self.len() == 0 58 | } 59 | 60 | /// Returns the length of the JSONB data in bytes. 61 | /// 62 | /// # Returns 63 | /// 64 | /// The length of the data in bytes. 
65 | pub fn len(&self) -> usize { 66 | self.data.as_ref().len() 67 | } 68 | 69 | /// Creates an `OwnedJsonb` from the `RawJsonb` by copying the underlying data. 70 | /// 71 | /// This method converts a `RawJsonb`, which holds a reference to JSONB data, 72 | /// into an `OwnedJsonb`, which owns its own copy of the JSONB data. This is 73 | /// achieved by cloning the byte slice held by the `RawJsonb` into a new `Vec`. 74 | /// 75 | /// # Returns 76 | /// 77 | /// An `OwnedJsonb` instance containing a copy of the JSONB data from the `RawJsonb`. 78 | pub fn to_owned(&self) -> OwnedJsonb { 79 | OwnedJsonb::new(self.data.to_vec()) 80 | } 81 | 82 | /// Converts the JSONB value to a JSON string. 83 | /// 84 | /// This function serializes the JSONB value into a human-readable JSON string representation. 85 | /// If the JSONB data is invalid, treate it as a text JSON string and return directly. 86 | /// If the JSONB data is empty, return a JSON null for compatibility. 87 | /// 88 | /// # Returns 89 | /// 90 | /// * `String` - The JSON string representation of the value. 
91 | /// 92 | /// # Examples 93 | /// 94 | /// ```rust 95 | /// use jsonb::OwnedJsonb; 96 | /// 97 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 98 | /// let raw_jsonb = arr_jsonb.as_raw(); 99 | /// assert_eq!(raw_jsonb.to_string(), "[1,2,3]"); 100 | /// 101 | /// let obj_jsonb = r#"{"a": 1, "b": "hello"}"#.parse::().unwrap(); 102 | /// let raw_jsonb = obj_jsonb.as_raw(); 103 | /// assert_eq!(raw_jsonb.to_string(), r#"{"a":1,"b":"hello"}"#); 104 | /// 105 | /// let num_jsonb = "123.45".parse::().unwrap(); 106 | /// let raw_jsonb = num_jsonb.as_raw(); 107 | /// assert_eq!(raw_jsonb.to_string(), "123.45"); 108 | /// 109 | /// let string_jsonb = r#""hello, world!""#.parse::().unwrap(); 110 | /// let raw_jsonb = string_jsonb.as_raw(); 111 | /// assert_eq!(raw_jsonb.to_string(), r#""hello, world!""#); 112 | /// 113 | /// let true_jsonb = "true".parse::().unwrap(); 114 | /// let raw_jsonb = true_jsonb.as_raw(); 115 | /// assert_eq!(raw_jsonb.to_string(), "true"); 116 | /// 117 | /// // Example with invalid JSONB data (fallback to treat as text JSON string) 118 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); // Invalid binary JSONB 119 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 120 | /// 121 | /// // It will treat as text JSON string. 122 | /// assert_eq!(invalid_raw_jsonb.to_string(), "\u{1}\u{2}\u{3}\u{4}"); 123 | /// ``` 124 | #[allow(clippy::inherent_to_string)] 125 | pub fn to_string(&self) -> String { 126 | let mut buf = Vec::with_capacity(self.len()); 127 | let formatter = serde_json::ser::CompactFormatter {}; 128 | let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter); 129 | match self.serialize(&mut ser) { 130 | Ok(_) => String::from_utf8(buf).unwrap(), 131 | Err(_) => { 132 | if self.data.is_empty() { 133 | "null".to_string() 134 | } else { 135 | String::from_utf8_lossy(self.data).to_string() 136 | } 137 | } 138 | } 139 | } 140 | 141 | /// Converts the JSONB value to a pretty-printed JSON string. 
142 | /// 143 | /// This function serializes the JSONB value into a human-readable JSON string representation with indentation for formatting. 144 | /// If the JSONB data is invalid, return a "null" string. 145 | /// 146 | /// # Returns 147 | /// 148 | /// * `String` - The pretty-printed JSON string representation of the value. 149 | /// 150 | /// # Examples 151 | /// 152 | /// ```rust 153 | /// use jsonb::OwnedJsonb; 154 | /// 155 | /// let arr_jsonb = "[1, 2, 3]".parse::().unwrap(); 156 | /// let raw_jsonb = arr_jsonb.as_raw(); 157 | /// assert_eq!(raw_jsonb.to_pretty_string(), "[\n 1,\n 2,\n 3\n]"); 158 | /// 159 | /// let obj_jsonb = r#"{"a": 1, "b": "hello"}"#.parse::().unwrap(); 160 | /// let raw_jsonb = obj_jsonb.as_raw(); 161 | /// assert_eq!( 162 | /// raw_jsonb.to_pretty_string(), 163 | /// "{\n \"a\": 1,\n \"b\": \"hello\"\n}" 164 | /// ); 165 | /// 166 | /// let num_jsonb = "123.45".parse::().unwrap(); 167 | /// let raw_jsonb = num_jsonb.as_raw(); 168 | /// assert_eq!(raw_jsonb.to_pretty_string(), "123.45"); 169 | /// 170 | /// let string_jsonb = r#""hello, world!""#.parse::().unwrap(); 171 | /// let raw_jsonb = string_jsonb.as_raw(); 172 | /// assert_eq!(raw_jsonb.to_pretty_string(), r#""hello, world!""#); 173 | /// 174 | /// // Example with invalid JSONB data (fallback to text JSON parsing) 175 | /// let invalid_jsonb = OwnedJsonb::new(vec![1, 2, 3, 4]); // Invalid binary JSONB 176 | /// let invalid_raw_jsonb = invalid_jsonb.as_raw(); 177 | /// assert_eq!(invalid_raw_jsonb.to_pretty_string(), "null"); // Fails and returns "null" 178 | /// ``` 179 | pub fn to_pretty_string(&self) -> String { 180 | let mut buf = Vec::with_capacity(self.len()); 181 | let formatter = serde_json::ser::PrettyFormatter::new(); 182 | let mut ser = serde_json::Serializer::with_formatter(&mut buf, formatter); 183 | match self.serialize(&mut ser) { 184 | Ok(_) => String::from_utf8(buf).unwrap(), 185 | Err(_) => "null".to_string(), 186 | } 187 | } 188 | } 189 | 190 | /// Converts 
a borrowed byte slice into a RawJsonb. 191 | /// This provides a convenient way to create a RawJsonb from existing data without copying. 192 | impl<'a> From<&'a [u8]> for RawJsonb<'a> { 193 | fn from(data: &'a [u8]) -> Self { 194 | Self { data } 195 | } 196 | } 197 | 198 | /// Allows accessing the underlying byte slice as a reference. 199 | /// This enables easy integration with functions that expect a &[u8]. 200 | impl AsRef<[u8]> for RawJsonb<'_> { 201 | fn as_ref(&self) -> &[u8] { 202 | self.data 203 | } 204 | } 205 | 206 | impl Eq for RawJsonb<'_> {} 207 | 208 | impl PartialEq for RawJsonb<'_> { 209 | fn eq(&self, other: &Self) -> bool { 210 | self.partial_cmp(other) == Some(Ordering::Equal) 211 | } 212 | } 213 | 214 | /// Implements `PartialOrd` for `RawJsonb`, allowing comparison of two `RawJsonb` values. 215 | /// 216 | /// The comparison logic handles different JSONB types (scalar, array, object) and considers null values. 217 | /// The ordering is defined as follows: 218 | /// 219 | /// 1. Null is considered greater than any other type. 220 | /// 2. Scalars are compared based on their type and value (String > Number > Boolean > ExtensionValue). 221 | /// 3. Arrays are compared element by element. 222 | /// 4. Objects are compared based on their keys and values. 223 | /// 5. Arrays are greater than objects and scalars. 224 | /// 6. Objects are greater than scalars. 225 | /// 7. If the types are incompatible, None is returned. 226 | #[allow(clippy::non_canonical_partial_ord_impl)] 227 | impl PartialOrd for RawJsonb<'_> { 228 | fn partial_cmp(&self, other: &Self) -> Option { 229 | let self_type = self.jsonb_item_type().ok()?; 230 | let other_type = other.jsonb_item_type().ok()?; 231 | 232 | // First use JSONB type to determine the order, 233 | // different types must have different orders. 
234 | if let Some(ord) = self_type.partial_cmp(&other_type) { 235 | return Some(ord); 236 | } 237 | 238 | match (self_type, other_type) { 239 | (JsonbItemType::Array(self_len), JsonbItemType::Array(other_len)) => { 240 | let self_array_iter = ArrayIterator::new(*self).ok()?.unwrap(); 241 | let mut other_array_iter = ArrayIterator::new(*other).ok()?.unwrap(); 242 | for (self_res, other_res) in &mut self_array_iter.zip(&mut other_array_iter) { 243 | let self_item = self_res.ok()?; 244 | let other_item = other_res.ok()?; 245 | 246 | let ord = self_item.partial_cmp(&other_item)?; 247 | if ord != Ordering::Equal { 248 | return Some(ord); 249 | } 250 | } 251 | Some(self_len.cmp(&other_len)) 252 | } 253 | (JsonbItemType::Object(self_len), JsonbItemType::Object(other_len)) => { 254 | let self_object_iter = ObjectIterator::new(*self).ok()?.unwrap(); 255 | let mut other_object_iter = ObjectIterator::new(*other).ok()?.unwrap(); 256 | for (self_res, other_res) in &mut self_object_iter.zip(&mut other_object_iter) { 257 | let (self_key, self_val) = self_res.ok()?; 258 | let (other_key, other_val) = other_res.ok()?; 259 | 260 | let key_ord = self_key.partial_cmp(other_key)?; 261 | if key_ord != Ordering::Equal { 262 | return Some(key_ord); 263 | } 264 | let val_ord = self_val.partial_cmp(&other_val)?; 265 | if val_ord != Ordering::Equal { 266 | return Some(val_ord); 267 | } 268 | } 269 | Some(self_len.cmp(&other_len)) 270 | } 271 | (JsonbItemType::String, JsonbItemType::String) => { 272 | let self_val = self.as_str(); 273 | let other_val = other.as_str(); 274 | match (self_val, other_val) { 275 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 276 | (_, _) => None, 277 | } 278 | } 279 | (JsonbItemType::Number, JsonbItemType::Number) => { 280 | let self_val = self.as_number(); 281 | let other_val = other.as_number(); 282 | match (self_val, other_val) { 283 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 284 | (_, _) 
=> None, 285 | } 286 | } 287 | (JsonbItemType::Boolean, JsonbItemType::Boolean) => { 288 | let self_val = self.as_bool(); 289 | let other_val = other.as_bool(); 290 | match (self_val, other_val) { 291 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 292 | (_, _) => None, 293 | } 294 | } 295 | (JsonbItemType::Extension, JsonbItemType::Extension) => { 296 | let self_val = self.as_extension_value(); 297 | let other_val = other.as_extension_value(); 298 | match (self_val, other_val) { 299 | (Ok(Some(self_val)), Ok(Some(other_val))) => self_val.partial_cmp(&other_val), 300 | (_, _) => None, 301 | } 302 | } 303 | (_, _) => None, 304 | } 305 | } 306 | } 307 | 308 | /// Implements `Ord` for `RawJsonb`, allowing comparison of two `RawJsonb` values using the total ordering. 309 | /// This implementation leverages the `PartialOrd` implementation, returning `Ordering::Equal` for incomparable values. 310 | impl Ord for RawJsonb<'_> { 311 | fn cmp(&self, other: &Self) -> Ordering { 312 | match self.partial_cmp(other) { 313 | Some(ordering) => ordering, 314 | None => Ordering::Equal, 315 | } 316 | } 317 | } 318 | 319 | /// Deserializes a `RawJsonb` into a Rust data structure using Serde. 320 | /// 321 | /// This function takes a `RawJsonb` (a borrowed slice of JSONB data) and attempts 322 | /// to deserialize it into a Rust type `T` that implements the `Deserialize` trait. 323 | /// It uses a custom `Deserializer` to handle the JSONB data. 324 | /// 325 | /// # Arguments 326 | /// 327 | /// * `raw_jsonb`: A reference to the `RawJsonb` containing the JSONB data to deserialize. 328 | /// 329 | /// # Type Parameters 330 | /// 331 | /// * `T`: The Rust type to deserialize the JSONB data into. This type must implement 332 | /// the `serde::de::Deserialize` trait. 333 | /// 334 | /// # Returns 335 | /// 336 | /// * `Ok(t)`: If the deserialization is successful, returns the deserialized value of type `T`. 
337 | /// * `Err(Error::InvalidJsonb)`: If the deserialization fails due to invalid JSONB data 338 | /// (e.g., trailing characters after the expected JSONB structure). 339 | /// * `Err(e)`: If any other Serde deserialization error occurs. 340 | /// 341 | /// # Examples 342 | /// 343 | /// ``` 344 | /// use jsonb::from_raw_jsonb; 345 | /// use jsonb::OwnedJsonb; 346 | /// use jsonb::RawJsonb; 347 | /// use serde::Deserialize; 348 | /// 349 | /// #[derive(Deserialize, Debug)] 350 | /// struct Person { 351 | /// name: String, 352 | /// age: u32, 353 | /// } 354 | /// 355 | /// let owned_jsonb = r#"{"name": "Alice", "age": 20}"#.parse::().unwrap(); 356 | /// let raw_jsonb = owned_jsonb.as_raw(); 357 | /// 358 | /// let person: Person = from_raw_jsonb(&raw_jsonb).unwrap(); 359 | /// println!("{:?}", person); // Output: Person { name: "Alice", age: 20 } 360 | /// ``` 361 | pub fn from_raw_jsonb<'de, T>(raw_jsonb: &'de RawJsonb) -> Result 362 | where 363 | T: serde::de::Deserialize<'de>, 364 | { 365 | let mut deserializer = Deserializer::new(raw_jsonb); 366 | let t = T::deserialize(&mut deserializer)?; 367 | if deserializer.end() { 368 | Ok(t) 369 | } else { 370 | // Trailing characters 371 | Err(Error::InvalidJsonb) 372 | } 373 | } 374 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::io::Read; 16 | 17 | use super::constants::*; 18 | use super::error::Error; 19 | use super::error::ParseErrorCode; 20 | 21 | #[allow(clippy::zero_prefixed_literal)] 22 | static HEX: [u8; 256] = { 23 | const __: u8 = 255; // not a hex digit 24 | [ 25 | // 1 2 3 4 5 6 7 8 9 A B C D E F 26 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0 27 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1 28 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2 29 | 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3 30 | __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4 31 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5 32 | __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6 33 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7 34 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8 35 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9 36 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A 37 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B 38 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C 39 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D 40 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E 41 | __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F 42 | ] 43 | }; 44 | 45 | pub fn parse_string(mut data: &[u8], len: usize, idx: &mut usize) -> Result { 46 | let mut buf = Vec::with_capacity(len); 47 | let mut str_buf = String::with_capacity(4); 48 | while !data.is_empty() { 49 | *idx += 1; 50 | let byte = data[0]; 51 | if byte == b'\\' { 52 | data = &data[1..]; 53 | data = parse_escaped_string(data, idx, &mut str_buf)?; 54 | 
buf.extend_from_slice(str_buf.as_bytes()); 55 | str_buf.clear(); 56 | } else { 57 | buf.push(byte); 58 | data = &data[1..]; 59 | } 60 | } 61 | String::from_utf8(buf).map_err(|_| Error::Syntax(ParseErrorCode::InvalidStringValue, *idx)) 62 | } 63 | 64 | fn parse_escaped_string<'a>( 65 | mut data: &'a [u8], 66 | idx: &mut usize, 67 | str_buf: &mut String, 68 | ) -> Result<&'a [u8], Error> { 69 | let byte = data[0]; 70 | *idx += 1; 71 | data = &data[1..]; 72 | match byte { 73 | b'\\' => str_buf.push(BS), 74 | b'"' => str_buf.push(QU), 75 | b'/' => str_buf.push(SD), 76 | b'b' => str_buf.push(BB), 77 | b'f' => str_buf.push(FF), 78 | b'n' => str_buf.push(NN), 79 | b'r' => str_buf.push(RR), 80 | b't' => str_buf.push(TT), 81 | b'u' => { 82 | let mut numbers = vec![0; UNICODE_LEN]; 83 | if data[0] == b'{' { 84 | data = &data[1..]; 85 | data.read_exact(numbers.as_mut_slice())?; 86 | if data[0] != b'}' { 87 | return Err(Error::Syntax( 88 | ParseErrorCode::UnexpectedEndOfHexEscape, 89 | *idx, 90 | )); 91 | } 92 | data = &data[1..]; 93 | *idx += 6; 94 | } else { 95 | data.read_exact(numbers.as_mut_slice())?; 96 | *idx += 4; 97 | } 98 | let hex = decode_hex_escape(numbers.clone(), idx)?; 99 | 100 | let c = match hex { 101 | 0xDC00..=0xDFFF => { 102 | encode_invalid_unicode(numbers, str_buf); 103 | return Ok(data); 104 | } 105 | 106 | // Non-BMP characters are encoded as a sequence of two hex 107 | // escapes, representing UTF-16 surrogates. If deserializing a 108 | // utf-8 string the surrogates are required to be paired, 109 | // whereas deserializing a byte string accepts lone surrogates. 
110 | n1 @ 0xD800..=0xDBFF => { 111 | if data.len() < 2 { 112 | encode_invalid_unicode(numbers, str_buf); 113 | return Ok(data); 114 | } 115 | if data[0] == b'\\' && data[1] == b'u' { 116 | *idx += 2; 117 | data = &data[2..]; 118 | } else { 119 | encode_invalid_unicode(numbers, str_buf); 120 | return Ok(data); 121 | } 122 | let mut lower_numbers = vec![0; UNICODE_LEN]; 123 | if data[0] == b'{' { 124 | data = &data[1..]; 125 | data.read_exact(lower_numbers.as_mut_slice())?; 126 | if data[0] != b'}' { 127 | return Err(Error::Syntax( 128 | ParseErrorCode::UnexpectedEndOfHexEscape, 129 | *idx, 130 | )); 131 | } 132 | data = &data[1..]; 133 | *idx += 6; 134 | } else { 135 | data.read_exact(lower_numbers.as_mut_slice())?; 136 | *idx += 4; 137 | } 138 | let n2 = decode_hex_escape(lower_numbers.clone(), idx)?; 139 | if !(0xDC00..=0xDFFF).contains(&n2) { 140 | encode_invalid_unicode(numbers, str_buf); 141 | encode_invalid_unicode(lower_numbers, str_buf); 142 | return Ok(data); 143 | } 144 | 145 | #[allow(clippy::precedence)] 146 | let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; 147 | char::from_u32(n).unwrap() 148 | } 149 | 150 | // Every u16 outside of the surrogate ranges above is guaranteed 151 | // to be a legal char. 
152 | n => char::from_u32(n as u32).unwrap(), 153 | }; 154 | str_buf.push(c); 155 | } 156 | other => return Err(Error::Syntax(ParseErrorCode::InvalidEscaped(other), *idx)), 157 | } 158 | Ok(data) 159 | } 160 | 161 | // https://datatracker.ietf.org/doc/html/rfc8259#section-8.2 162 | // RFC8259 allow invalid Unicode 163 | #[inline] 164 | fn encode_invalid_unicode(numbers: Vec, str_buf: &mut String) { 165 | str_buf.push('\\'); 166 | str_buf.push('u'); 167 | for n in numbers { 168 | str_buf.push(n.into()); 169 | } 170 | } 171 | 172 | #[inline] 173 | fn decode_hex_val(val: u8) -> Option { 174 | let n = HEX[val as usize] as u16; 175 | if n == 255 { 176 | None 177 | } else { 178 | Some(n) 179 | } 180 | } 181 | 182 | #[inline] 183 | fn decode_hex_escape(numbers: Vec, idx: &usize) -> Result { 184 | let mut n = 0; 185 | for number in numbers { 186 | if let Some(hex) = decode_hex_val(number) { 187 | n = (n << 4) + hex; 188 | } else { 189 | return Err(Error::Syntax(ParseErrorCode::InvalidHex(number), *idx)); 190 | } 191 | } 192 | Ok(n) 193 | } 194 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::borrow::Cow; 16 | use std::collections::BTreeMap; 17 | use std::fmt::Debug; 18 | use std::fmt::Display; 19 | use std::fmt::Formatter; 20 | use std::mem::discriminant; 21 | 22 | use rand::distr::Alphanumeric; 23 | use rand::distr::SampleString; 24 | use rand::rng; 25 | use rand::Rng; 26 | 27 | use super::extension::Date; 28 | use super::extension::Interval; 29 | use super::extension::Timestamp; 30 | use super::extension::TimestampTz; 31 | use super::number::Number; 32 | use crate::core::Encoder; 33 | 34 | pub type Object<'a> = BTreeMap>; 35 | 36 | // JSONB value 37 | #[derive(Clone, PartialEq, Default, Eq)] 38 | pub enum Value<'a> { 39 | #[default] 40 | Null, 41 | Bool(bool), 42 | String(Cow<'a, str>), 43 | Number(Number), 44 | Binary(&'a [u8]), 45 | Date(Date), 46 | Timestamp(Timestamp), 47 | TimestampTz(TimestampTz), 48 | Interval(Interval), 49 | Array(Vec>), 50 | Object(Object<'a>), 51 | } 52 | 53 | impl Debug for Value<'_> { 54 | fn fmt(&self, formatter: &mut Formatter) -> std::fmt::Result { 55 | match *self { 56 | Value::Null => formatter.debug_tuple("Null").finish(), 57 | Value::Bool(v) => formatter.debug_tuple("Bool").field(&v).finish(), 58 | Value::Number(ref v) => Debug::fmt(v, formatter), 59 | Value::String(ref v) => formatter.debug_tuple("String").field(v).finish(), 60 | Value::Binary(ref v) => formatter.debug_tuple("Binary").field(v).finish(), 61 | Value::Date(ref v) => formatter.debug_tuple("Date").field(v).finish(), 62 | Value::Timestamp(ref v) => formatter.debug_tuple("Timestamp").field(v).finish(), 63 | Value::TimestampTz(ref v) => formatter.debug_tuple("TimestampTz").field(v).finish(), 64 | Value::Interval(ref v) => formatter.debug_tuple("Interval").field(v).finish(), 65 | Value::Array(ref v) => { 66 | formatter.write_str("Array(")?; 67 | Debug::fmt(v, formatter)?; 68 | formatter.write_str(")") 69 | } 70 | Value::Object(ref v) => { 71 | formatter.write_str("Object(")?; 72 | Debug::fmt(v, formatter)?; 73 | formatter.write_str(")") 
74 | } 75 | } 76 | } 77 | } 78 | 79 | impl Display for Value<'_> { 80 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 81 | match self { 82 | Value::Null => write!(f, "null"), 83 | Value::Bool(v) => { 84 | if *v { 85 | write!(f, "true") 86 | } else { 87 | write!(f, "false") 88 | } 89 | } 90 | Value::Number(ref v) => write!(f, "{}", v), 91 | Value::String(ref v) => { 92 | write!(f, "{:?}", v) 93 | } 94 | Value::Binary(v) => { 95 | write!(f, "\"")?; 96 | for c in *v { 97 | write!(f, "{c:02X}")?; 98 | } 99 | write!(f, "\"")?; 100 | Ok(()) 101 | } 102 | Value::Date(v) => { 103 | write!(f, "\"{}\"", v) 104 | } 105 | Value::Timestamp(v) => { 106 | write!(f, "\"{}\"", v) 107 | } 108 | Value::TimestampTz(v) => { 109 | write!(f, "\"{}\"", v) 110 | } 111 | Value::Interval(v) => { 112 | write!(f, "\"{}\"", v) 113 | } 114 | Value::Array(ref vs) => { 115 | write!(f, "[")?; 116 | for (i, v) in vs.iter().enumerate() { 117 | if i > 0 { 118 | write!(f, ",")?; 119 | } 120 | write!(f, "{v}")?; 121 | } 122 | write!(f, "]") 123 | } 124 | Value::Object(ref vs) => { 125 | write!(f, "{{")?; 126 | for (i, (k, v)) in vs.iter().enumerate() { 127 | if i > 0 { 128 | write!(f, ",")?; 129 | } 130 | write!(f, "\"")?; 131 | write!(f, "{k}")?; 132 | write!(f, "\"")?; 133 | write!(f, ":")?; 134 | write!(f, "{v}")?; 135 | } 136 | write!(f, "}}") 137 | } 138 | } 139 | } 140 | } 141 | 142 | impl<'a> Value<'a> { 143 | pub fn is_scalar(&self) -> bool { 144 | !self.is_array() && !self.is_object() 145 | } 146 | 147 | pub fn is_object(&self) -> bool { 148 | matches!(self, Value::Object(_v)) 149 | } 150 | 151 | pub fn as_object(&self) -> Option<&Object<'a>> { 152 | match self { 153 | Value::Object(ref obj) => Some(obj), 154 | _ => None, 155 | } 156 | } 157 | 158 | pub fn is_array(&self) -> bool { 159 | matches!(self, Value::Array(_v)) 160 | } 161 | 162 | pub fn as_array(&self) -> Option<&Vec>> { 163 | match self { 164 | Value::Array(ref array) => Some(array), 165 | _ => None, 166 | } 167 | } 168 
| 169 | pub fn is_string(&self) -> bool { 170 | self.as_str().is_some() 171 | } 172 | 173 | pub fn as_str(&self) -> Option<&Cow<'_, str>> { 174 | match self { 175 | Value::String(s) => Some(s), 176 | _ => None, 177 | } 178 | } 179 | 180 | pub fn is_number(&self) -> bool { 181 | matches!(self, Value::Number(_)) 182 | } 183 | 184 | pub fn as_number(&self) -> Option<&Number> { 185 | match self { 186 | Value::Number(n) => Some(n), 187 | _ => None, 188 | } 189 | } 190 | 191 | pub fn is_i64(&self) -> bool { 192 | self.as_i64().is_some() 193 | } 194 | 195 | pub fn is_u64(&self) -> bool { 196 | self.as_u64().is_some() 197 | } 198 | 199 | pub fn is_f64(&self) -> bool { 200 | self.as_f64().is_some() 201 | } 202 | 203 | pub fn as_i64(&self) -> Option { 204 | match self { 205 | Value::Number(n) => n.as_i64(), 206 | _ => None, 207 | } 208 | } 209 | 210 | pub fn as_u64(&self) -> Option { 211 | match self { 212 | Value::Number(n) => n.as_u64(), 213 | _ => None, 214 | } 215 | } 216 | 217 | pub fn as_f64(&self) -> Option { 218 | match self { 219 | Value::Number(n) => n.as_f64(), 220 | _ => None, 221 | } 222 | } 223 | 224 | pub fn is_boolean(&self) -> bool { 225 | matches!(self, Value::Bool(_v)) 226 | } 227 | 228 | pub fn as_bool(&self) -> Option { 229 | match self { 230 | Value::Bool(v) => Some(*v), 231 | _ => None, 232 | } 233 | } 234 | 235 | pub fn is_null(&self) -> bool { 236 | matches!(self, Value::Null) 237 | } 238 | 239 | pub fn as_null(&self) -> Option<()> { 240 | match self { 241 | Value::Null => Some(()), 242 | _ => None, 243 | } 244 | } 245 | 246 | pub fn is_binary(&self) -> bool { 247 | matches!(self, Value::Binary(_v)) 248 | } 249 | 250 | pub fn as_binary(&self) -> Option<&[u8]> { 251 | match self { 252 | Value::Binary(v) => Some(v), 253 | _ => None, 254 | } 255 | } 256 | 257 | pub fn is_date(&self) -> bool { 258 | matches!(self, Value::Date(_v)) 259 | } 260 | 261 | pub fn as_date(&self) -> Option<&Date> { 262 | match self { 263 | Value::Date(v) => Some(v), 264 | _ => 
None, 265 | } 266 | } 267 | 268 | pub fn is_timestamp(&self) -> bool { 269 | matches!(self, Value::Timestamp(_v)) 270 | } 271 | 272 | pub fn as_timestamp(&self) -> Option<&Timestamp> { 273 | match self { 274 | Value::Timestamp(v) => Some(v), 275 | _ => None, 276 | } 277 | } 278 | 279 | pub fn is_timestamp_tz(&self) -> bool { 280 | matches!(self, Value::TimestampTz(_v)) 281 | } 282 | 283 | pub fn as_timestamp_tz(&self) -> Option<&TimestampTz> { 284 | match self { 285 | Value::TimestampTz(v) => Some(v), 286 | _ => None, 287 | } 288 | } 289 | 290 | pub fn is_interval(&self) -> bool { 291 | matches!(self, Value::Interval(_v)) 292 | } 293 | 294 | pub fn as_interval(&self) -> Option<&Interval> { 295 | match self { 296 | Value::Interval(v) => Some(v), 297 | _ => None, 298 | } 299 | } 300 | 301 | /// Serialize the JSONB Value into a byte stream. 302 | pub fn write_to_vec(&self, buf: &mut Vec) { 303 | let mut encoder = Encoder::new(buf); 304 | encoder.encode(self); 305 | } 306 | 307 | /// Serialize the JSONB Value into a byte stream. 
308 | pub fn to_vec(&self) -> Vec { 309 | let mut buf = Vec::new(); 310 | self.write_to_vec(&mut buf); 311 | buf 312 | } 313 | 314 | pub fn get_by_name_ignore_case(&self, name: &str) -> Option<&Value<'a>> { 315 | match self { 316 | Value::Object(obj) => match obj.get(name) { 317 | Some(val) => Some(val), 318 | None => { 319 | for key in obj.keys() { 320 | if name.eq_ignore_ascii_case(key) { 321 | return obj.get(key); 322 | } 323 | } 324 | None 325 | } 326 | }, 327 | _ => None, 328 | } 329 | } 330 | 331 | pub fn array_length(&self) -> Option { 332 | match self { 333 | Value::Array(arr) => Some(arr.len()), 334 | _ => None, 335 | } 336 | } 337 | 338 | pub fn object_keys(&self) -> Option> { 339 | match self { 340 | Value::Object(obj) => { 341 | let mut keys = Vec::with_capacity(obj.len()); 342 | for k in obj.keys() { 343 | keys.push(k.clone().into()); 344 | } 345 | Some(Value::Array(keys)) 346 | } 347 | _ => None, 348 | } 349 | } 350 | 351 | pub fn eq_variant(&self, other: &Value) -> bool { 352 | discriminant(self) == discriminant(other) 353 | } 354 | 355 | /// generate random JSONB value 356 | pub fn rand_value() -> Value<'static> { 357 | let mut rng = rng(); 358 | let val = match rng.random_range(0..=2) { 359 | 0 => { 360 | let len = rng.random_range(0..=5); 361 | let mut values = Vec::with_capacity(len); 362 | for _ in 0..len { 363 | values.push(Self::rand_scalar_value()); 364 | } 365 | Value::Array(values) 366 | } 367 | 1 => { 368 | let len = rng.random_range(0..=5); 369 | let mut obj = Object::new(); 370 | for _ in 0..len { 371 | let k = Alphanumeric.sample_string(&mut rng, 5); 372 | let v = Self::rand_scalar_value(); 373 | obj.insert(k, v); 374 | } 375 | Value::Object(obj) 376 | } 377 | _ => Self::rand_scalar_value(), 378 | }; 379 | val 380 | } 381 | 382 | fn rand_scalar_value() -> Value<'static> { 383 | let mut rng = rng(); 384 | let val = match rng.random_range(0..=3) { 385 | 0 => { 386 | let v = rng.random_bool(0.5); 387 | Value::Bool(v) 388 | } 389 | 1 => { 
390 | let s = Alphanumeric.sample_string(&mut rng, 5); 391 | Value::String(Cow::from(s)) 392 | } 393 | 2 => match rng.random_range(0..=2) { 394 | 0 => { 395 | let n: u64 = rng.random_range(0..=100000); 396 | Value::Number(Number::UInt64(n)) 397 | } 398 | 1 => { 399 | let n: i64 = rng.random_range(-100000..=100000); 400 | Value::Number(Number::Int64(n)) 401 | } 402 | _ => { 403 | let n: f64 = rng.random_range(-4000.0..1.3e5); 404 | Value::Number(Number::Float64(n)) 405 | } 406 | }, 407 | _ => Value::Null, 408 | }; 409 | val 410 | } 411 | } 412 | -------------------------------------------------------------------------------- /tests/it/decode.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use std::borrow::Cow; 16 | 17 | use ethnum::I256; 18 | use jsonb::{ 19 | from_slice, Date, Decimal128, Decimal256, Interval, Number, Object, Timestamp, TimestampTz, 20 | Value, 21 | }; 22 | 23 | #[test] 24 | fn test_decode_null() { 25 | let s = b"\x20\0\0\0\0\0\0\0"; 26 | let value = from_slice(s).unwrap(); 27 | assert!(value.is_null()); 28 | assert_eq!(value.as_null(), Some(())); 29 | } 30 | 31 | #[test] 32 | fn test_decode_boolean() { 33 | let tests = vec![ 34 | (b"\x20\0\0\0\x40\0\0\0".to_vec(), true), 35 | (b"\x20\0\0\0\x30\0\0\0".to_vec(), false), 36 | ]; 37 | for (s, v) in tests { 38 | let value = from_slice(s.as_slice()).unwrap(); 39 | assert!(value.is_boolean()); 40 | assert_eq!(value.as_bool().unwrap(), v); 41 | } 42 | } 43 | 44 | #[test] 45 | fn test_decode_string() { 46 | let tests = vec![ 47 | (b"\x20\0\0\0\x10\0\0\x03\x61\x73\x64".to_vec(), "asd"), 48 | ( 49 | b"\x20\0\0\0\x10\0\0\x06\xE6\xB5\x8B\xE8\xAF\x95".to_vec(), 50 | "测试", 51 | ), 52 | (b"\x20\0\0\0\x10\0\0\x01\x0A".to_vec(), "\n"), 53 | ]; 54 | for (s, v) in tests { 55 | let value = from_slice(s.as_slice()).unwrap(); 56 | assert!(value.is_string()); 57 | assert_eq!(value.as_str().unwrap(), &Cow::from(v)); 58 | } 59 | } 60 | 61 | #[test] 62 | fn test_decode_int64() { 63 | let tests = vec![ 64 | (b"\x20\0\0\0\x20\0\0\x01\x00".to_vec(), 0i64), 65 | (b"\x20\0\0\0\x20\0\0\x02\x40\x9C".to_vec(), -100i64), 66 | (b"\x20\0\0\0\x20\0\0\x02\x40\x80".to_vec(), i8::MIN as i64), 67 | (b"\x20\0\0\0\x20\0\0\x02\x40\x7F".to_vec(), i8::MAX as i64), 68 | ( 69 | b"\x20\0\0\0\x20\0\0\x03\x40\x80\0".to_vec(), 70 | i16::MIN as i64, 71 | ), 72 | ( 73 | b"\x20\0\0\0\x20\0\0\x03\x40\x7F\xFF".to_vec(), 74 | i16::MAX as i64, 75 | ), 76 | ( 77 | b"\x20\0\0\0\x20\0\0\x05\x40\x80\0\0\0".to_vec(), 78 | i32::MIN as i64, 79 | ), 80 | ( 81 | b"\x20\0\0\0\x20\0\0\x05\x40\x7F\xFF\xFF\xFF".to_vec(), 82 | i32::MAX as i64, 83 | ), 84 | ( 85 | b"\x20\0\0\0\x20\0\0\x09\x40\x80\0\0\0\0\0\0\0".to_vec(), 86 | i64::MIN, 87 | ), 
88 | ( 89 | b"\x20\0\0\0\x20\0\0\x09\x40\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF".to_vec(), 90 | i64::MAX, 91 | ), 92 | ]; 93 | for (s, v) in tests { 94 | let value = from_slice(s.as_slice()).unwrap(); 95 | assert!(value.is_i64()); 96 | assert_eq!(value.as_i64().unwrap(), v); 97 | } 98 | } 99 | 100 | #[test] 101 | fn test_decode_uint64() { 102 | let tests = vec![ 103 | (b"\x20\0\0\0\x20\0\0\x01\x00".to_vec(), 0u64), 104 | (b"\x20\0\0\0\x20\0\0\x02\x50\x64".to_vec(), 100u64), 105 | (b"\x20\0\0\0\x20\0\0\x02\x50\xFF".to_vec(), u8::MAX as u64), 106 | ( 107 | b"\x20\0\0\0\x20\0\0\x03\x50\xFF\xFF".to_vec(), 108 | u16::MAX as u64, 109 | ), 110 | ( 111 | b"\x20\0\0\0\x20\0\0\x05\x50\xFF\xFF\xFF\xFF".to_vec(), 112 | u32::MAX as u64, 113 | ), 114 | ( 115 | b"\x20\0\0\0\x20\0\0\x09\x50\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF".to_vec(), 116 | u64::MAX, 117 | ), 118 | ]; 119 | for (s, v) in tests { 120 | let value = from_slice(s.as_slice()).unwrap(); 121 | assert!(value.is_u64()); 122 | assert_eq!(value.as_u64().unwrap(), v); 123 | } 124 | } 125 | 126 | #[test] 127 | fn test_decode_float64() { 128 | let tests = vec![ 129 | (b"\x20\0\0\0\x20\0\0\x01\x20".to_vec(), f64::INFINITY), 130 | (b"\x20\0\0\0\x20\0\0\x01\x30".to_vec(), f64::NEG_INFINITY), 131 | ( 132 | b"\x20\0\0\0\x20\0\0\x09\x60\x3F\x89\x30\xBE\x0D\xED\x28\x8D".to_vec(), 133 | 0.0123f64, 134 | ), 135 | ( 136 | b"\x20\0\0\0\x20\0\0\x09\x60\x7F\xE5\x5C\x57\x6D\x81\x57\x26".to_vec(), 137 | 1.2e308f64, 138 | ), 139 | ]; 140 | for (s, v) in tests { 141 | let value = from_slice(s.as_slice()).unwrap(); 142 | assert!(value.is_f64()); 143 | assert_eq!(value.as_f64().unwrap(), v); 144 | } 145 | } 146 | 147 | #[test] 148 | fn test_decode_decimal() { 149 | let tests = vec![ 150 | (b"\x20\0\0\0\x20\0\0\x13\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x26\x02".to_vec(), Number::Decimal128(Decimal128 { 151 | precision: 38, 152 | scale: 2, 153 | value: 1234 154 | })), 155 | 
(b"\x20\0\0\0\x20\0\0\x13\x70\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x26\x0A".to_vec(), Number::Decimal128(Decimal128 { 156 | precision: 38, 157 | scale: 10, 158 | value: 10000000000485 159 | })), 160 | (b"\x20\0\0\0\x20\0\0\x23\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x4C\x02".to_vec(), 161 | Number::Decimal256(Decimal256 { precision: 76, scale: 2, value: I256::new(1234) })), 162 | (b"\x20\0\0\0\x20\0\0\x23\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x4C\x0A".to_vec(), 163 | Number::Decimal256(Decimal256 { precision: 76, scale: 10, value: I256::new(10000000000485) })), 164 | ]; 165 | for (s, v) in tests { 166 | let value = from_slice(s.as_slice()).unwrap(); 167 | assert!(value.is_number()); 168 | assert_eq!(value.as_number().unwrap(), v); 169 | } 170 | } 171 | 172 | #[test] 173 | fn test_decode_array() { 174 | let tests = vec![( 175 | b"\x80\0\0\x02\x30\0\0\0\x40\0\0\0".to_vec(), 176 | vec![Value::Bool(false), Value::Bool(true)], 177 | )]; 178 | for (s, v) in tests { 179 | let value = from_slice(s.as_slice()).unwrap(); 180 | assert!(value.is_array()); 181 | let arr = value.as_array().unwrap(); 182 | assert_eq!(arr.len(), v.len()); 183 | for (l, r) in arr.iter().zip(v.iter()) { 184 | assert_eq!(l, r); 185 | } 186 | } 187 | } 188 | 189 | #[test] 190 | fn test_decode_object() { 191 | let mut obj1 = Object::new(); 192 | obj1.insert("asd".to_string(), Value::String(Cow::from("adf"))); 193 | let tests = vec![( 194 | b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66".to_vec(), 195 | obj1, 196 | )]; 197 | for (s, v) in tests { 198 | let value = from_slice(s.as_slice()).unwrap(); 199 | assert!(value.is_object()); 200 | let obj = value.as_object().unwrap(); 201 | assert_eq!(obj.len(), v.len()); 202 | for ((lk, lv), (rk, rv)) in obj.iter().enumerate().zip(v.iter().enumerate()) { 203 | assert_eq!(lk, rk); 204 | assert_eq!(lv, rv); 205 | } 206 | } 207 | } 208 | 209 | #[test] 210 | fn 
test_decode_extension() { 211 | let tests = vec![ 212 | ( 213 | b"\x20\0\0\0\x60\0\0\x04\0\x01\x02\x03".to_vec(), 214 | Value::Binary(&[1, 2, 3]), 215 | ), 216 | ( 217 | b"\x20\0\0\0\x60\0\0\x05\x10\0\0\x4f\x94".to_vec(), 218 | Value::Date(Date { value: 20372 }), 219 | ), 220 | ( 221 | b"\x20\0\0\0\x60\0\0\x09\x20\0\x06\x40\xd6\xb7\x23\x80\0".to_vec(), 222 | Value::Timestamp(Timestamp { 223 | value: 1760140800000000, 224 | }), 225 | ), 226 | ( 227 | b"\x20\0\0\0\x60\0\0\x0a\x30\0\x06\x40\xd6\xb7\x23\x80\0\x08".to_vec(), 228 | Value::TimestampTz(TimestampTz { 229 | offset: 8, 230 | value: 1760140800000000, 231 | }), 232 | ), 233 | ( 234 | b"\x20\0\0\0\x60\0\0\x11\x40\0\0\0\x0A\0\0\0\x14\0\0\0\0\x11\xE1\xA3\0".to_vec(), 235 | Value::Interval(Interval { 236 | months: 10, 237 | days: 20, 238 | micros: 300000000, 239 | }), 240 | ), 241 | ]; 242 | 243 | for (s, v) in tests { 244 | let value = from_slice(s.as_slice()).unwrap(); 245 | assert_eq!(value, v); 246 | } 247 | } 248 | 249 | #[test] 250 | fn test_decode_corrupted() { 251 | let json = "{\"a\": 1, \"b\": \"123\"}"; 252 | let jsonb = jsonb::parse_value(json.as_bytes()).unwrap().to_vec(); 253 | let corrupted = jsonb[0..jsonb.len() - 1].to_vec(); 254 | let value = from_slice(corrupted.as_slice()); 255 | assert!(value.is_err()); 256 | } 257 | -------------------------------------------------------------------------------- /tests/it/encode.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::borrow::Cow; 16 | 17 | use ethnum::I256; 18 | use jsonb::{ 19 | Date, Decimal128, Decimal256, Interval, Number, Object, Timestamp, TimestampTz, Value, 20 | }; 21 | 22 | #[test] 23 | fn test_encode_null() { 24 | assert_eq!(&Value::Null.to_vec(), b"\x20\0\0\0\0\0\0\0"); 25 | } 26 | 27 | #[test] 28 | fn test_encode_boolean() { 29 | assert_eq!(&Value::Bool(true).to_vec(), b"\x20\0\0\0\x40\0\0\0"); 30 | assert_eq!(&Value::Bool(false).to_vec(), b"\x20\0\0\0\x30\0\0\0"); 31 | } 32 | 33 | #[test] 34 | fn test_encode_string() { 35 | assert_eq!( 36 | &Value::String(Cow::from("asd")).to_vec(), 37 | b"\x20\0\0\0\x10\0\0\x03\x61\x73\x64" 38 | ); 39 | assert_eq!( 40 | &Value::String(Cow::from("测试")).to_vec(), 41 | b"\x20\0\0\0\x10\0\0\x06\xE6\xB5\x8B\xE8\xAF\x95" 42 | ); 43 | } 44 | 45 | #[test] 46 | fn test_encode_int64() { 47 | assert_eq!( 48 | &Value::Number(Number::Int64(0)).to_vec(), 49 | b"\x20\0\0\0\x20\0\0\x01\x00" 50 | ); 51 | assert_eq!( 52 | &Value::Number(Number::Int64(-100)).to_vec(), 53 | b"\x20\0\0\0\x20\0\0\x02\x40\x9C" 54 | ); 55 | assert_eq!( 56 | &Value::Number(Number::Int64(i8::MIN as i64)).to_vec(), 57 | b"\x20\0\0\0\x20\0\0\x02\x40\x80" 58 | ); 59 | assert_eq!( 60 | &Value::Number(Number::Int64(i8::MAX as i64)).to_vec(), 61 | b"\x20\0\0\0\x20\0\0\x02\x40\x7F" 62 | ); 63 | assert_eq!( 64 | &Value::Number(Number::Int64(i16::MIN as i64)).to_vec(), 65 | b"\x20\0\0\0\x20\0\0\x03\x40\x80\0" 66 | ); 67 | assert_eq!( 68 | &Value::Number(Number::Int64(i16::MAX as i64)).to_vec(), 69 | 
b"\x20\0\0\0\x20\0\0\x03\x40\x7F\xFF" 70 | ); 71 | assert_eq!( 72 | &Value::Number(Number::Int64(i32::MIN as i64)).to_vec(), 73 | b"\x20\0\0\0\x20\0\0\x05\x40\x80\0\0\0" 74 | ); 75 | assert_eq!( 76 | &Value::Number(Number::Int64(i32::MAX as i64)).to_vec(), 77 | b"\x20\0\0\0\x20\0\0\x05\x40\x7F\xFF\xFF\xFF" 78 | ); 79 | assert_eq!( 80 | &Value::Number(Number::Int64(i64::MIN)).to_vec(), 81 | b"\x20\0\0\0\x20\0\0\x09\x40\x80\0\0\0\0\0\0\0" 82 | ); 83 | assert_eq!( 84 | &Value::Number(Number::Int64(i64::MAX)).to_vec(), 85 | b"\x20\0\0\0\x20\0\0\x09\x40\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 86 | ); 87 | } 88 | 89 | #[test] 90 | fn test_encode_uint64() { 91 | assert_eq!( 92 | &Value::Number(Number::UInt64(0)).to_vec(), 93 | b"\x20\0\0\0\x20\0\0\x01\x00" 94 | ); 95 | assert_eq!( 96 | &Value::Number(Number::UInt64(100)).to_vec(), 97 | b"\x20\0\0\0\x20\0\0\x02\x50\x64" 98 | ); 99 | assert_eq!( 100 | &Value::Number(Number::UInt64(u8::MAX as u64)).to_vec(), 101 | b"\x20\0\0\0\x20\0\0\x02\x50\xFF" 102 | ); 103 | assert_eq!( 104 | &Value::Number(Number::UInt64(u16::MAX as u64)).to_vec(), 105 | b"\x20\0\0\0\x20\0\0\x03\x50\xFF\xFF" 106 | ); 107 | assert_eq!( 108 | &Value::Number(Number::UInt64(u32::MAX as u64)).to_vec(), 109 | b"\x20\0\0\0\x20\0\0\x05\x50\xFF\xFF\xFF\xFF" 110 | ); 111 | assert_eq!( 112 | &Value::Number(Number::UInt64(u64::MAX)).to_vec(), 113 | b"\x20\0\0\0\x20\0\0\x09\x50\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" 114 | ); 115 | } 116 | 117 | #[test] 118 | fn test_encode_float64() { 119 | assert_eq!( 120 | &Value::Number(Number::Float64(f64::INFINITY)).to_vec(), 121 | b"\x20\0\0\0\x20\0\0\x01\x20" 122 | ); 123 | assert_eq!( 124 | &Value::Number(Number::Float64(f64::NEG_INFINITY)).to_vec(), 125 | b"\x20\0\0\0\x20\0\0\x01\x30" 126 | ); 127 | assert_eq!( 128 | &Value::Number(Number::Float64(0.0123f64)).to_vec(), 129 | b"\x20\0\0\0\x20\0\0\x09\x60\x3F\x89\x30\xBE\x0D\xED\x28\x8D" 130 | ); 131 | assert_eq!( 132 | &Value::Number(Number::Float64(1.2e308f64)).to_vec(), 133 | 
b"\x20\0\0\0\x20\0\0\x09\x60\x7F\xE5\x5C\x57\x6D\x81\x57\x26" 134 | ); 135 | } 136 | 137 | #[test] 138 | fn test_encode_decimal() { 139 | assert_eq!( 140 | &Value::Number(Number::Decimal128(Decimal128 { 141 | precision: 38, 142 | scale: 2, 143 | value: 1234 144 | })) 145 | .to_vec(), 146 | b"\x20\0\0\0\x20\0\0\x13\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x26\x02" 147 | ); 148 | assert_eq!( 149 | &Value::Number(Number::Decimal128(Decimal128 { 150 | precision: 38, 151 | scale: 10, 152 | value: 10000000000485 153 | })) 154 | .to_vec(), 155 | b"\x20\0\0\0\x20\0\0\x13\x70\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x26\x0A" 156 | ); 157 | 158 | assert_eq!( 159 | &Value::Number(Number::Decimal256(Decimal256 { precision: 76, scale: 2, value: I256::new(1234) })).to_vec(), 160 | b"\x20\0\0\0\x20\0\0\x23\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x4C\x02" 161 | ); 162 | assert_eq!( 163 | &Value::Number(Number::Decimal256(Decimal256 { precision: 76, scale: 10, value: I256::new(10000000000485) })).to_vec(), 164 | b"\x20\0\0\0\x20\0\0\x23\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x09\x18\x4E\x72\xA1\xE5\x4C\x0A" 165 | ); 166 | } 167 | 168 | #[test] 169 | fn test_encode_array() { 170 | assert_eq!( 171 | &Value::Array(vec![Value::Bool(false), Value::Bool(true)]).to_vec(), 172 | b"\x80\0\0\x02\x30\0\0\0\x40\0\0\0", 173 | ); 174 | 175 | let buf = Value::Array(vec![Value::Bool(false), Value::Bool(true)]).to_vec(); 176 | let raw_jsonb = jsonb::RawJsonb::new(&buf); 177 | println!("{}", raw_jsonb.to_string()); 178 | 179 | assert_eq!( 180 | &Value::Array(vec![ 181 | Value::Bool(false), 182 | Value::Binary(&[100, 101, 102, 103]), 183 | Value::Date(Date {value: 20381 }), 184 | Value::Timestamp(Timestamp { value: 1540230120000000 }), 185 | Value::TimestampTz(TimestampTz { offset: 8, value: 1670389100000000 }), 186 | Value::Interval(Interval { months: 2, days: 10, micros: 500000000 }), 187 | Value::Number(Number::Decimal256(Decimal256 { precision: 
76, scale: 2, value: I256::new(1234) })), 188 | ]).to_vec(), 189 | b"\x80\0\0\x07\x30\0\0\0\x60\0\0\x05\x60\0\0\x05\x60\0\0\x09\x60\0\0\x0A\x60\0\0\x11\x20\0\0\x23\0\x64\x65\x66\x67\x10\0\0\x4F\x9D\x20\0\x05\x78\xD4\xC5\x2C\xCA\0\x30\0\x05\xEF\x35\xC4\xF1\x33\0\x08\x40\0\0\0\x02\0\0\0\x0A\0\0\0\0\x1D\xCD\x65\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x4C\x02", 190 | ); 191 | } 192 | 193 | #[test] 194 | fn test_encode_object() { 195 | let mut obj1 = Object::new(); 196 | obj1.insert("asd".to_string(), Value::String(Cow::from("adf"))); 197 | assert_eq!( 198 | &Value::Object(obj1).to_vec(), 199 | b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66" 200 | ); 201 | 202 | let mut obj2 = Object::new(); 203 | obj2.insert("k1".to_string(), Value::String(Cow::from("v1"))); 204 | obj2.insert("k2".to_string(), Value::Binary(&[200, 201, 202, 203])); 205 | obj2.insert("k3".to_string(), Value::Date(Date { value: 20381 })); 206 | obj2.insert( 207 | "k4".to_string(), 208 | Value::Timestamp(Timestamp { 209 | value: 1540230120000000, 210 | }), 211 | ); 212 | obj2.insert( 213 | "k5".to_string(), 214 | Value::TimestampTz(TimestampTz { 215 | offset: 8, 216 | value: 1670389100000000, 217 | }), 218 | ); 219 | obj2.insert( 220 | "k6".to_string(), 221 | Value::Interval(Interval { 222 | months: 2, 223 | days: 10, 224 | micros: 500000000, 225 | }), 226 | ); 227 | obj2.insert( 228 | "k7".to_string(), 229 | Value::Number(Number::Decimal256(Decimal256 { 230 | precision: 76, 231 | scale: 2, 232 | value: I256::new(1234), 233 | })), 234 | ); 235 | 236 | assert_eq!( 237 | &Value::Object(obj2).to_vec(), 238 | 
b"\x40\0\0\x07\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x60\0\0\x05\x60\0\0\x05\x60\0\0\x09\x60\0\0\x0A\x60\0\0\x11\x20\0\0\x23\x6B\x31\x6B\x32\x6B\x33\x6B\x34\x6B\x35\x6B\x36\x6B\x37\x76\x31\0\xC8\xC9\xCA\xCB\x10\0\0\x4F\x9D\x20\0\x05\x78\xD4\xC5\x2C\xCA\0\x30\0\x05\xEF\x35\xC4\xF1\x33\0\x08\x40\0\0\0\x02\0\0\0\x0A\0\0\0\0\x1D\xCD\x65\0\x70\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\xD2\x4C\x02" 239 | ); 240 | } 241 | 242 | #[test] 243 | fn test_encode_extension() { 244 | assert_eq!( 245 | Value::Binary(&[1, 2, 3]).to_vec(), 246 | b"\x20\0\0\0\x60\0\0\x04\0\x01\x02\x03" 247 | ); 248 | assert_eq!( 249 | Value::Date(Date { value: 20372 }).to_vec(), 250 | b"\x20\0\0\0\x60\0\0\x05\x10\0\0\x4f\x94" 251 | ); 252 | assert_eq!( 253 | Value::Timestamp(Timestamp { 254 | value: 1760140800000000 255 | }) 256 | .to_vec(), 257 | b"\x20\0\0\0\x60\0\0\x09\x20\0\x06\x40\xd6\xb7\x23\x80\0" 258 | ); 259 | assert_eq!( 260 | Value::TimestampTz(TimestampTz { 261 | offset: 8, 262 | value: 1760140800000000 263 | }) 264 | .to_vec(), 265 | b"\x20\0\0\0\x60\0\0\x0a\x30\0\x06\x40\xd6\xb7\x23\x80\0\x08" 266 | ); 267 | assert_eq!( 268 | Value::Interval(Interval { 269 | months: 10, 270 | days: 20, 271 | micros: 300000000 272 | }) 273 | .to_vec(), 274 | b"\x20\0\0\0\x60\0\0\x11\x40\0\0\0\x0A\0\0\0\x14\0\0\0\0\x11\xE1\xA3\0" 275 | ); 276 | } 277 | -------------------------------------------------------------------------------- /tests/it/jsonpath_parser.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use std::io::Write; 16 | 17 | use goldenfile::Mint; 18 | use jsonb::jsonpath::parse_json_path; 19 | 20 | #[test] 21 | fn test_json_path() { 22 | let mut mint = Mint::new("tests/it/testdata"); 23 | let mut file = mint.new_goldenfile("json_path.txt").unwrap(); 24 | let cases = &[ 25 | r#"$"#, 26 | r#"$.*"#, 27 | r#"$.**"#, 28 | r#"$.**{2 to last}"#, 29 | r#"$[*]"#, 30 | r#"5 + 5"#, 31 | r#"10 - 5"#, 32 | r#"10 * 5"#, 33 | r#"10 / 5"#, 34 | r#"10 % 5"#, 35 | r#"$.store.book[*].*"#, 36 | // r#"$.store.book[*].* + 5"#, 37 | r#"$.store.book[0].price"#, 38 | r#"+$.store.book[0].price"#, 39 | r#"-$.store.book[0].price"#, 40 | r#"$.store.book[0].price + 5"#, 41 | r#"$.store.book[last].isbn"#, 42 | r"$.store.book[last].test_key\uD83D\uDC8E测试", 43 | r#"$.store.book[0,1, last - 2].price"#, 44 | r#"$.store.book[0,1 to last-1]"#, 45 | r#"$."store"."book""#, 46 | r#"$."st\"ore"."book\uD83D\uDC8E""#, 47 | r#"$[*].book.price ? 
(@ == 10)"#, 48 | r#"$.store.book?(@.price > 10).title"#, 49 | r#"$.store.book?(@.price < $.expensive).price"#, 50 | r#"$.store.book?(@.price < 10 && @.category == "fiction")"#, 51 | r#"$.store.book?(@.price > 10 || @.category == "reference")"#, 52 | r#"$.store.book?(@.price > 20 && (@.category == "reference" || @.category == "fiction"))"#, 53 | // compatible with Snowflake style path 54 | r#"[1][2]"#, 55 | r#"["k1"]["k2"]"#, 56 | r#"k1.k2:k3"#, 57 | r#"k1["k2"][1]"#, 58 | // predicates 59 | r#"$ > 1"#, 60 | r#"$.* == 0"#, 61 | r#"$[*] > 1"#, 62 | r#"$.a > $.b"#, 63 | r#"$.price > 10 || $.category == "reference""#, 64 | // exists expression 65 | r#"$.store.book?(exists(@.price?(@ > 20)))"#, 66 | r#"$.store?(exists(@.book?(exists(@.category?(@ == "fiction")))))"#, 67 | r#"$.store.book?(@ starts with "Nigel")"#, 68 | r#"$[*] ? (@.job == null) .name"#, 69 | // arithmetic functions 70 | r#"$.phones[0].number + 3"#, 71 | r#"7 - $[0]"#, 72 | r#"- $.phones[0].number"#, 73 | ]; 74 | 75 | for case in cases { 76 | let json_path = parse_json_path(case.as_bytes()).unwrap(); 77 | 78 | writeln!(file, "---------- Input ----------").unwrap(); 79 | writeln!(file, "{}", case).unwrap(); 80 | writeln!(file, "---------- Output ---------").unwrap(); 81 | writeln!(file, "{}", json_path).unwrap(); 82 | writeln!(file, "---------- AST ------------").unwrap(); 83 | writeln!(file, "{:#?}", json_path).unwrap(); 84 | writeln!(file, "\n").unwrap(); 85 | } 86 | } 87 | 88 | #[test] 89 | fn test_json_path_error() { 90 | let cases = &[ 91 | r#"$.["#, 92 | r#"$X"#, 93 | r#"$."#, 94 | r#"$.prop."#, 95 | r#"$.prop+."#, 96 | r#"$.."#, 97 | r#"$.prop.."#, 98 | r#"$.foo bar"#, 99 | r#"$[0, 1, 2 4]"#, 100 | r#"$['1','2',]"#, 101 | r#"$['1', ,'3']"#, 102 | r#"$['aaa'}'bbb']"#, 103 | r#"@ > 10"#, 104 | ]; 105 | 106 | for case in cases { 107 | let res = parse_json_path(case.as_bytes()); 108 | assert!(res.is_err()); 109 | } 110 | } 111 | 
--------------------------------------------------------------------------------
/tests/it/keypath_parser.rs:
--------------------------------------------------------------------------------
// Copyright 2023 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::io::Write;

use goldenfile::Mint;
use jsonb::keypath::parse_key_paths;

/// Parses every key-path expression in `cases` with `parse_key_paths` and
/// records the raw input, its `Display` rendering and its pretty-printed
/// `Debug` AST in the `key_path.txt` golden file under `tests/it/testdata`.
///
/// Despite its name, this test exercises the key-path parser, not the JSON
/// path parser. A case that fails to parse aborts the test with a message
/// naming the offending input.
#[test]
fn test_json_path() {
    let mut mint = Mint::new("tests/it/testdata");
    let mut file = mint.new_goldenfile("key_path.txt").unwrap();
    let cases = &[" { } ", " { 1, a } ", "{1,a,-2}", r#"{a,"b","c"} "#];

    for case in cases {
        // Name the failing input instead of relying on a bare `unwrap()` panic.
        let key_paths = parse_key_paths(case.as_bytes())
            .unwrap_or_else(|err| panic!("failed to parse key paths {:?}: {:?}", case, err));

        writeln!(file, "---------- Input ----------").unwrap();
        writeln!(file, "{}", case).unwrap();
        writeln!(file, "---------- Output ---------").unwrap();
        writeln!(file, "{}", key_paths).unwrap();
        writeln!(file, "---------- AST ------------").unwrap();
        writeln!(file, "{:#?}", key_paths).unwrap();
        writeln!(file, "\n").unwrap();
    }
}

/// Checks that `parse_key_paths` returns an error for every malformed input in
/// `cases`; the assertion message names the input that was wrongly accepted.
#[test]
fn test_json_path_error() {
    let cases = &[r#"{"#, r#"ab"#];

    for case in cases {
        let res = parse_key_paths(case.as_bytes());
        assert!(
            res.is_err(),
            "expected a parse error for key paths {:?}",
            case
        );
    }
}

--------------------------------------------------------------------------------
/tests/it/main.rs:
-------------------------------------------------------------------------------- 1 | // Copyright 2023 Datafuse Labs. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | mod decode; 16 | mod encode; 17 | mod functions; 18 | mod jsonpath_parser; 19 | mod keypath_parser; 20 | mod parser; 21 | -------------------------------------------------------------------------------- /tests/it/testdata/key_path.txt: -------------------------------------------------------------------------------- 1 | ---------- Input ---------- 2 | { } 3 | ---------- Output --------- 4 | {} 5 | ---------- AST ------------ 6 | KeyPaths { 7 | paths: [], 8 | } 9 | 10 | 11 | ---------- Input ---------- 12 | { 1, a } 13 | ---------- Output --------- 14 | {1,a} 15 | ---------- AST ------------ 16 | KeyPaths { 17 | paths: [ 18 | Index( 19 | 1, 20 | ), 21 | Name( 22 | "a", 23 | ), 24 | ], 25 | } 26 | 27 | 28 | ---------- Input ---------- 29 | {1,a,-2} 30 | ---------- Output --------- 31 | {1,a,-2} 32 | ---------- AST ------------ 33 | KeyPaths { 34 | paths: [ 35 | Index( 36 | 1, 37 | ), 38 | Name( 39 | "a", 40 | ), 41 | Index( 42 | -2, 43 | ), 44 | ], 45 | } 46 | 47 | 48 | ---------- Input ---------- 49 | {a,"b","c"} 50 | ---------- Output --------- 51 | {a,"b","c"} 52 | ---------- AST ------------ 53 | KeyPaths { 54 | paths: [ 55 | Name( 56 | "a", 57 | ), 58 | QuotedName( 59 | "b", 60 | ), 61 | QuotedName( 62 | "c", 63 | ), 64 | ], 65 | } 66 | 67 | 
68 | --------------------------------------------------------------------------------