├── .github └── workflows │ ├── ci.yml │ └── rustdoc.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── README.tpl ├── SECURITY.md ├── benches └── bcs_bench.rs ├── src ├── de.rs ├── error.rs ├── lib.rs ├── ser.rs └── test_helpers.rs └── tests └── serde.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | CARGO_INCREMENTAL: 0 9 | RUSTFLAGS: -D warnings 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v2 18 | 19 | - name: Install Rust toolchain 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: stable 23 | override: true 24 | components: rustfmt, clippy 25 | 26 | - name: Lint 27 | run: | 28 | cargo fmt -- --check 29 | cargo clippy --all-targets 30 | 31 | - name: Build Documentation 32 | run: cargo doc --no-deps 33 | 34 | - name: Run tests 35 | run: cargo test 36 | 37 | minimum-supported-rust-version: 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v2 41 | - uses: actions-rs/toolchain@v1 42 | with: 43 | toolchain: 1.36.0 44 | override: true 45 | - run: cargo check 46 | -------------------------------------------------------------------------------- /.github/workflows/rustdoc.yml: -------------------------------------------------------------------------------- 1 | name: rustdoc 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 7 | env: 8 | CARGO_INCREMENTAL: 0 9 | RUSTFLAGS: -D warnings 10 | 11 | jobs: 12 | rustdoc: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v2 18 | 19 | - name: Install Rust toolchain 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: stable 23 | override: true 24 | 25 | - name: Build Documentation 26 | run: cargo doc --no-deps 27 | 28 | - name: Deploy Docs 29 | 
uses: peaceiris/actions-gh-pages@v3 30 | with: 31 | github_token: ${{ secrets.GITHUB_TOKEN }} 32 | publish_branch: gh-pages 33 | publish_dir: ./target/doc 34 | force_orphan: true 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [v0.1.1] - 2020-12-11 4 | - Renaming crate into "bcs". 5 | 6 | ## [v0.1.0] - 2020-11-17 7 | - Initial release. 8 | 9 | [v0.1.1]: https://github.com/diem/bcs/releases/tag/v0.1.1 10 | [v0.1.0]: https://github.com/diem/bcs/releases/tag/v0.1.0 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | The project has adopted a Code of Conduct that we expect project participants to adhere to. Please [read the full text](https://developers.diem.com/docs/policies/code-of-conduct) so that you can understand what actions will and will not be tolerated. 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this project 2 | 3 | This project welcomes contributions. 4 | 5 | ## Contributor License Agreement (CLA) 6 | 7 | For pull request to be accepted by any Diem projects, a CLA must be [signed](https://diem.com/en-US/cla-sign). You will only need to do this once to work on any of Diem's open source projects. 8 | 9 | When submitting a pull request (PR), the `diem-github-bot` will check your submission for a valid CLA. 
If one is not found, then you will need to [submit](https://diem.com/en-US/cla-sign) an Individual CLA for yourself or a Corporate CLA for your company. 10 | 11 | ## Issues 12 | 13 | This project uses GitHub Issues to track bugs. Please include necessary information and instructions to reproduce your issue. 14 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bcs" 3 | version = "0.1.4" 4 | authors = ["Diem "] 5 | description = "Binary Canonical Serialization (BCS)" 6 | repository = "https://github.com/diem/bcs" 7 | homepage = "https://diem.com" 8 | readme = "README.md" 9 | license = "Apache-2.0" 10 | edition = "2018" 11 | 12 | [dependencies] 13 | thiserror = "1.0.37" 14 | serde = { version = "1.0.117", features = ["derive"] } 15 | 16 | [dev-dependencies] 17 | criterion = "0.3.3" 18 | proptest = "0.10.1" 19 | proptest-derive = "0.2.0" 20 | 21 | [[bench]] 22 | name = "bcs_bench" 23 | harness = false 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/diem/bcs/workflows/CI/badge.svg)](https://github.com/diem/bcs/actions?query=workflow%3ACI) 2 | [![License](https://img.shields.io/badge/license-Apache-green.svg)](LICENSE) 3 | [![bcs on crates.io](https://img.shields.io/crates/v/bcs)](https://crates.io/crates/bcs) 4 | [![Documentation (latest release)](https://docs.rs/bcs/badge.svg)](https://docs.rs/bcs/) 5 | [![Documentation (master)](https://img.shields.io/badge/docs-master-59f)](https://diem.github.io/bcs/bcs/) 6 | 7 | ## Binary Canonical Serialization (BCS) 8 | 9 | BCS (formerly "Libra Canonical Serialization" or LCS) is a serialization format developed 10 | in the context of the [Diem](https://diem.com) blockchain. 
11 | 12 | BCS was designed with the following main goals in mind: 13 | * provide good performance and concise (binary) representations; 14 | * support a rich set of data types commonly used in Rust; 15 | * enforce canonical serialization, meaning that every value of a given type should have 16 | a single valid representation. 17 | 18 | BCS also aims to mitigate the consequence of malicious inputs by enforcing well-defined limits 19 | on large or nested containers during (de)serialization. 20 | 21 | ### Rust Implementation 22 | 23 | This crate provides a Rust implementation of BCS as an encoding format for the [Serde library](https://serde.rs). 24 | As such, this implementation covers most data types supported by Serde -- including user-defined structs, 25 | tagged variants (Rust enums), tuples, and maps -- excluding floats, single unicode characters (char), and sets. 26 | 27 | BCS is also available in other programming languages, thanks to the separate project [serde-reflection](https://github.com/novifinancial/serde-reflection). 28 | 29 | ### Application to Cryptography 30 | 31 | The BCS format guarantees canonical serialization, meaning that for any given data type, there 32 | is a one-to-one correspondence between in-memory values and valid byte representations. 33 | 34 | In the context of a cryptographic application, canonical serialization has several benefits: 35 | * It provides a natural and reliable way to associate in-memory values to cryptographic hashes. 36 | * It allows the signature of a message to be defined equivalently as the signature of the serialized bytes or as the signature of the in-memory value. 37 | 38 | Note that BCS ensures canonical serialization for each data type separately. The data type of a serialized value 39 | must be enforced by the application itself. This requirement is typically fulfilled 40 | using unique hash seeds for each data type.
(See [Diem's cryptographic library](https://github.com/diem/diem/blob/master/crypto/crypto/src/hash.rs) for an example.) 41 | 42 | ### Backwards Compatibility 43 | 44 | By design, BCS does not provide implicit versioning or backwards/forwards compatibility; therefore 45 | applications must carefully plan in advance for ad hoc extension points: 46 | * Enums may be used for explicit versioning and backward compatibility (e.g. extensible query interfaces). 47 | * In some cases, data fields of type `Vec<u8>` may also be added to allow (future) unknown payloads 48 | in serialized form. 49 | 50 | ### Detailed Specifications 51 | 52 | BCS supports the following data types: 53 | 54 | * Booleans 55 | * Signed 8-bit, 16-bit, 32-bit, 64-bit, and 128-bit integers 56 | * Unsigned 8-bit, 16-bit, 32-bit, 64-bit, and 128-bit integers 57 | * Option 58 | * Unit (an empty value) 59 | * Fixed and variable length sequences 60 | * UTF-8 Encoded Strings 61 | * Tuples 62 | * Structures (aka "structs") 63 | * Externally tagged enumerations (aka "enums") 64 | * Maps 65 | 66 | BCS is not a self-describing format. As such, in order to deserialize a message, one must 67 | know the message type and layout ahead of time. 68 | 69 | Unless specified, all numbers are stored in little endian, two's complement format. 70 | 71 | #### Recursion and Depth of BCS Data 72 | 73 | Recursive data-structures (e.g. trees) are allowed. However, because of the possibility of stack 74 | overflow during (de)serialization, the *container depth* of any valid BCS data cannot exceed the constant 75 | `MAX_CONTAINER_DEPTH`. Formally, we define *container depth* as the number of structs and enums traversed 76 | during (de)serialization. 77 | 78 | This definition aims to minimize the number of operations while ensuring that 79 | (de)serialization of a known BCS format cannot cause arbitrarily large stack allocations.
80 | 81 | As an example, if `v1` and `v2` are values of depth `n1` and `n2`, 82 | * a struct value `Foo { v1, v2 }` has depth `1 + max(n1, n2)`; 83 | * an enum value `E::Foo { v1, v2 }` has depth `1 + max(n1, n2)`; 84 | * a pair `(v1, v2)` has depth `max(n1, n2)`; 85 | * the value `Some(v1)` has depth `n1`. 86 | 87 | All string and integer values have depths `0`. 88 | 89 | #### Booleans and Integers 90 | 91 | |Type |Original data |Hex representation |Serialized bytes | 92 | |--- |--- |--- |--- | 93 | |Boolean |True / False |0x01 / 0x00 |01 / 00 | 94 | |8-bit signed integer |-1 |0xFF |FF | 95 | |8-bit unsigned integer |1 |0x01 |01 | 96 | |16-bit signed integer |-4660 |0xEDCC |CC ED | 97 | |16-bit unsigned integer |4660 |0x1234 |34 12 | 98 | |32-bit signed integer |-305419896 |0xEDCBA988 |88 A9 CB ED | 99 | |32-bit unsigned integer |305419896 |0x12345678 |78 56 34 12 | 100 | |64-bit signed integer |-1311768467750121216 |0xEDCBA98754321100 |00 11 32 54 87 A9 CB ED | 101 | |64-bit unsigned integer |1311768467750121216 |0x12345678ABCDEF00 |00 EF CD AB 78 56 34 12 | 102 | 103 | #### ULEB128-Encoded Integers 104 | 105 | The BCS format also uses the [ULEB128 encoding](https://en.wikipedia.org/wiki/LEB128) internally 106 | to represent unsigned 32-bit integers in two cases where small values are usually expected: 107 | (1) lengths of variable-length sequences and (2) tags of enum values (see the corresponding 108 | sections below). 109 | 110 | |Type |Original data |Hex representation |Serialized bytes | 111 | |--- |--- |--- |--- | 112 | |ULEB128-encoded u32-integer|2^0 = 1 |0x00000001 |01 | 113 | | |2^7 = 128 |0x00000080 |80 01 | 114 | | |2^14 = 16384 |0x00004000 |80 80 01 | 115 | | |2^21 = 2097152 |0x00200000 |80 80 80 01 | 116 | | |2^28 = 268435456 |0x10000000 |80 80 80 80 01 | 117 | | |9487 |0x0000250f |8f 4a | 118 | 119 | In general, a ULEB128 encoding consists of a little-endian sequence of base-128 (7-bit) 120 | digits. 
Each digit is completed into a byte by setting the highest bit to 1, except for the 121 | last (highest-significance) digit whose highest bit is set to 0. 122 | 123 | In BCS, the result of decoding ULEB128 bytes is required to fit into a 32-bit unsigned 124 | integer and be in canonical form. For instance, the following values are rejected: 125 | * 80 80 80 80 80 01 (2^35) is too large. 126 | * 80 80 80 80 10 (2^32) is too large. 127 | * 80 00 is not a minimal encoding of 0. 128 | 129 | #### Optional Data 130 | 131 | Optional or nullable data either exists in its full representation or does not. BCS represents 132 | this as a single byte representing the presence `0x01` or absence `0x00` of data. If the data 133 | is present then the serialized form of that data follows. For example: 134 | 135 | ```rust 136 | let some_data: Option<u8> = Some(8); 137 | assert_eq!(to_bytes(&some_data)?, vec![1, 8]); 138 | 139 | let no_data: Option<u8> = None; 140 | assert_eq!(to_bytes(&no_data)?, vec![0]); 141 | ``` 142 | 143 | #### Fixed and Variable Length Sequences 144 | 145 | Sequences can be made up of any BCS supported types (even complex structures) but all 146 | elements in the sequence must be of the same type. If the length of a sequence is fixed and 147 | well known then BCS represents this as just the concatenation of the serialized form of each 148 | individual element in the sequence. If the length of the sequence can be variable, then the 149 | serialized sequence is length prefixed with a ULEB128-encoded unsigned integer indicating 150 | the number of elements in the sequence. All variable length sequences must be 151 | `MAX_SEQUENCE_LENGTH` elements long or less.
152 | 153 | ```rust 154 | let fixed: [u16; 3] = [1, 2, 3]; 155 | assert_eq!(to_bytes(&fixed)?, vec![1, 0, 2, 0, 3, 0]); 156 | 157 | let variable: Vec<u16> = vec![1, 2]; 158 | assert_eq!(to_bytes(&variable)?, vec![2, 1, 0, 2, 0]); 159 | 160 | let large_variable_length: Vec<()> = vec![(); 9_487]; 161 | assert_eq!(to_bytes(&large_variable_length)?, vec![0x8f, 0x4a]); 162 | ``` 163 | 164 | #### Strings 165 | 166 | Only valid UTF-8 Strings are supported. BCS serializes such strings as a variable length byte 167 | sequence, i.e. length prefixed with a ULEB128-encoded unsigned integer followed by the byte 168 | representation of the string. 169 | 170 | ```rust 171 | // Note that this string has 10 characters but has a byte length of 24 172 | let utf8_str = "çå∞≠¢õß∂ƒ∫"; 173 | let expecting = vec![ 174 | 24, 0xc3, 0xa7, 0xc3, 0xa5, 0xe2, 0x88, 0x9e, 0xe2, 0x89, 0xa0, 0xc2, 175 | 0xa2, 0xc3, 0xb5, 0xc3, 0x9f, 0xe2, 0x88, 0x82, 0xc6, 0x92, 0xe2, 0x88, 0xab, 176 | ]; 177 | assert_eq!(to_bytes(&utf8_str)?, expecting); 178 | ``` 179 | 180 | #### Tuples 181 | 182 | Tuples are typed compositions of objects: `(Type0, Type1)` 183 | 184 | Tuples are considered a fixed length sequence where each element in the sequence can be a 185 | different type supported by BCS. Each element of a tuple is serialized in the order it is 186 | defined within the tuple, i.e. [tuple.0, tuple.1]. 187 | 188 | ```rust 189 | let tuple = (-1i8, "diem"); 190 | let expecting = vec![0xFF, 4, b'd', b'i', b'e', b'm']; 191 | assert_eq!(to_bytes(&tuple)?, expecting); 192 | ``` 193 | 194 | 195 | #### Structures 196 | 197 | Structures are fixed length sequences consisting of fields with potentially different types. 198 | Each field within a struct is serialized in the order specified by the canonical structure 199 | definition. Structs can exist within other structs and as such, BCS recurses into each struct 200 | and serializes them in order.
There are no labels in the serialized format; the struct ordering 201 | defines the organization within the serialization stream. 202 | 203 | ```rust 204 | #[derive(Serialize)] 205 | struct MyStruct { 206 | boolean: bool, 207 | bytes: Vec<u8>, 208 | label: String, 209 | } 210 | 211 | #[derive(Serialize)] 212 | struct Wrapper { 213 | inner: MyStruct, 214 | name: String, 215 | } 216 | 217 | let s = MyStruct { 218 | boolean: true, 219 | bytes: vec![0xC0, 0xDE], 220 | label: "a".to_owned(), 221 | }; 222 | let s_bytes = to_bytes(&s)?; 223 | let mut expecting = vec![1, 2, 0xC0, 0xDE, 1, b'a']; 224 | assert_eq!(s_bytes, expecting); 225 | 226 | let w = Wrapper { 227 | inner: s, 228 | name: "b".to_owned(), 229 | }; 230 | let w_bytes = to_bytes(&w)?; 231 | assert!(w_bytes.starts_with(&s_bytes)); 232 | 233 | expecting.append(&mut vec![1, b'b']); 234 | assert_eq!(w_bytes, expecting); 235 | ``` 236 | 237 | #### Externally Tagged Enumerations 238 | 239 | An enumeration is typically represented as a type that can take one of potentially many 240 | different variants. In BCS, each variant is mapped to a variant index, a ULEB128-encoded 32-bit unsigned 241 | integer, followed by serialized data if the type has an associated value. An 242 | associated type can be any BCS supported type. The variant index is determined based on the 243 | ordering of the variants in the canonical enum definition, where the first variant has an index 244 | of `0`, the second an index of `1`, etc. 245 | 246 | ```rust 247 | #[derive(Serialize)] 248 | enum E { 249 | Variant0(u16), 250 | Variant1(u8), 251 | Variant2(String), 252 | } 253 | 254 | let v0 = E::Variant0(8000); 255 | let v1 = E::Variant1(255); 256 | let v2 = E::Variant2("e".to_owned()); 257 | 258 | assert_eq!(to_bytes(&v0)?, vec![0, 0x40, 0x1F]); 259 | assert_eq!(to_bytes(&v1)?, vec![1, 0xFF]); 260 | assert_eq!(to_bytes(&v2)?, vec![2, 1, b'e']); 261 | ``` 262 | 263 | If you need to serialize a C-style enum, you should use a primitive integer type.
264 | 265 | #### Maps (Key / Value Stores) 266 | 267 | Maps are represented as a variable-length, sorted sequence of (Key, Value) tuples. Keys must be 268 | unique and the tuples sorted by increasing lexicographical order on the BCS bytes of each key. 269 | The representation is otherwise similar to that of a variable-length sequence. In particular, 270 | it is preceded by the number of tuples, encoded in ULEB128. 271 | 272 | ```rust 273 | let mut map = HashMap::new(); 274 | map.insert(b'e', b'f'); 275 | map.insert(b'a', b'b'); 276 | map.insert(b'c', b'd'); 277 | 278 | let expecting = vec![(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]; 279 | 280 | assert_eq!(to_bytes(&map)?, to_bytes(&expecting)?); 281 | ``` 282 | 283 | ## Contributing 284 | 285 | See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out. 286 | 287 | ## License 288 | 289 | This project is available under the terms of the [Apache 2.0 license](LICENSE). 290 | 291 | 297 | -------------------------------------------------------------------------------- /README.tpl: -------------------------------------------------------------------------------- 1 | [![Build Status](https://github.com/diem/bcs/workflows/CI/badge.svg)](https://github.com/diem/bcs/actions?query=workflow%3ACI) 2 | [![License](https://img.shields.io/badge/license-Apache-green.svg)](LICENSE) 3 | [![bcs on crates.io](https://img.shields.io/crates/v/bcs)](https://crates.io/crates/bcs) 4 | [![Documentation (latest release)](https://docs.rs/bcs/badge.svg)](https://docs.rs/bcs/) 5 | [![Documentation (master)](https://img.shields.io/badge/docs-master-59f)](https://diem.github.io/bcs/bcs/) 6 | 7 | {{readme}} 8 | 9 | ## Contributing 10 | 11 | See the [CONTRIBUTING](CONTRIBUTING.md) file for how to help out. 12 | 13 | ## License 14 | 15 | This project is available under the terms of the [Apache 2.0 license](LICENSE).
16 | 17 | 23 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policies and Procedures 2 | 3 | Please see Diem's 4 | [security policies](https://developers.diem.com/docs/policies/security) and 5 | procedures for reporting vulnerabilities. 6 | -------------------------------------------------------------------------------- /benches/bcs_bench.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use bcs::to_bytes; 5 | use criterion::{criterion_group, criterion_main, Criterion}; 6 | use std::collections::{BTreeMap, HashMap}; 7 | 8 | pub fn bcs_benchmark(c: &mut Criterion) { 9 | let mut btree_map = BTreeMap::new(); 10 | let mut hash_map = HashMap::new(); 11 | for i in 0u32..2000u32 { 12 | btree_map.insert(i, i); 13 | hash_map.insert(i, i); 14 | } 15 | c.bench_function("serialize btree map", |b| { 16 | b.iter(|| { 17 | to_bytes(&btree_map).unwrap(); 18 | }) 19 | }); 20 | c.bench_function("serialize hash map", |b| { 21 | b.iter(|| { 22 | to_bytes(&hash_map).unwrap(); 23 | }) 24 | }); 25 | } 26 | 27 | criterion_group!(benches, bcs_benchmark); 28 | criterion_main!(benches); 29 | -------------------------------------------------------------------------------- /src/de.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use crate::error::{Error, Result}; 5 | use serde::de::{self, Deserialize, DeserializeSeed, IntoDeserializer, Visitor}; 6 | use std::convert::TryFrom; 7 | 8 | /// Deserializes a `&[u8]` into a type. 9 | /// 10 | /// This function will attempt to interpret `bytes` as the BCS serialized form of `T` and 11 | /// deserialize `T` from `bytes`. 
12 | /// 13 | /// # Examples 14 | /// 15 | /// ``` 16 | /// use bcs::from_bytes; 17 | /// use serde::Deserialize; 18 | /// 19 | /// #[derive(Deserialize)] 20 | /// struct Ip([u8; 4]); 21 | /// 22 | /// #[derive(Deserialize)] 23 | /// struct Port(u16); 24 | /// 25 | /// #[derive(Deserialize)] 26 | /// struct SocketAddr { 27 | /// ip: Ip, 28 | /// port: Port, 29 | /// } 30 | /// 31 | /// let bytes = vec![0x7f, 0x00, 0x00, 0x01, 0x41, 0x1f]; 32 | /// let socket_addr: SocketAddr = from_bytes(&bytes).unwrap(); 33 | /// 34 | /// assert_eq!(socket_addr.ip.0, [127, 0, 0, 1]); 35 | /// assert_eq!(socket_addr.port.0, 8001); 36 | /// ``` 37 | pub fn from_bytes<'a, T>(bytes: &'a [u8]) -> Result 38 | where 39 | T: Deserialize<'a>, 40 | { 41 | let mut deserializer = Deserializer::new(bytes, crate::MAX_CONTAINER_DEPTH); 42 | let t = T::deserialize(&mut deserializer)?; 43 | deserializer.end().map(move |_| t) 44 | } 45 | 46 | /// Perform a stateful deserialization from a `&[u8]` using the provided `seed`. 47 | pub fn from_bytes_seed<'a, T>(seed: T, bytes: &'a [u8]) -> Result 48 | where 49 | T: DeserializeSeed<'a>, 50 | { 51 | let mut deserializer = Deserializer::new(bytes, crate::MAX_CONTAINER_DEPTH); 52 | let t = seed.deserialize(&mut deserializer)?; 53 | deserializer.end().map(move |_| t) 54 | } 55 | 56 | /// Deserialization implementation for BCS 57 | struct Deserializer<'de> { 58 | input: &'de [u8], 59 | max_remaining_depth: usize, 60 | } 61 | 62 | impl<'de> Deserializer<'de> { 63 | /// Creates a new `Deserializer` which will be deserializing the provided 64 | /// input. 65 | fn new(input: &'de [u8], max_remaining_depth: usize) -> Self { 66 | Deserializer { 67 | input, 68 | max_remaining_depth, 69 | } 70 | } 71 | 72 | /// The `Deserializer::end` method should be called after a type has been 73 | /// fully deserialized. This allows the `Deserializer` to validate that 74 | /// there are no more bytes remaining in the input stream.
75 | fn end(&mut self) -> Result<()> { 76 | if self.input.is_empty() { 77 | Ok(()) 78 | } else { 79 | Err(Error::RemainingInput) 80 | } 81 | } 82 | } 83 | 84 | impl<'de> Deserializer<'de> { 85 | fn peek(&mut self) -> Result { 86 | self.input.first().copied().ok_or(Error::Eof) 87 | } 88 | 89 | fn next(&mut self) -> Result { 90 | let byte = self.peek()?; 91 | self.input = &self.input[1..]; 92 | Ok(byte) 93 | } 94 | 95 | fn parse_bool(&mut self) -> Result { 96 | let byte = self.next()?; 97 | 98 | match byte { 99 | 0 => Ok(false), 100 | 1 => Ok(true), 101 | _ => Err(Error::ExpectedBoolean), 102 | } 103 | } 104 | 105 | fn fill_slice(&mut self, slice: &mut [u8]) -> Result<()> { 106 | for byte in slice { 107 | *byte = self.next()?; 108 | } 109 | Ok(()) 110 | } 111 | 112 | fn parse_u8(&mut self) -> Result { 113 | self.next() 114 | } 115 | 116 | fn parse_u16(&mut self) -> Result { 117 | let mut le_bytes = [0; 2]; 118 | self.fill_slice(&mut le_bytes)?; 119 | Ok(u16::from_le_bytes(le_bytes)) 120 | } 121 | 122 | fn parse_u32(&mut self) -> Result { 123 | let mut le_bytes = [0; 4]; 124 | self.fill_slice(&mut le_bytes)?; 125 | Ok(u32::from_le_bytes(le_bytes)) 126 | } 127 | 128 | fn parse_u64(&mut self) -> Result { 129 | let mut le_bytes = [0; 8]; 130 | self.fill_slice(&mut le_bytes)?; 131 | Ok(u64::from_le_bytes(le_bytes)) 132 | } 133 | 134 | fn parse_u128(&mut self) -> Result { 135 | let mut le_bytes = [0; 16]; 136 | self.fill_slice(&mut le_bytes)?; 137 | Ok(u128::from_le_bytes(le_bytes)) 138 | } 139 | 140 | #[allow(clippy::integer_arithmetic)] 141 | fn parse_u32_from_uleb128(&mut self) -> Result { 142 | let mut value: u64 = 0; 143 | for shift in (0..32).step_by(7) { 144 | let byte = self.next()?; 145 | let digit = byte & 0x7f; 146 | value |= u64::from(digit) << shift; 147 | // If the highest bit of `byte` is 0, return the final value. 
148 | if digit == byte { 149 | if shift > 0 && digit == 0 { 150 | // We only accept canonical ULEB128 encodings, therefore the 151 | // heaviest (and last) base-128 digit must be non-zero. 152 | return Err(Error::NonCanonicalUleb128Encoding); 153 | } 154 | // Decoded integer must not overflow. 155 | return u32::try_from(value) 156 | .map_err(|_| Error::IntegerOverflowDuringUleb128Decoding); 157 | } 158 | } 159 | // Decoded integer must not overflow. 160 | Err(Error::IntegerOverflowDuringUleb128Decoding) 161 | } 162 | 163 | fn parse_length(&mut self) -> Result { 164 | let len = self.parse_u32_from_uleb128()? as usize; 165 | if len > crate::MAX_SEQUENCE_LENGTH { 166 | return Err(Error::ExceededMaxLen(len)); 167 | } 168 | Ok(len) 169 | } 170 | 171 | fn parse_bytes(&mut self) -> Result<&'de [u8]> { 172 | let len = self.parse_length()?; 173 | let slice = self.input.get(..len).ok_or(Error::Eof)?; 174 | self.input = &self.input[len..]; 175 | Ok(slice) 176 | } 177 | 178 | fn parse_string(&mut self) -> Result<&'de str> { 179 | let slice = self.parse_bytes()?; 180 | std::str::from_utf8(slice).map_err(|_| Error::Utf8) 181 | } 182 | 183 | fn enter_named_container(&mut self, name: &'static str) -> Result<()> { 184 | if self.max_remaining_depth == 0 { 185 | return Err(Error::ExceededContainerDepthLimit(name)); 186 | } 187 | self.max_remaining_depth -= 1; 188 | Ok(()) 189 | } 190 | 191 | fn leave_named_container(&mut self) { 192 | self.max_remaining_depth += 1; 193 | } 194 | } 195 | 196 | impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { 197 | type Error = Error; 198 | 199 | // BCS is not a self-describing format so we can't implement `deserialize_any` 200 | fn deserialize_any(self, _visitor: V) -> Result 201 | where 202 | V: Visitor<'de>, 203 | { 204 | Err(Error::NotSupported("deserialize_any")) 205 | } 206 | 207 | fn deserialize_bool(self, visitor: V) -> Result 208 | where 209 | V: Visitor<'de>, 210 | { 211 | visitor.visit_bool(self.parse_bool()?) 
212 | } 213 | 214 | fn deserialize_i8(self, visitor: V) -> Result 215 | where 216 | V: Visitor<'de>, 217 | { 218 | visitor.visit_i8(self.parse_u8()? as i8) 219 | } 220 | 221 | fn deserialize_i16(self, visitor: V) -> Result 222 | where 223 | V: Visitor<'de>, 224 | { 225 | visitor.visit_i16(self.parse_u16()? as i16) 226 | } 227 | 228 | fn deserialize_i32(self, visitor: V) -> Result 229 | where 230 | V: Visitor<'de>, 231 | { 232 | visitor.visit_i32(self.parse_u32()? as i32) 233 | } 234 | 235 | fn deserialize_i64(self, visitor: V) -> Result 236 | where 237 | V: Visitor<'de>, 238 | { 239 | visitor.visit_i64(self.parse_u64()? as i64) 240 | } 241 | 242 | fn deserialize_i128(self, visitor: V) -> Result 243 | where 244 | V: Visitor<'de>, 245 | { 246 | visitor.visit_i128(self.parse_u128()? as i128) 247 | } 248 | 249 | fn deserialize_u8(self, visitor: V) -> Result 250 | where 251 | V: Visitor<'de>, 252 | { 253 | visitor.visit_u8(self.parse_u8()?) 254 | } 255 | 256 | fn deserialize_u16(self, visitor: V) -> Result 257 | where 258 | V: Visitor<'de>, 259 | { 260 | visitor.visit_u16(self.parse_u16()?) 261 | } 262 | 263 | fn deserialize_u32(self, visitor: V) -> Result 264 | where 265 | V: Visitor<'de>, 266 | { 267 | visitor.visit_u32(self.parse_u32()?) 268 | } 269 | 270 | fn deserialize_u64(self, visitor: V) -> Result 271 | where 272 | V: Visitor<'de>, 273 | { 274 | visitor.visit_u64(self.parse_u64()?) 275 | } 276 | 277 | fn deserialize_u128(self, visitor: V) -> Result 278 | where 279 | V: Visitor<'de>, 280 | { 281 | visitor.visit_u128(self.parse_u128()?) 
282 | } 283 | 284 | fn deserialize_f32(self, _visitor: V) -> Result 285 | where 286 | V: Visitor<'de>, 287 | { 288 | Err(Error::NotSupported("deserialize_f32")) 289 | } 290 | 291 | fn deserialize_f64(self, _visitor: V) -> Result 292 | where 293 | V: Visitor<'de>, 294 | { 295 | Err(Error::NotSupported("deserialize_f64")) 296 | } 297 | 298 | fn deserialize_char(self, _visitor: V) -> Result 299 | where 300 | V: Visitor<'de>, 301 | { 302 | Err(Error::NotSupported("deserialize_char")) 303 | } 304 | 305 | fn deserialize_str(self, visitor: V) -> Result 306 | where 307 | V: Visitor<'de>, 308 | { 309 | visitor.visit_borrowed_str(self.parse_string()?) 310 | } 311 | 312 | fn deserialize_string(self, visitor: V) -> Result 313 | where 314 | V: Visitor<'de>, 315 | { 316 | self.deserialize_str(visitor) 317 | } 318 | 319 | fn deserialize_bytes(self, visitor: V) -> Result 320 | where 321 | V: Visitor<'de>, 322 | { 323 | visitor.visit_borrowed_bytes(self.parse_bytes()?) 324 | } 325 | 326 | fn deserialize_byte_buf(self, visitor: V) -> Result 327 | where 328 | V: Visitor<'de>, 329 | { 330 | self.deserialize_bytes(visitor) 331 | } 332 | 333 | fn deserialize_option(self, visitor: V) -> Result 334 | where 335 | V: Visitor<'de>, 336 | { 337 | let byte = self.next()?; 338 | 339 | match byte { 340 | 0 => visitor.visit_none(), 341 | 1 => visitor.visit_some(self), 342 | _ => Err(Error::ExpectedOption), 343 | } 344 | } 345 | 346 | fn deserialize_unit(self, visitor: V) -> Result 347 | where 348 | V: Visitor<'de>, 349 | { 350 | visitor.visit_unit() 351 | } 352 | 353 | fn deserialize_unit_struct(self, name: &'static str, visitor: V) -> Result 354 | where 355 | V: Visitor<'de>, 356 | { 357 | self.enter_named_container(name)?; 358 | let r = self.deserialize_unit(visitor); 359 | self.leave_named_container(); 360 | r 361 | } 362 | 363 | fn deserialize_newtype_struct(self, name: &'static str, visitor: V) -> Result 364 | where 365 | V: Visitor<'de>, 366 | { 367 | self.enter_named_container(name)?; 368 
| let r = visitor.visit_newtype_struct(&mut *self); 369 | self.leave_named_container(); 370 | r 371 | } 372 | #[allow(clippy::needless_borrow)] 373 | fn deserialize_seq(mut self, visitor: V) -> Result 374 | where 375 | V: Visitor<'de>, 376 | { 377 | let len = self.parse_length()?; 378 | visitor.visit_seq(SeqDeserializer::new(&mut self, len)) 379 | } 380 | #[allow(clippy::needless_borrow)] 381 | fn deserialize_tuple(mut self, len: usize, visitor: V) -> Result 382 | where 383 | V: Visitor<'de>, 384 | { 385 | visitor.visit_seq(SeqDeserializer::new(&mut self, len)) 386 | } 387 | #[allow(clippy::needless_borrow)] 388 | fn deserialize_tuple_struct( 389 | mut self, 390 | name: &'static str, 391 | len: usize, 392 | visitor: V, 393 | ) -> Result 394 | where 395 | V: Visitor<'de>, 396 | { 397 | self.enter_named_container(name)?; 398 | let r = visitor.visit_seq(SeqDeserializer::new(&mut self, len)); 399 | self.leave_named_container(); 400 | r 401 | } 402 | #[allow(clippy::needless_borrow)] 403 | fn deserialize_map(mut self, visitor: V) -> Result 404 | where 405 | V: Visitor<'de>, 406 | { 407 | let len = self.parse_length()?; 408 | visitor.visit_map(MapDeserializer::new(&mut self, len)) 409 | } 410 | #[allow(clippy::needless_borrow)] 411 | fn deserialize_struct( 412 | mut self, 413 | name: &'static str, 414 | fields: &'static [&'static str], 415 | visitor: V, 416 | ) -> Result 417 | where 418 | V: Visitor<'de>, 419 | { 420 | self.enter_named_container(name)?; 421 | let r = visitor.visit_seq(SeqDeserializer::new(&mut self, fields.len())); 422 | self.leave_named_container(); 423 | r 424 | } 425 | 426 | fn deserialize_enum( 427 | self, 428 | name: &'static str, 429 | _variants: &'static [&'static str], 430 | visitor: V, 431 | ) -> Result 432 | where 433 | V: Visitor<'de>, 434 | { 435 | self.enter_named_container(name)?; 436 | let r = visitor.visit_enum(&mut *self); 437 | self.leave_named_container(); 438 | r 439 | } 440 | 441 | // BCS does not utilize identifiers, so throw them 
away 442 | fn deserialize_identifier(self, _visitor: V) -> Result 443 | where 444 | V: Visitor<'de>, 445 | { 446 | self.deserialize_bytes(_visitor) 447 | } 448 | 449 | // BCS is not a self-describing format so we can't implement `deserialize_ignored_any` 450 | fn deserialize_ignored_any(self, _visitor: V) -> Result 451 | where 452 | V: Visitor<'de>, 453 | { 454 | Err(Error::NotSupported("deserialize_ignored_any")) 455 | } 456 | 457 | // BCS is not a human readable format 458 | fn is_human_readable(&self) -> bool { 459 | false 460 | } 461 | } 462 | 463 | struct SeqDeserializer<'a, 'de: 'a> { 464 | de: &'a mut Deserializer<'de>, 465 | remaining: usize, 466 | } 467 | #[allow(clippy::needless_borrow)] 468 | impl<'a, 'de> SeqDeserializer<'a, 'de> { 469 | fn new(de: &'a mut Deserializer<'de>, remaining: usize) -> Self { 470 | Self { de, remaining } 471 | } 472 | } 473 | 474 | impl<'de, 'a> de::SeqAccess<'de> for SeqDeserializer<'a, 'de> { 475 | type Error = Error; 476 | 477 | fn next_element_seed(&mut self, seed: T) -> Result> 478 | where 479 | T: DeserializeSeed<'de>, 480 | { 481 | if self.remaining == 0 { 482 | Ok(None) 483 | } else { 484 | self.remaining -= 1; 485 | seed.deserialize(&mut *self.de).map(Some) 486 | } 487 | } 488 | 489 | fn size_hint(&self) -> Option { 490 | Some(self.remaining) 491 | } 492 | } 493 | 494 | struct MapDeserializer<'a, 'de: 'a> { 495 | de: &'a mut Deserializer<'de>, 496 | remaining: usize, 497 | previous_key_bytes: Option<&'a [u8]>, 498 | } 499 | 500 | impl<'a, 'de> MapDeserializer<'a, 'de> { 501 | fn new(de: &'a mut Deserializer<'de>, remaining: usize) -> Self { 502 | Self { 503 | de, 504 | remaining, 505 | previous_key_bytes: None, 506 | } 507 | } 508 | } 509 | 510 | impl<'de, 'a> de::MapAccess<'de> for MapDeserializer<'a, 'de> { 511 | type Error = Error; 512 | 513 | fn next_key_seed(&mut self, seed: K) -> Result> 514 | where 515 | K: DeserializeSeed<'de>, 516 | { 517 | match self.remaining.checked_sub(1) { 518 | None => Ok(None), 519 | 
Some(remaining) => { 520 | let previous_input_slice = self.de.input; 521 | let key_value = seed.deserialize(&mut *self.de)?; 522 | let key_len = previous_input_slice 523 | .len() 524 | .saturating_sub(self.de.input.len()); 525 | let key_bytes = &previous_input_slice[..key_len]; 526 | if let Some(previous_key_bytes) = self.previous_key_bytes { 527 | if previous_key_bytes >= key_bytes { 528 | return Err(Error::NonCanonicalMap); 529 | } 530 | } 531 | self.remaining = remaining; 532 | self.previous_key_bytes = Some(key_bytes); 533 | Ok(Some(key_value)) 534 | } 535 | } 536 | } 537 | 538 | fn next_value_seed(&mut self, seed: V) -> Result 539 | where 540 | V: DeserializeSeed<'de>, 541 | { 542 | seed.deserialize(&mut *self.de) 543 | } 544 | 545 | fn size_hint(&self) -> Option { 546 | Some(self.remaining) 547 | } 548 | } 549 | 550 | impl<'de, 'a> de::EnumAccess<'de> for &'a mut Deserializer<'de> { 551 | type Error = Error; 552 | type Variant = Self; 553 | 554 | fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> 555 | where 556 | V: DeserializeSeed<'de>, 557 | { 558 | let variant_index = self.parse_u32_from_uleb128()?; 559 | let result: Result = seed.deserialize(variant_index.into_deserializer()); 560 | Ok((result?, self)) 561 | } 562 | } 563 | 564 | impl<'de, 'a> de::VariantAccess<'de> for &'a mut Deserializer<'de> { 565 | type Error = Error; 566 | 567 | fn unit_variant(self) -> Result<()> { 568 | Ok(()) 569 | } 570 | 571 | fn newtype_variant_seed(self, seed: T) -> Result 572 | where 573 | T: DeserializeSeed<'de>, 574 | { 575 | seed.deserialize(self) 576 | } 577 | 578 | fn tuple_variant(self, len: usize, visitor: V) -> Result 579 | where 580 | V: Visitor<'de>, 581 | { 582 | de::Deserializer::deserialize_tuple(self, len, visitor) 583 | } 584 | 585 | fn struct_variant(self, fields: &'static [&'static str], visitor: V) -> Result 586 | where 587 | V: Visitor<'de>, 588 | { 589 | de::Deserializer::deserialize_tuple(self, fields.len(), visitor) 590 | } 591 | } 592 
| -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use serde::{de, ser}; 5 | use std::fmt; 6 | use thiserror::Error; 7 | 8 | pub type Result = std::result::Result; 9 | #[allow(clippy::derive_partial_eq_without_eq)] 10 | #[derive(Clone, Debug, Error, PartialEq)] 11 | pub enum Error { 12 | #[error("unexpected end of input")] 13 | Eof, 14 | #[error("I/O error: {0}")] 15 | Io(String), 16 | #[error("exceeded max sequence length: {0}")] 17 | ExceededMaxLen(usize), 18 | #[error("exceeded max container depth while entering: {0}")] 19 | ExceededContainerDepthLimit(&'static str), 20 | #[error("expected boolean")] 21 | ExpectedBoolean, 22 | #[error("expected map key")] 23 | ExpectedMapKey, 24 | #[error("expected map value")] 25 | ExpectedMapValue, 26 | #[error("keys of serialized maps must be unique and in increasing order")] 27 | NonCanonicalMap, 28 | #[error("expected option type")] 29 | ExpectedOption, 30 | #[error("{0}")] 31 | Custom(String), 32 | #[error("sequence missing length")] 33 | MissingLen, 34 | #[error("not supported: {0}")] 35 | NotSupported(&'static str), 36 | #[error("remaining input")] 37 | RemainingInput, 38 | #[error("malformed utf8")] 39 | Utf8, 40 | #[error("ULEB128 encoding was not minimal in size")] 41 | NonCanonicalUleb128Encoding, 42 | #[error("ULEB128-encoded integer did not fit in the target size")] 43 | IntegerOverflowDuringUleb128Decoding, 44 | } 45 | 46 | impl From for Error { 47 | fn from(err: std::io::Error) -> Self { 48 | Error::Io(err.to_string()) 49 | } 50 | } 51 | 52 | impl ser::Error for Error { 53 | fn custom(msg: T) -> Self { 54 | Error::Custom(msg.to_string()) 55 | } 56 | } 57 | 58 | impl de::Error for Error { 59 | fn custom(msg: T) -> Self { 60 | Error::Custom(msg.to_string()) 61 | } 62 | } 63 | 
-------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![forbid(unsafe_code)] 5 | 6 | //! # Binary Canonical Serialization (BCS) 7 | //! 8 | //! BCS (formerly "Libra Canonical Serialization" or LCS) is a serialization format developed 9 | //! in the context of the [Diem](https://diem.com) blockchain. 10 | //! 11 | //! BCS was designed with the following main goals in mind: 12 | //! * provide good performance and concise (binary) representations; 13 | //! * support a rich set of data types commonly used in Rust; 14 | //! * enforce canonical serialization, meaning that every value of a given type should have 15 | //! a single valid representation. 16 | //! 17 | //! BCS also aims to mitigate the consequence of malicious inputs by enforcing well-defined limits 18 | //! on large or nested containers during (de)serialization. 19 | //! 20 | //! ## Rust Implementation 21 | //! 22 | //! This crate provides a Rust implementation of BCS as an encoding format for the [Serde library](https://serde.rs). 23 | //! As such, this implementation covers most data types supported by Serde -- including user-defined structs, 24 | //! tagged variants (Rust enums), tuples, and maps -- excluding floats, single unicode characters (char), and sets. 25 | //! 26 | //! BCS is also available in other programming languages, thanks to the separate project [serde-reflection](https://github.com/novifinancial/serde-reflection). 27 | //! 28 | //! ## Application to Cryptography 29 | //! 30 | //! The BCS format guarantees canonical serialization, meaning that for any given data type, there 31 | //! is a one-to-one correspondence between in-memory values and valid byte representations. 32 | //! 33 | //!
In the context of a cryptographic application, canonical serialization has several benefits: 34 | //! * It provides a natural and reliable way to associate in-memory values to cryptographic hashes. 35 | //! * It allows the signature of a message to be defined equivalently as the signature of the serialized bytes or as the signature of the in-memory value. 36 | //! 37 | //! Note that BCS ensures canonical serialization for each data type separately. The data type of a serialized value 38 | //! must be enforced by the application itself. This requirement is typically fulfilled 39 | //! using unique hash seeds for each data type. (See [Diem's cryptographic library](https://github.com/diem/diem/blob/master/crypto/crypto/src/hash.rs) for an example.) 40 | //! 41 | //! ## Backwards Compatibility 42 | //! 43 | //! By design, BCS does not provide implicit versioning or backwards/forwards compatibility, therefore 44 | //! applications must carefully plan in advance for adhoc extension points: 45 | //! * Enums may be used for explicit versioning and backward compatibility (e.g. extensible query interfaces). 46 | //! * In some cases, data fields of type `Vec` may also be added to allow (future) unknown payloads 47 | //! in serialized form. 48 | //! 49 | //! ## Detailed Specifications 50 | //! 51 | //! BCS supports the following data types: 52 | //! 53 | //! * Booleans 54 | //! * Signed 8-bit, 16-bit, 32-bit, 64-bit, and 128-bit integers 55 | //! * Unsigned 8-bit, 16-bit, 32-bit, 64-bit, and 128-bit integers 56 | //! * Option 57 | //! * Unit (an empty value) 58 | //! * Fixed and variable length sequences 59 | //! * UTF-8 Encoded Strings 60 | //! * Tuples 61 | //! * Structures (aka "structs") 62 | //! * Externally tagged enumerations (aka "enums") 63 | //! * Maps 64 | //! 65 | //! BCS is not a self-describing format. As such, in order to deserialize a message, one must 66 | //! know the message type and layout ahead of time. 67 | //! 68 | //! 
Unless specified, all numbers are stored in little endian, two's complement format. 69 | //! 70 | //! ### Recursion and Depth of BCS Data 71 | //! 72 | //! Recursive data-structures (e.g. trees) are allowed. However, because of the possibility of stack 73 | //! overflow during (de)serialization, the *container depth* of any valid BCS data cannot exceed the constant 74 | //! `MAX_CONTAINER_DEPTH`. Formally, we define *container depth* as the number of structs and enums traversed 75 | //! during (de)serialization. 76 | //! 77 | //! This definition aims to minimize the number of operations while ensuring that 78 | //! (de)serialization of a known BCS format cannot cause arbitrarily large stack allocations. 79 | //! 80 | //! As an example, if `v1` and `v2` are values of depth `n1` and `n2`, 81 | //! * a struct value `Foo { v1, v2 }` has depth `1 + max(n1, n2)`; 82 | //! * an enum value `E::Foo { v1, v2 }` has depth `1 + max(n1, n2)`; 83 | //! * a pair `(v1, v2)` has depth `max(n1, n2)`; 84 | //! * the value `Some(v1)` has depth `n1`. 85 | //! 86 | //! All string and integer values have depths `0`. 87 | //! 88 | //! ### Booleans and Integers 89 | //! 90 | //! |Type |Original data |Hex representation |Serialized bytes | 91 | //! |--- |--- |--- |--- | 92 | //! |Boolean |True / False |0x01 / 0x00 |01 / 00 | 93 | //! |8-bit signed integer |-1 |0xFF |FF | 94 | //! |8-bit unsigned integer |1 |0x01 |01 | 95 | //! |16-bit signed integer |-4660 |0xEDCC |CC ED | 96 | //! |16-bit unsigned integer |4660 |0x1234 |34 12 | 97 | //! |32-bit signed integer |-305419896 |0xEDCBA988 |88 A9 CB ED | 98 | //! |32-bit unsigned integer |305419896 |0x12345678 |78 56 34 12 | 99 | //! |64-bit signed integer |-1311768467750121216 |0xEDCBA98754321100 |00 11 32 54 87 A9 CB ED | 100 | //! |64-bit unsigned integer |1311768467750121216 |0x12345678ABCDEF00 |00 EF CD AB 78 56 34 12 | 101 | //! 102 | //! ### ULEB128-Encoded Integers 103 | //! 104 | //! 
The BCS format also uses the [ULEB128 encoding](https://en.wikipedia.org/wiki/LEB128) internally 105 | //! to represent unsigned 32-bit integers in two cases where small values are usually expected: 106 | //! (1) lengths of variable-length sequences and (2) tags of enum values (see the corresponding 107 | //! sections below). 108 | //! 109 | //! |Type |Original data |Hex representation |Serialized bytes | 110 | //! |--- |--- |--- |--- | 111 | //! |ULEB128-encoded u32-integer|2^0 = 1 |0x00000001 |01 | 112 | //! | |2^7 = 128 |0x00000080 |80 01 | 113 | //! | |2^14 = 16384 |0x00004000 |80 80 01 | 114 | //! | |2^21 = 2097152 |0x00200000 |80 80 80 01 | 115 | //! | |2^28 = 268435456 |0x10000000 |80 80 80 80 01 | 116 | //! | |9487 |0x0000250f |8f 4a | 117 | //! 118 | //! In general, a ULEB128 encoding consists of a little-endian sequence of base-128 (7-bit) 119 | //! digits. Each digit is completed into a byte by setting the highest bit to 1, except for the 120 | //! last (highest-significance) digit whose highest bit is set to 0. 121 | //! 122 | //! In BCS, the result of decoding ULEB128 bytes is required to fit into a 32-bit unsigned 123 | //! integer and be in canonical form. For instance, the following values are rejected: 124 | //! * 80 80 80 80 80 01 (2^35) is too large. 125 | //! * 80 80 80 80 10 (2^32) is too large. 126 | //! * 80 00 is not a minimal encoding of 0. 127 | //! 128 | //! ### Optional Data 129 | //! 130 | //! Optional or nullable data either exists in its full representation or does not. BCS represents 131 | //! this as a single byte representing the presence `0x01` or absence `0x00` of data. If the data 132 | //! is present then the serialized form of that data follows. For example: 133 | //! 134 | //! ```rust 135 | //! # use bcs::{Result, to_bytes}; 136 | //! # fn main() -> Result<()> { 137 | //! let some_data: Option = Some(8); 138 | //! assert_eq!(to_bytes(&some_data)?, vec![1, 8]); 139 | //! 140 | //! let no_data: Option = None; 141 | //!
assert_eq!(to_bytes(&no_data)?, vec![0]); 142 | //! # Ok(())} 143 | //! ``` 144 | //! 145 | //! ### Fixed and Variable Length Sequences 146 | //! 147 | //! Sequences can be made up of any BCS supported types (even complex structures) but all 148 | //! elements in the sequence must be of the same type. If the length of a sequence is fixed and 149 | //! well known then BCS represents this as just the concatenation of the serialized form of each 150 | //! individual element in the sequence. If the length of the sequence can be variable, then the 151 | //! serialized sequence is length prefixed with a ULEB128-encoded unsigned integer indicating 152 | //! the number of elements in the sequence. All variable length sequences must be 153 | //! `MAX_SEQUENCE_LENGTH` elements long or less. 154 | //! 155 | //! ```rust 156 | //! # use bcs::{Result, to_bytes}; 157 | //! # fn main() -> Result<()> { 158 | //! let fixed: [u16; 3] = [1, 2, 3]; 159 | //! assert_eq!(to_bytes(&fixed)?, vec![1, 0, 2, 0, 3, 0]); 160 | //! 161 | //! let variable: Vec = vec![1, 2]; 162 | //! assert_eq!(to_bytes(&variable)?, vec![2, 1, 0, 2, 0]); 163 | //! 164 | //! let large_variable_length: Vec<()> = vec![(); 9_487]; 165 | //! assert_eq!(to_bytes(&large_variable_length)?, vec![0x8f, 0x4a]); 166 | //! # Ok(())} 167 | //! ``` 168 | //! 169 | //! ### Strings 170 | //! 171 | //! Only valid UTF-8 Strings are supported. BCS serializes such strings as a variable length byte 172 | //! sequence, i.e. length prefixed with a ULEB128-encoded unsigned integer followed by the byte 173 | //! representation of the string. 174 | //! 175 | //! ```rust 176 | //! # use bcs::{Result, to_bytes}; 177 | //! # fn main() -> Result<()> { 178 | //! // Note that this string has 10 characters but has a byte length of 24 179 | //! let utf8_str = "çå∞≠¢õß∂ƒ∫"; 180 | //! let expecting = vec![ 181 | //! 24, 0xc3, 0xa7, 0xc3, 0xa5, 0xe2, 0x88, 0x9e, 0xe2, 0x89, 0xa0, 0xc2, 182 | //!
0xa2, 0xc3, 0xb5, 0xc3, 0x9f, 0xe2, 0x88, 0x82, 0xc6, 0x92, 0xe2, 0x88, 0xab, 183 | //! ]; 184 | //! assert_eq!(to_bytes(&utf8_str)?, expecting); 185 | //! # Ok(())} 186 | //! ``` 187 | //! 188 | //! ### Tuples 189 | //! 190 | //! Tuples are typed compositions of objects: `(Type0, Type1)` 191 | //! 192 | //! Tuples are considered a fixed length sequence where each element in the sequence can be a 193 | //! different type supported by BCS. Each element of a tuple is serialized in the order it is 194 | //! defined within the tuple, i.e. [tuple.0, tuple.1]. 195 | //! 196 | //! ```rust 197 | //! # use bcs::{Result, to_bytes}; 198 | //! # fn main() -> Result<()> { 199 | //! let tuple = (-1i8, "diem"); 200 | //! let expecting = vec![0xFF, 4, b'd', b'i', b'e', b'm']; 201 | //! assert_eq!(to_bytes(&tuple)?, expecting); 202 | //! # Ok(())} 203 | //! ``` 204 | //! 205 | //! 206 | //! ### Structures 207 | //! 208 | //! Structures are fixed length sequences consisting of fields with potentially different types. 209 | //! Each field within a struct is serialized in the order specified by the canonical structure 210 | //! definition. Structs can exist within other structs and as such, BCS recurses into each struct 211 | //! and serializes them in order. There are no labels in the serialized format, the struct ordering 212 | //! defines the organization within the serialization stream. 213 | //! 214 | //! ```rust 215 | //! # use bcs::{Result, to_bytes}; 216 | //! # use serde::Serialize; 217 | //! # fn main() -> Result<()> { 218 | //! #[derive(Serialize)] 219 | //! struct MyStruct { 220 | //! boolean: bool, 221 | //! bytes: Vec, 222 | //! label: String, 223 | //! } 224 | //! 225 | //! #[derive(Serialize)] 226 | //! struct Wrapper { 227 | //! inner: MyStruct, 228 | //! name: String, 229 | //! } 230 | //! 231 | //! let s = MyStruct { 232 | //! boolean: true, 233 | //! bytes: vec![0xC0, 0xDE], 234 | //! label: "a".to_owned(), 235 | //! }; 236 | //!
let s_bytes = to_bytes(&s)?; 237 | //! let mut expecting = vec![1, 2, 0xC0, 0xDE, 1, b'a']; 238 | //! assert_eq!(s_bytes, expecting); 239 | //! 240 | //! let w = Wrapper { 241 | //! inner: s, 242 | //! name: "b".to_owned(), 243 | //! }; 244 | //! let w_bytes = to_bytes(&w)?; 245 | //! assert!(w_bytes.starts_with(&s_bytes)); 246 | //! 247 | //! expecting.append(&mut vec![1, b'b']); 248 | //! assert_eq!(w_bytes, expecting); 249 | //! # Ok(())} 250 | //! ``` 251 | //! 252 | //! ### Externally Tagged Enumerations 253 | //! 254 | //! An enumeration is typically represented as a type that can take one of potentially many 255 | //! different variants. In BCS, each variant is mapped to a variant index, a ULEB128-encoded 32-bit unsigned 256 | //! integer, followed by serialized data if the type has an associated value. An 257 | //! associated type can be any BCS supported type. The variant index is determined based on the 258 | //! ordering of the variants in the canonical enum definition, where the first variant has an index 259 | //! of `0`, the second an index of `1`, etc. 260 | //! 261 | //! ```rust 262 | //! # use bcs::{Result, to_bytes}; 263 | //! # use serde::Serialize; 264 | //! # fn main() -> Result<()> { 265 | //! #[derive(Serialize)] 266 | //! enum E { 267 | //! Variant0(u16), 268 | //! Variant1(u8), 269 | //! Variant2(String), 270 | //! } 271 | //! 272 | //! let v0 = E::Variant0(8000); 273 | //! let v1 = E::Variant1(255); 274 | //! let v2 = E::Variant2("e".to_owned()); 275 | //! 276 | //! assert_eq!(to_bytes(&v0)?, vec![0, 0x40, 0x1F]); 277 | //! assert_eq!(to_bytes(&v1)?, vec![1, 0xFF]); 278 | //! assert_eq!(to_bytes(&v2)?, vec![2, 1, b'e']); 279 | //! # Ok(())} 280 | //! ``` 281 | //! 282 | //! If you need to serialize a C-style enum, you should use a primitive integer type. 283 | //! 284 | //! ### Maps (Key / Value Stores) 285 | //! 286 | //! Maps are represented as a variable-length, sorted sequence of (Key, Value) tuples. Keys must be 287 | //! 
unique and the tuples sorted by increasing lexicographical order on the BCS bytes of each key. 288 | //! The representation is otherwise similar to that of a variable-length sequence. In particular, 289 | //! it is preceded by the number of tuples, encoded in ULEB128. 290 | //! 291 | //! ```rust 292 | //! # use bcs::{Result, to_bytes}; 293 | //! # use std::collections::HashMap; 294 | //! # fn main() -> Result<()> { 295 | //! let mut map = HashMap::new(); 296 | //! map.insert(b'e', b'f'); 297 | //! map.insert(b'a', b'b'); 298 | //! map.insert(b'c', b'd'); 299 | //! 300 | //! let expecting = vec![(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]; 301 | //! 302 | //! assert_eq!(to_bytes(&map)?, to_bytes(&expecting)?); 303 | //! # Ok(())} 304 | //! ``` 305 | 306 | mod de; 307 | mod error; 308 | mod ser; 309 | pub mod test_helpers; 310 | 311 | /// Variable length sequences in BCS are limited to a max length of 2^31 - 1. 312 | pub const MAX_SEQUENCE_LENGTH: usize = (1 << 31) - 1; 313 | 314 | /// Maximal allowed depth of BCS data, counting only structs and enums. 315 | pub const MAX_CONTAINER_DEPTH: usize = 500; 316 | 317 | pub use de::{from_bytes, from_bytes_seed}; 318 | pub use error::{Error, Result}; 319 | pub use ser::{is_human_readable, serialize_into, serialized_size, to_bytes}; 320 | -------------------------------------------------------------------------------- /src/ser.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use crate::error::{Error, Result}; 5 | use serde::{ser, Serialize}; 6 | 7 | /// Serialize the given data structure as a `Vec<u8>` of BCS. 8 | /// 9 | /// Serialization can fail if `T`'s implementation of `Serialize` decides to 10 | /// fail, if `T` contains sequences which are longer than `MAX_SEQUENCE_LENGTH`, 11 | /// or if `T` attempts to serialize an unsupported datatype such as an f32, 12 | /// f64, or char. 
13 | /// 14 | /// # Examples 15 | /// 16 | /// ``` 17 | /// use bcs::to_bytes; 18 | /// use serde::Serialize; 19 | /// 20 | /// #[derive(Serialize)] 21 | /// struct Ip([u8; 4]); 22 | /// 23 | /// #[derive(Serialize)] 24 | /// struct Port(u16); 25 | /// 26 | /// #[derive(Serialize)] 27 | /// struct Service { 28 | /// ip: Ip, 29 | /// port: Vec, 30 | /// connection_max: Option, 31 | /// enabled: bool, 32 | /// } 33 | /// 34 | /// let service = Service { 35 | /// ip: Ip([192, 168, 1, 1]), 36 | /// port: vec![Port(8001), Port(8002), Port(8003)], 37 | /// connection_max: Some(5000), 38 | /// enabled: false, 39 | /// }; 40 | /// 41 | /// let bytes = to_bytes(&service).unwrap(); 42 | /// let expected = vec![ 43 | /// 0xc0, 0xa8, 0x01, 0x01, 0x03, 0x41, 0x1f, 0x42, 44 | /// 0x1f, 0x43, 0x1f, 0x01, 0x88, 0x13, 0x00, 0x00, 45 | /// 0x00, 46 | /// ]; 47 | /// assert_eq!(bytes, expected); 48 | /// ``` 49 | pub fn to_bytes(value: &T) -> Result> 50 | where 51 | T: ?Sized + Serialize, 52 | { 53 | let mut output = Vec::new(); 54 | serialize_into(&mut output, value)?; 55 | Ok(output) 56 | } 57 | 58 | /// Same as `to_bytes` but write directly into an `std::io::Write` object. 59 | pub fn serialize_into(write: &mut W, value: &T) -> Result<()> 60 | where 61 | W: ?Sized + std::io::Write, 62 | T: ?Sized + Serialize, 63 | { 64 | let serializer = Serializer::new(write, crate::MAX_CONTAINER_DEPTH); 65 | value.serialize(serializer) 66 | } 67 | 68 | struct WriteCounter(usize); 69 | 70 | impl std::io::Write for WriteCounter { 71 | fn write(&mut self, buf: &[u8]) -> std::io::Result { 72 | let len = buf.len(); 73 | self.0 = self.0.checked_add(len).ok_or_else(|| { 74 | std::io::Error::new(std::io::ErrorKind::Other, "WriteCounter reached max value") 75 | })?; 76 | Ok(len) 77 | } 78 | 79 | fn flush(&mut self) -> std::io::Result<()> { 80 | Ok(()) 81 | } 82 | } 83 | 84 | /// Same as `to_bytes` but only return the size of the serialized bytes. 
85 | pub fn serialized_size(value: &T) -> Result 86 | where 87 | T: ?Sized + Serialize, 88 | { 89 | let mut counter = WriteCounter(0); 90 | serialize_into(&mut counter, value)?; 91 | Ok(counter.0) 92 | } 93 | 94 | pub fn is_human_readable() -> bool { 95 | let mut output = Vec::new(); 96 | let serializer = Serializer::new(&mut output, crate::MAX_CONTAINER_DEPTH); 97 | ser::Serializer::is_human_readable(&serializer) 98 | } 99 | 100 | /// Serialization implementation for BCS 101 | struct Serializer<'a, W: ?Sized> { 102 | output: &'a mut W, 103 | max_remaining_depth: usize, 104 | } 105 | 106 | impl<'a, W> Serializer<'a, W> 107 | where 108 | W: ?Sized + std::io::Write, 109 | { 110 | /// Creates a new `Serializer` which will emit BCS. 111 | fn new(output: &'a mut W, max_remaining_depth: usize) -> Self { 112 | Self { 113 | output, 114 | max_remaining_depth, 115 | } 116 | } 117 | 118 | fn output_u32_as_uleb128(&mut self, mut value: u32) -> Result<()> { 119 | while value >= 0x80 { 120 | // Write 7 (lowest) bits of data and set the 8th bit to 1. 121 | let byte = (value & 0x7f) as u8; 122 | self.output.write_all(&[byte | 0x80])?; 123 | value >>= 7; 124 | } 125 | // Write the remaining bits of data and set the highest bit to 0. 126 | self.output.write_all(&[value as u8])?; 127 | Ok(()) 128 | } 129 | 130 | fn output_variant_index(&mut self, v: u32) -> Result<()> { 131 | self.output_u32_as_uleb128(v) 132 | } 133 | 134 | /// Serialize a sequence length as a u32. 
135 | fn output_seq_len(&mut self, len: usize) -> Result<()> { 136 | if len > crate::MAX_SEQUENCE_LENGTH { 137 | return Err(Error::ExceededMaxLen(len)); 138 | } 139 | self.output_u32_as_uleb128(len as u32) 140 | } 141 | 142 | fn enter_named_container(&mut self, name: &'static str) -> Result<()> { 143 | if self.max_remaining_depth == 0 { 144 | return Err(Error::ExceededContainerDepthLimit(name)); 145 | } 146 | self.max_remaining_depth -= 1; 147 | Ok(()) 148 | } 149 | } 150 | 151 | impl<'a, W> ser::Serializer for Serializer<'a, W> 152 | where 153 | W: ?Sized + std::io::Write, 154 | { 155 | type Ok = (); 156 | type Error = Error; 157 | type SerializeSeq = Self; 158 | type SerializeTuple = Self; 159 | type SerializeTupleStruct = Self; 160 | type SerializeTupleVariant = Self; 161 | type SerializeMap = MapSerializer<'a, W>; 162 | type SerializeStruct = Self; 163 | type SerializeStructVariant = Self; 164 | 165 | fn serialize_bool(self, v: bool) -> Result<()> { 166 | self.serialize_u8(v.into()) 167 | } 168 | 169 | fn serialize_i8(self, v: i8) -> Result<()> { 170 | self.serialize_u8(v as u8) 171 | } 172 | 173 | fn serialize_i16(self, v: i16) -> Result<()> { 174 | self.serialize_u16(v as u16) 175 | } 176 | 177 | fn serialize_i32(self, v: i32) -> Result<()> { 178 | self.serialize_u32(v as u32) 179 | } 180 | 181 | fn serialize_i64(self, v: i64) -> Result<()> { 182 | self.serialize_u64(v as u64) 183 | } 184 | 185 | fn serialize_i128(self, v: i128) -> Result<()> { 186 | self.serialize_u128(v as u128) 187 | } 188 | 189 | fn serialize_u8(self, v: u8) -> Result<()> { 190 | self.output.write_all(&[v])?; 191 | Ok(()) 192 | } 193 | 194 | fn serialize_u16(self, v: u16) -> Result<()> { 195 | self.output.write_all(&v.to_le_bytes())?; 196 | Ok(()) 197 | } 198 | 199 | fn serialize_u32(self, v: u32) -> Result<()> { 200 | self.output.write_all(&v.to_le_bytes())?; 201 | Ok(()) 202 | } 203 | 204 | fn serialize_u64(self, v: u64) -> Result<()> { 205 | self.output.write_all(&v.to_le_bytes())?; 
206 | Ok(()) 207 | } 208 | 209 | fn serialize_u128(self, v: u128) -> Result<()> { 210 | self.output.write_all(&v.to_le_bytes())?; 211 | Ok(()) 212 | } 213 | 214 | fn serialize_f32(self, _v: f32) -> Result<()> { 215 | Err(Error::NotSupported("serialize_f32")) 216 | } 217 | 218 | fn serialize_f64(self, _v: f64) -> Result<()> { 219 | Err(Error::NotSupported("serialize_f64")) 220 | } 221 | 222 | fn serialize_char(self, _v: char) -> Result<()> { 223 | Err(Error::NotSupported("serialize_char")) 224 | } 225 | 226 | // Just serialize the string as a raw byte array 227 | fn serialize_str(self, v: &str) -> Result<()> { 228 | self.serialize_bytes(v.as_bytes()) 229 | } 230 | 231 | // Serialize a byte array as an array of bytes. 232 | fn serialize_bytes(mut self, v: &[u8]) -> Result<()> { 233 | self.output_seq_len(v.len())?; 234 | self.output.write_all(v)?; 235 | Ok(()) 236 | } 237 | 238 | // An absent optional is represented as `00` 239 | fn serialize_none(self) -> Result<()> { 240 | self.serialize_u8(0) 241 | } 242 | 243 | // A present optional is represented as `01` followed by the serialized value 244 | fn serialize_some(self, value: &T) -> Result<()> 245 | where 246 | T: ?Sized + Serialize, 247 | { 248 | self.output.write_all(&[1])?; 249 | value.serialize(self) 250 | } 251 | 252 | fn serialize_unit(self) -> Result<()> { 253 | Ok(()) 254 | } 255 | 256 | fn serialize_unit_struct(mut self, name: &'static str) -> Result<()> { 257 | self.enter_named_container(name)?; 258 | self.serialize_unit() 259 | } 260 | 261 | fn serialize_unit_variant( 262 | mut self, 263 | name: &'static str, 264 | variant_index: u32, 265 | _variant: &'static str, 266 | ) -> Result<()> { 267 | self.enter_named_container(name)?; 268 | self.output_variant_index(variant_index) 269 | } 270 | 271 | fn serialize_newtype_struct(mut self, name: &'static str, value: &T) -> Result<()> 272 | where 273 | T: ?Sized + Serialize, 274 | { 275 | self.enter_named_container(name)?; 276 | value.serialize(self) 277 | } 278 | 
279 | fn serialize_newtype_variant( 280 | mut self, 281 | name: &'static str, 282 | variant_index: u32, 283 | _variant: &'static str, 284 | value: &T, 285 | ) -> Result<()> 286 | where 287 | T: ?Sized + Serialize, 288 | { 289 | self.enter_named_container(name)?; 290 | self.output_variant_index(variant_index)?; 291 | value.serialize(self) 292 | } 293 | 294 | // The start of the sequence, each value, and the end are three separate 295 | // method calls. This one is responsible only for serializing the start, 296 | // which for BCS is either nothing for fixed structures or for variable 297 | // length structures, the length encoded as a u32. 298 | fn serialize_seq(mut self, len: Option) -> Result { 299 | if let Some(len) = len { 300 | self.output_seq_len(len)?; 301 | Ok(self) 302 | } else { 303 | Err(Error::MissingLen) 304 | } 305 | } 306 | 307 | // Tuples are fixed sized structs so we don't need to encode the length 308 | fn serialize_tuple(self, _len: usize) -> Result { 309 | Ok(self) 310 | } 311 | 312 | fn serialize_tuple_struct( 313 | mut self, 314 | name: &'static str, 315 | _len: usize, 316 | ) -> Result { 317 | self.enter_named_container(name)?; 318 | Ok(self) 319 | } 320 | 321 | fn serialize_tuple_variant( 322 | mut self, 323 | name: &'static str, 324 | variant_index: u32, 325 | _variant: &'static str, 326 | _len: usize, 327 | ) -> Result { 328 | self.enter_named_container(name)?; 329 | self.output_variant_index(variant_index)?; 330 | Ok(self) 331 | } 332 | 333 | fn serialize_map(self, _len: Option) -> Result { 334 | Ok(MapSerializer::new(self)) 335 | } 336 | 337 | fn serialize_struct( 338 | mut self, 339 | name: &'static str, 340 | _len: usize, 341 | ) -> Result { 342 | self.enter_named_container(name)?; 343 | Ok(self) 344 | } 345 | 346 | fn serialize_struct_variant( 347 | mut self, 348 | name: &'static str, 349 | variant_index: u32, 350 | _variant: &'static str, 351 | _len: usize, 352 | ) -> Result { 353 | self.enter_named_container(name)?; 354 | 
self.output_variant_index(variant_index)?; 355 | Ok(self) 356 | } 357 | 358 | // BCS is not a human readable format 359 | fn is_human_readable(&self) -> bool { 360 | false 361 | } 362 | } 363 | 364 | impl<'a, W> ser::SerializeSeq for Serializer<'a, W> 365 | where 366 | W: ?Sized + std::io::Write, 367 | { 368 | type Ok = (); 369 | type Error = Error; 370 | 371 | fn serialize_element(&mut self, value: &T) -> Result<()> 372 | where 373 | T: ?Sized + Serialize, 374 | { 375 | value.serialize(Serializer::new(self.output, self.max_remaining_depth)) 376 | } 377 | 378 | fn end(self) -> Result<()> { 379 | Ok(()) 380 | } 381 | } 382 | 383 | impl<'a, W> ser::SerializeTuple for Serializer<'a, W> 384 | where 385 | W: ?Sized + std::io::Write, 386 | { 387 | type Ok = (); 388 | type Error = Error; 389 | 390 | fn serialize_element(&mut self, value: &T) -> Result<()> 391 | where 392 | T: ?Sized + Serialize, 393 | { 394 | value.serialize(Serializer::new(self.output, self.max_remaining_depth)) 395 | } 396 | 397 | fn end(self) -> Result<()> { 398 | Ok(()) 399 | } 400 | } 401 | 402 | impl<'a, W> ser::SerializeTupleStruct for Serializer<'a, W> 403 | where 404 | W: ?Sized + std::io::Write, 405 | { 406 | type Ok = (); 407 | type Error = Error; 408 | 409 | fn serialize_field(&mut self, value: &T) -> Result<()> 410 | where 411 | T: ?Sized + Serialize, 412 | { 413 | value.serialize(Serializer::new(self.output, self.max_remaining_depth)) 414 | } 415 | 416 | fn end(self) -> Result<()> { 417 | Ok(()) 418 | } 419 | } 420 | 421 | impl<'a, W> ser::SerializeTupleVariant for Serializer<'a, W> 422 | where 423 | W: ?Sized + std::io::Write, 424 | { 425 | type Ok = (); 426 | type Error = Error; 427 | 428 | fn serialize_field(&mut self, value: &T) -> Result<()> 429 | where 430 | T: ?Sized + Serialize, 431 | { 432 | value.serialize(Serializer::new(self.output, self.max_remaining_depth)) 433 | } 434 | 435 | fn end(self) -> Result<()> { 436 | Ok(()) 437 | } 438 | } 439 | 440 | #[doc(hidden)] 441 | struct 
MapSerializer<'a, W: ?Sized> { 442 | serializer: Serializer<'a, W>, 443 | entries: Vec<(Vec, Vec)>, 444 | next_key: Option>, 445 | } 446 | 447 | impl<'a, W: ?Sized> MapSerializer<'a, W> { 448 | fn new(serializer: Serializer<'a, W>) -> Self { 449 | MapSerializer { 450 | serializer, 451 | entries: Vec::new(), 452 | next_key: None, 453 | } 454 | } 455 | } 456 | 457 | impl<'a, W> ser::SerializeMap for MapSerializer<'a, W> 458 | where 459 | W: ?Sized + std::io::Write, 460 | { 461 | type Ok = (); 462 | type Error = Error; 463 | 464 | fn serialize_key(&mut self, key: &T) -> Result<()> 465 | where 466 | T: ?Sized + Serialize, 467 | { 468 | if self.next_key.is_some() { 469 | return Err(Error::ExpectedMapValue); 470 | } 471 | 472 | let mut output = Vec::new(); 473 | key.serialize(Serializer::new( 474 | &mut output, 475 | self.serializer.max_remaining_depth, 476 | ))?; 477 | self.next_key = Some(output); 478 | Ok(()) 479 | } 480 | 481 | fn serialize_value(&mut self, value: &T) -> Result<()> 482 | where 483 | T: ?Sized + Serialize, 484 | { 485 | match self.next_key.take() { 486 | Some(key) => { 487 | let mut output = Vec::new(); 488 | value.serialize(Serializer::new( 489 | &mut output, 490 | self.serializer.max_remaining_depth, 491 | ))?; 492 | self.entries.push((key, output)); 493 | Ok(()) 494 | } 495 | None => Err(Error::ExpectedMapKey), 496 | } 497 | } 498 | 499 | fn end(mut self) -> Result<()> { 500 | if self.next_key.is_some() { 501 | return Err(Error::ExpectedMapValue); 502 | } 503 | self.entries.sort_by(|e1, e2| e1.0.cmp(&e2.0)); 504 | self.entries.dedup_by(|e1, e2| e1.0.eq(&e2.0)); 505 | 506 | let len = self.entries.len(); 507 | self.serializer.output_seq_len(len)?; 508 | 509 | for (key, value) in &self.entries { 510 | self.serializer.output.write_all(key)?; 511 | self.serializer.output.write_all(value)?; 512 | } 513 | 514 | Ok(()) 515 | } 516 | } 517 | 518 | impl<'a, W> ser::SerializeStruct for Serializer<'a, W> 519 | where 520 | W: ?Sized + std::io::Write, 521 | { 
522 | type Ok = (); 523 | type Error = Error; 524 | 525 | fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> 526 | where 527 | T: ?Sized + Serialize, 528 | { 529 | value.serialize(Serializer::new(self.output, self.max_remaining_depth)) 530 | } 531 | 532 | fn end(self) -> Result<()> { 533 | Ok(()) 534 | } 535 | } 536 | 537 | impl<'a, W> ser::SerializeStructVariant for Serializer<'a, W> 538 | where 539 | W: ?Sized + std::io::Write, 540 | { 541 | type Ok = (); 542 | type Error = Error; 543 | 544 | fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> 545 | where 546 | T: ?Sized + Serialize, 547 | { 548 | value.serialize(Serializer::new(self.output, self.max_remaining_depth)) 549 | } 550 | 551 | fn end(self) -> Result<()> { 552 | Ok(()) 553 | } 554 | } 555 | -------------------------------------------------------------------------------- /src/test_helpers.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | pub fn assert_canonical_encode_decode(t: T) 5 | where 6 | T: serde::Serialize + serde::de::DeserializeOwned + std::fmt::Debug + PartialEq, 7 | { 8 | let bytes = crate::to_bytes(&t).unwrap(); 9 | let s: T = crate::from_bytes(&bytes).unwrap(); 10 | assert_eq!(t, s); 11 | } 12 | -------------------------------------------------------------------------------- /tests/serde.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) The Diem Core Contributors 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | // For some reason deriving `Arbitrary` results in clippy firing a `unit_arg` violation 5 | #![allow(clippy::unit_arg)] 6 | 7 | use std::{ 8 | collections::{BTreeMap, BTreeSet}, 9 | fmt, 10 | }; 11 | 12 | use proptest::prelude::*; 13 | use proptest_derive::Arbitrary; 14 | use serde::{de::DeserializeOwned, Deserialize, Serialize}; 15 | 16 | use 
bcs::{from_bytes, serialized_size, to_bytes, Error, MAX_CONTAINER_DEPTH, MAX_SEQUENCE_LENGTH}; 17 | 18 | fn is_same(t: T) 19 | where 20 | T: Serialize + DeserializeOwned + fmt::Debug + PartialEq, 21 | { 22 | let bytes = to_bytes(&t).unwrap(); 23 | let s: T = from_bytes(&bytes).unwrap(); 24 | assert_eq!(t, s); 25 | assert_eq!(bytes.len(), serialized_size(&t).unwrap()); 26 | } 27 | 28 | // TODO deriving `Arbitrary` is currently broken for enum types 29 | // Once AltSysrq/proptest#163 is merged we can use `Arbitrary` again. 30 | #[derive(Debug, Deserialize, Serialize, PartialEq)] 31 | enum E { 32 | Unit, 33 | Newtype(u16), 34 | Tuple(u16, u16), 35 | Struct { a: u32 }, 36 | } 37 | 38 | #[test] 39 | fn test_enum() { 40 | let u = E::Unit; 41 | let expected = vec![0]; 42 | assert_eq!(to_bytes(&u).unwrap(), expected); 43 | is_same(u); 44 | 45 | let n = E::Newtype(1); 46 | let expected = vec![1, 1, 0]; 47 | assert_eq!(to_bytes(&n).unwrap(), expected); 48 | is_same(n); 49 | 50 | let t = E::Tuple(1, 2); 51 | let expected = vec![2, 1, 0, 2, 0]; 52 | assert_eq!(to_bytes(&t).unwrap(), expected); 53 | is_same(t); 54 | 55 | let s = E::Struct { a: 1 }; 56 | let expected = vec![3, 1, 0, 0, 0]; 57 | assert_eq!(to_bytes(&s).unwrap(), expected); 58 | is_same(s); 59 | } 60 | 61 | #[derive(Arbitrary, Debug, Deserialize, Serialize, PartialEq)] 62 | struct S { 63 | int: u16, 64 | option: Option, 65 | seq: Vec, 66 | boolean: bool, 67 | } 68 | 69 | proptest! 
{ 70 | #[test] 71 | fn proptest_bool(v in any::()) { 72 | assert_eq!(to_bytes(&v)?, vec![u8::from(v)]); 73 | is_same(v); 74 | } 75 | 76 | #[test] 77 | fn proptest_i8(v in any::()) { 78 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 79 | is_same(v); 80 | } 81 | 82 | #[test] 83 | fn proptest_i16(v in any::()) { 84 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 85 | is_same(v); 86 | } 87 | 88 | #[test] 89 | fn proptest_i32(v in any::()) { 90 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 91 | is_same(v); 92 | } 93 | 94 | #[test] 95 | fn proptest_i64(v in any::()) { 96 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 97 | is_same(v); 98 | } 99 | 100 | #[test] 101 | fn proptest_i128(v in any::()) { 102 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 103 | is_same(v); 104 | } 105 | 106 | #[test] 107 | fn proptest_u8(v in any::()) { 108 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 109 | is_same(v); 110 | } 111 | 112 | #[test] 113 | fn proptest_u16(v in any::()) { 114 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 115 | is_same(v); 116 | } 117 | 118 | #[test] 119 | fn proptest_u32(v in any::()) { 120 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 121 | is_same(v); 122 | } 123 | 124 | #[test] 125 | fn proptest_u64(v in any::()) { 126 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 127 | is_same(v); 128 | } 129 | 130 | #[test] 131 | fn proptest_u128(v in any::()) { 132 | assert_eq!(to_bytes(&v)?, v.to_le_bytes()); 133 | is_same(v); 134 | } 135 | 136 | #[test] 137 | fn proptest_string(v in any::()) { 138 | let mut expected = Vec::with_capacity(v.len() + 4); 139 | // Larger lengths have more complex uleb128 encodings. 
140 | prop_assume!(v.len() < 128); 141 | expected.extend_from_slice(&(v.len() as u8).to_le_bytes()); 142 | expected.extend_from_slice(v.as_bytes()); 143 | assert_eq!(to_bytes(&v)?, expected); 144 | 145 | is_same(v); 146 | } 147 | 148 | #[test] 149 | fn proptest_vec(v in any::>()) { 150 | let mut expected = Vec::with_capacity(v.len() + 4); 151 | // Larger lengths have more complex uleb128 encodings. 152 | prop_assume!(v.len() < 128); 153 | expected.extend_from_slice(&(v.len() as u8).to_le_bytes()); 154 | expected.extend_from_slice(&v); 155 | assert_eq!(to_bytes(&v)?, expected); 156 | 157 | is_same(v); 158 | } 159 | 160 | #[test] 161 | fn proptest_option(v in any::>()) { 162 | let expected = v.map(|v| vec![1, v]).unwrap_or_else(|| vec![0]); 163 | assert_eq!(to_bytes(&v)?, expected); 164 | 165 | is_same(v); 166 | } 167 | 168 | #[test] 169 | fn proptest_btreemap(v in any::, Vec>>()) { 170 | is_same(v); 171 | } 172 | 173 | #[test] 174 | fn proptest_tuple2(v in any::<(i16, String)>()) { 175 | is_same(v); 176 | } 177 | 178 | #[test] 179 | fn proptest_tuple3(v in any::<(bool, u32, String)>()) { 180 | is_same(v); 181 | } 182 | 183 | #[test] 184 | fn proptest_tuple4(v in any::<(bool, u32, Option)>()) { 185 | is_same(v); 186 | } 187 | 188 | #[test] 189 | fn proptest_tuple_strings(v in any::<(String, String, String)>()) { 190 | is_same(v); 191 | } 192 | 193 | #[test] 194 | fn proptest_lexicographic_order(v in any::, Vec>>()) { 195 | let bytes = to_bytes(&v).unwrap(); 196 | // This test assumes small maps and small vectors. 197 | // This is what proptest always generates in practice but we will make 198 | // the assumptions explicit anyway. 
199 | prop_assume!(v.len() < 128); 200 | 201 | let m : BTreeMap, Vec> = v.iter().filter_map(|(k, v)| { 202 | if k.len() >= 128 || v.len() >= 128 { 203 | return None; 204 | } 205 | let mut k_bytes = Vec::with_capacity(k.len() + 4); 206 | k_bytes.extend_from_slice(&(k.len() as u8).to_le_bytes()); 207 | k_bytes.extend(k.iter()); 208 | let mut v_bytes = Vec::with_capacity(v.len() + 4); 209 | v_bytes.extend_from_slice(&(v.len() as u8).to_le_bytes()); 210 | v_bytes.extend(v.iter()); 211 | 212 | Some((k_bytes, v_bytes)) 213 | }) 214 | .collect(); 215 | prop_assume!(v.len() == m.len()); 216 | 217 | let mut expected = Vec::with_capacity(bytes.len()); 218 | expected.extend_from_slice(&(m.len() as u8).to_le_bytes()); 219 | for (key, value) in m { 220 | expected.extend(key.iter()); 221 | expected.extend(value.iter()); 222 | } 223 | 224 | assert_eq!(expected, bytes); 225 | } 226 | 227 | #[test] 228 | fn proptest_box(v in any::>()) { 229 | is_same(v); 230 | } 231 | 232 | #[test] 233 | fn proptest_struct(v in any::()) { 234 | is_same(v); 235 | } 236 | 237 | #[test] 238 | fn proptest_addr(v in any::()) { 239 | is_same(v); 240 | } 241 | 242 | #[test] 243 | fn proptest_bar(v in any::()) { 244 | is_same(v); 245 | } 246 | 247 | #[test] 248 | fn proptest_foo(v in any::()) { 249 | is_same(v); 250 | } 251 | } 252 | 253 | #[test] 254 | fn invalid_utf8() { 255 | let invalid_utf8 = vec![1, 0xFF]; 256 | assert_eq!(from_bytes::(&invalid_utf8), Err(Error::Utf8)); 257 | } 258 | 259 | #[test] 260 | fn uleb_encoding_and_variant() { 261 | #[derive(Serialize, Deserialize, Debug, PartialEq)] 262 | enum Test { 263 | One, 264 | Two, 265 | } 266 | 267 | let valid_variant = vec![1]; 268 | from_bytes::(&valid_variant).unwrap(); 269 | 270 | let invalid_variant = vec![5]; 271 | // Error comes from serde 272 | assert_eq!( 273 | from_bytes::(&invalid_variant), 274 | Err(Error::Custom( 275 | "invalid value: integer `5`, expected variant index 0 <= i < 2".into() 276 | )) 277 | ); 278 | 279 | let invalid_bytes 
= vec![0x80, 0x80, 0x80, 0x80]; 280 | // Error is due to EOF. 281 | assert_eq!(from_bytes::(&invalid_bytes), Err(Error::Eof)); 282 | 283 | let invalid_uleb = vec![0x80, 0x80, 0x80, 0x80, 0x80]; 284 | // Error comes from uleb decoder because u32 are never that long. 285 | assert_eq!( 286 | from_bytes::(&invalid_uleb), 287 | Err(Error::IntegerOverflowDuringUleb128Decoding) 288 | ); 289 | 290 | let invalid_uleb = vec![0x80, 0x80, 0x80, 0x80, 0x1f]; 291 | // Error comes from uleb decoder because we are truncating a larger integer into u32. 292 | assert_eq!( 293 | from_bytes::(&invalid_uleb), 294 | Err(Error::IntegerOverflowDuringUleb128Decoding) 295 | ); 296 | 297 | let invalid_uleb = vec![0x80, 0x80, 0x80, 0x80, 0x0f]; 298 | // Error comes from Serde because ULEB integer is valid. 299 | assert_eq!( 300 | from_bytes::(&invalid_uleb), 301 | Err(Error::Custom( 302 | "invalid value: integer `4026531840`, expected variant index 0 <= i < 2".into() 303 | )) 304 | ); 305 | 306 | let invalid_uleb = vec![0x80, 0x80, 0x80, 0x00]; 307 | // Uleb decoder must reject non-canonical forms. 
308 | assert_eq!( 309 | from_bytes::(&invalid_uleb), 310 | Err(Error::NonCanonicalUleb128Encoding) 311 | ); 312 | } 313 | 314 | #[test] 315 | fn invalid_option() { 316 | let invalid_option = vec![5, 0]; 317 | assert_eq!( 318 | from_bytes::>(&invalid_option), 319 | Err(Error::ExpectedOption) 320 | ); 321 | } 322 | 323 | #[test] 324 | fn invalid_bool() { 325 | let invalid_bool = vec![9]; 326 | assert_eq!( 327 | from_bytes::(&invalid_bool), 328 | Err(Error::ExpectedBoolean) 329 | ); 330 | } 331 | 332 | #[test] 333 | fn sequence_too_long() { 334 | let seq = vec![0; MAX_SEQUENCE_LENGTH + 1]; 335 | match to_bytes(&seq).unwrap_err() { 336 | Error::ExceededMaxLen(len) => assert_eq!(len, MAX_SEQUENCE_LENGTH + 1), 337 | _ => panic!(), 338 | } 339 | } 340 | 341 | #[test] 342 | fn variable_lengths() { 343 | assert_eq!(to_bytes(&vec![(); 1]).unwrap(), vec![0x01]); 344 | assert_eq!(to_bytes(&vec![(); 128]).unwrap(), vec![0x80, 0x01]); 345 | assert_eq!(to_bytes(&vec![(); 255]).unwrap(), vec![0xff, 0x01]); 346 | assert_eq!( 347 | to_bytes(&vec![(); 786_432]).unwrap(), 348 | vec![0x80, 0x80, 0x30] 349 | ); 350 | } 351 | 352 | #[test] 353 | fn sequence_not_long_enough() { 354 | let seq = vec![5, 1, 2, 3, 4]; // Missing 5th element 355 | assert_eq!(from_bytes::>(&seq), Err(Error::Eof)); 356 | } 357 | 358 | #[test] 359 | fn map_not_canonical() { 360 | let mut map = BTreeMap::new(); 361 | map.insert(4u8, ()); 362 | map.insert(5u8, ()); 363 | let seq = vec![2, 4, 5]; 364 | assert_eq!(from_bytes::>(&seq), Ok(map)); 365 | // Make sure out-of-order keys are rejected. 366 | let seq = vec![2, 5, 4]; 367 | assert_eq!( 368 | from_bytes::>(&seq), 369 | Err(Error::NonCanonicalMap) 370 | ); 371 | // Make sure duplicate keys are rejected. 
372 | let seq = vec![2, 5, 5]; 373 | assert_eq!( 374 | from_bytes::>(&seq), 375 | Err(Error::NonCanonicalMap) 376 | ); 377 | } 378 | 379 | #[test] 380 | fn by_default_btreesets_are_serialized_as_sequences() { 381 | // See https://docs.serde.rs/src/serde/de/impls.rs.html 382 | // This is a big caveat for us, but luckily, generate-format will track this in the YAML output. 383 | let mut set = BTreeSet::new(); 384 | set.insert(4u8); 385 | set.insert(5u8); 386 | let seq = vec![2, 4, 5]; 387 | assert_eq!(from_bytes::>(&seq), Ok(set.clone())); 388 | let seq = vec![2, 5, 4]; 389 | assert_eq!(from_bytes::>(&seq), Ok(set.clone())); 390 | // Duplicate keys are just ok. 391 | let seq = vec![3, 5, 5, 4]; 392 | assert_eq!(from_bytes::>(&seq), Ok(set)); 393 | } 394 | 395 | #[test] 396 | fn leftover_bytes() { 397 | let seq = vec![5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; // 5 extra elements 398 | assert_eq!(from_bytes::>(&seq), Err(Error::RemainingInput)); 399 | } 400 | 401 | #[test] 402 | fn test_f32() { 403 | assert!(to_bytes(&1.0f32).is_err()); 404 | } 405 | 406 | #[test] 407 | fn test_f64() { 408 | assert!(to_bytes(&42.0f64).is_err()); 409 | } 410 | 411 | #[test] 412 | fn test_char() { 413 | assert!(to_bytes(&'a').is_err()); 414 | } 415 | 416 | #[test] 417 | fn zero_copy_parse() { 418 | #[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] 419 | struct Foo<'a> { 420 | borrowed_str: &'a str, 421 | borrowed_bytes: &'a [u8], 422 | } 423 | 424 | let f = Foo { 425 | borrowed_str: "hi", 426 | borrowed_bytes: &[0, 1, 2, 3], 427 | }; 428 | { 429 | let expected = vec![2, b'h', b'i', 4, 0, 1, 2, 3]; 430 | let encoded = to_bytes(&f).unwrap(); 431 | assert_eq!(expected, encoded); 432 | let out: Foo = from_bytes(&encoded[..]).unwrap(); 433 | assert_eq!(out, f); 434 | } 435 | } 436 | 437 | #[test] 438 | fn cow() { 439 | use std::borrow::Cow; 440 | 441 | let large_object = vec![1u32, 2, 3, 4, 5, 6]; 442 | let mut large_map = BTreeMap::new(); 443 | large_map.insert(1, 2); 444 | 445 | 
#[derive(Serialize, Deserialize, Debug)] 446 | enum Message<'a> { 447 | M1(Cow<'a, Vec>), 448 | M2(Cow<'a, BTreeMap>), 449 | } 450 | 451 | // M1 452 | { 453 | let serialized = to_bytes(&Message::M1(Cow::Borrowed(&large_object))).unwrap(); 454 | let deserialized: Message<'static> = from_bytes(&serialized).unwrap(); 455 | 456 | match deserialized { 457 | Message::M1(b) => assert_eq!(b.into_owned(), large_object), 458 | _ => panic!(), 459 | } 460 | } 461 | 462 | // M2 463 | { 464 | let serialized = to_bytes(&Message::M2(Cow::Borrowed(&large_map))).unwrap(); 465 | let deserialized: Message<'static> = from_bytes(&serialized).unwrap(); 466 | 467 | match deserialized { 468 | Message::M2(b) => assert_eq!(b.into_owned(), large_map), 469 | _ => panic!(), 470 | } 471 | } 472 | } 473 | 474 | #[test] 475 | fn strbox() { 476 | use std::borrow::Cow; 477 | 478 | let strx: &'static str = "hello world"; 479 | let serialized = to_bytes(&Cow::Borrowed(strx)).unwrap(); 480 | let deserialized: Cow<'static, String> = from_bytes(&serialized).unwrap(); 481 | let stringx: String = deserialized.into_owned(); 482 | assert_eq!(strx, stringx); 483 | } 484 | 485 | #[test] 486 | fn slicebox() { 487 | use std::borrow::Cow; 488 | 489 | let slice = [1u32, 2, 3, 4, 5]; 490 | let serialized = to_bytes(&Cow::Borrowed(&slice[..])).unwrap(); 491 | let deserialized: Cow<'static, Vec> = from_bytes(&serialized).unwrap(); 492 | { 493 | let sb: &[u32] = &deserialized; 494 | assert_eq!(slice, sb); 495 | } 496 | let vecx: Vec = deserialized.into_owned(); 497 | assert_eq!(slice, vecx[..]); 498 | } 499 | 500 | #[test] 501 | fn path_buf() { 502 | use std::path::{Path, PathBuf}; 503 | 504 | let path = Path::new("foo").to_path_buf(); 505 | let encoded = to_bytes(&path).unwrap(); 506 | let decoded: PathBuf = from_bytes(&encoded).unwrap(); 507 | assert!(path.to_str() == decoded.to_str()); 508 | } 509 | 510 | #[derive(Arbitrary, Debug, Deserialize, Serialize, PartialEq)] 511 | struct Addr([u8; 32]); 512 | 513 | 
#[derive(Arbitrary, Debug, Deserialize, Serialize, PartialEq)] 514 | struct Bar { 515 | a: u64, 516 | b: Vec, 517 | c: Addr, 518 | d: u32, 519 | } 520 | 521 | #[derive(Arbitrary, Debug, Deserialize, Serialize, PartialEq)] 522 | struct Foo { 523 | a: u64, 524 | b: Vec, 525 | c: Bar, 526 | d: bool, 527 | e: BTreeMap, Vec>, 528 | } 529 | 530 | #[test] 531 | fn serde_known_vector() { 532 | let b = Bar { 533 | a: 100, 534 | b: vec![0, 1, 2, 3, 4, 5, 6, 7, 8], 535 | c: Addr([5u8; 32]), 536 | d: 99, 537 | }; 538 | 539 | let mut map = BTreeMap::new(); 540 | map.insert(vec![0, 56, 21], vec![22, 10, 5]); 541 | map.insert(vec![1], vec![22, 21, 67]); 542 | map.insert(vec![20, 21, 89, 105], vec![201, 23, 90]); 543 | 544 | let f = Foo { 545 | a: u64::max_value(), 546 | b: vec![100, 99, 88, 77, 66, 55], 547 | c: b, 548 | d: true, 549 | e: map, 550 | }; 551 | 552 | let bytes = to_bytes(&f).unwrap(); 553 | 554 | let test_vector = vec![ 555 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x06, 0x64, 0x63, 0x58, 0x4d, 0x42, 0x37, 556 | 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 557 | 0x06, 0x07, 0x08, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 558 | 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 559 | 0x05, 0x05, 0x05, 0x05, 0x05, 0x63, 0x00, 0x00, 0x00, 0x01, 0x03, 0x01, 0x01, 0x03, 0x16, 560 | 0x15, 0x43, 0x03, 0x00, 0x38, 0x15, 0x03, 0x16, 0x0a, 0x05, 0x04, 0x14, 0x15, 0x59, 0x69, 561 | 0x03, 0xc9, 0x17, 0x5a, 562 | ]; 563 | 564 | // make sure we serialize into exact same bytes as before 565 | assert_eq!(test_vector, bytes); 566 | 567 | // make sure we can deserialize the test vector into expected struct 568 | let deserialized_foo: Foo = from_bytes(&test_vector).unwrap(); 569 | assert_eq!(f, deserialized_foo); 570 | } 571 | 572 | #[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone)] 573 | struct List { 574 | value: T, 575 | next: Option>>, 576 | } 577 | 
impl List { 578 | fn head(value: T) -> Self { 579 | Self { value, next: None } 580 | } 581 | 582 | fn cons(value: T, tail: List) -> Self { 583 | Self { 584 | value, 585 | next: Some(Box::new(tail)), 586 | } 587 | } 588 | } 589 | impl List { 590 | fn repeat(len: usize, value: T) -> Self { 591 | if len == 0 { 592 | Self::head(value) 593 | } else { 594 | Self::cons(value.clone(), Self::repeat(len - 1, value)) 595 | } 596 | } 597 | } 598 | 599 | impl List { 600 | fn integers(len: usize) -> Self { 601 | if len == 0 { 602 | Self::head(0) 603 | } else { 604 | Self::cons(len, Self::integers(len - 1)) 605 | } 606 | } 607 | } 608 | 609 | #[test] 610 | fn test_recursion_limit() { 611 | let l1 = List::integers(4); 612 | let b1 = to_bytes(&l1).unwrap(); 613 | assert_eq!( 614 | b1, 615 | vec![ 616 | 4, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 617 | 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 618 | ] 619 | ); 620 | assert_eq!(from_bytes::>(&b1).unwrap(), l1); 621 | 622 | let l2 = List::integers(MAX_CONTAINER_DEPTH - 1); 623 | let b2 = to_bytes(&l2).unwrap(); 624 | assert_eq!(from_bytes::>(&b2).unwrap(), l2); 625 | let l3 = List::integers(MAX_CONTAINER_DEPTH); 626 | assert_eq!( 627 | to_bytes(&l3), 628 | Err(Error::ExceededContainerDepthLimit("List")) 629 | ); 630 | let mut b3 = vec![244, 1, 0, 0, 0, 0, 0, 0, 1]; 631 | b3.extend(b2); 632 | assert_eq!( 633 | from_bytes::>(&b3), 634 | Err(Error::ExceededContainerDepthLimit("List")) 635 | ); 636 | 637 | let b2_pair = to_bytes(&(&l2, &l2)).unwrap(); 638 | assert_eq!( 639 | from_bytes::<(List<_>, List<_>)>(&b2_pair).unwrap(), 640 | (l2.clone(), l2.clone()) 641 | ); 642 | assert_eq!( 643 | to_bytes(&(&l2, &l3)), 644 | Err(Error::ExceededContainerDepthLimit("List")) 645 | ); 646 | assert_eq!( 647 | to_bytes(&(&l3, &l2)), 648 | Err(Error::ExceededContainerDepthLimit("List")) 649 | ); 650 | assert_eq!( 651 | to_bytes(&(&l3, &l3)), 652 | Err(Error::ExceededContainerDepthLimit("List")) 653 | ); 
654 | } 655 | #[derive(Deserialize, Serialize, Clone, PartialEq, Eq, Debug)] 656 | enum EnumA { 657 | ValueA, 658 | } 659 | 660 | #[test] 661 | fn test_recursion_limit_enum() { 662 | let l1 = List::repeat(6, EnumA::ValueA); 663 | let b1 = to_bytes(&l1).unwrap(); 664 | assert_eq!(b1, vec![0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0],); 665 | assert_eq!(from_bytes::>(&b1).unwrap(), l1); 666 | 667 | let l2 = List::repeat(MAX_CONTAINER_DEPTH - 2, EnumA::ValueA); 668 | let b2 = to_bytes(&l2).unwrap(); 669 | assert_eq!(from_bytes::>(&b2).unwrap(), l2); 670 | 671 | let l3 = List::repeat(MAX_CONTAINER_DEPTH - 1, EnumA::ValueA); 672 | assert_eq!( 673 | to_bytes(&l3), 674 | Err(Error::ExceededContainerDepthLimit("EnumA")) 675 | ); 676 | let mut b3 = vec![0, 1]; 677 | b3.extend(b2); 678 | assert_eq!( 679 | from_bytes::>(&b3), 680 | Err(Error::ExceededContainerDepthLimit("EnumA")) 681 | ); 682 | } 683 | --------------------------------------------------------------------------------